Merge pull request #41 from victor9000/master

Fix broken Google News selectors, fixes #40
This commit is contained in:
Nikolai Tschacher 2019-08-08 21:57:14 +02:00 committed by GitHub
commit 8e629f6266
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -220,12 +220,12 @@ class GoogleNewsOldScraper extends Scraper {
// perform queries // perform queries
const results = []; const results = [];
$('.g').each((i, result) => { $('g-card').each((i, result) => {
results.push({ results.push({
link: $(result).find('h3 a').attr('href'), link: $(result).find('a').attr('href'),
title: $(result).find('h3 a').text(), title: $(result).find('a div div:nth-child(2) div:nth-child(2)').text(),
snippet: $(result).find('.st').text(), snippet: $(result).find('a div div:nth-child(2) div:nth-child(3) div:nth-child(1)').text(),
date: $(result).find('.nsa').text(), date: $(result).find('a div div:nth-child(2) div:nth-child(3) div:nth-child(2)').text(),
}) })
}); });
@@ -263,6 +263,7 @@ class GoogleNewsOldScraper extends Scraper {
} }
async search_keyword(keyword) { async search_keyword(keyword) {
let url = this.build_start_url(`https://www.google.com/search?q=${keyword}&source=lnms&tbm=nws&`) || let url = this.build_start_url(`https://www.google.com/search?q=${keyword}&source=lnms&tbm=nws&`) ||
`https://www.google.com/search?q=${keyword}&hl=en&source=lnms&tbm=nws`; `https://www.google.com/search?q=${keyword}&hl=en&source=lnms&tbm=nws`;
@@ -284,7 +285,7 @@ class GoogleNewsOldScraper extends Scraper {
} }
async wait_for_results() { async wait_for_results() {
await this.page.waitForSelector('#main .g', { timeout: this.STANDARD_TIMEOUT }); await this.page.waitForSelector('#rso', { timeout: this.STANDARD_TIMEOUT });
} }
async detected() { async detected() {
@@ -380,7 +381,7 @@ class GoogleNewsScraper extends Scraper {
$('article > h3').each((i, headline) => { $('article > h3').each((i, headline) => {
let title = $(headline).find('a span').text(); let title = $(headline).find('a').text();
try { try {
var snippet = $(headline).parent().find('p').text(); var snippet = $(headline).parent().find('p').text();