mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-01-27 06:38:34 +01:00
Merge pull request #41 from victor9000/master
Fix broken Google News selectors, fixes #40
This commit is contained in:
commit
8e629f6266
@@ -220,12 +220,12 @@ class GoogleNewsOldScraper extends Scraper {
|
|||||||
// perform queries
|
// perform queries
|
||||||
const results = [];
|
const results = [];
|
||||||
|
|
||||||
$('.g').each((i, result) => {
|
$('g-card').each((i, result) => {
|
||||||
results.push({
|
results.push({
|
||||||
link: $(result).find('h3 a').attr('href'),
|
link: $(result).find('a').attr('href'),
|
||||||
title: $(result).find('h3 a').text(),
|
title: $(result).find('a div div:nth-child(2) div:nth-child(2)').text(),
|
||||||
snippet: $(result).find('.st').text(),
|
snippet: $(result).find('a div div:nth-child(2) div:nth-child(3) div:nth-child(1)').text(),
|
||||||
date: $(result).find('.nsa').text(),
|
date: $(result).find('a div div:nth-child(2) div:nth-child(3) div:nth-child(2)').text(),
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -263,6 +263,7 @@ class GoogleNewsOldScraper extends Scraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async search_keyword(keyword) {
|
async search_keyword(keyword) {
|
||||||
|
|
||||||
let url = this.build_start_url(`https://www.google.com/search?q=${keyword}&source=lnms&tbm=nws&`) ||
|
let url = this.build_start_url(`https://www.google.com/search?q=${keyword}&source=lnms&tbm=nws&`) ||
|
||||||
`https://www.google.com/search?q=${keyword}&hl=en&source=lnms&tbm=nws`;
|
`https://www.google.com/search?q=${keyword}&hl=en&source=lnms&tbm=nws`;
|
||||||
|
|
||||||
@@ -284,7 +285,7 @@ class GoogleNewsOldScraper extends Scraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async wait_for_results() {
|
async wait_for_results() {
|
||||||
await this.page.waitForSelector('#main .g', { timeout: this.STANDARD_TIMEOUT });
|
await this.page.waitForSelector('#rso', { timeout: this.STANDARD_TIMEOUT });
|
||||||
}
|
}
|
||||||
|
|
||||||
async detected() {
|
async detected() {
|
||||||
@@ -380,7 +381,7 @@ class GoogleNewsScraper extends Scraper {
|
|||||||
|
|
||||||
$('article > h3').each((i, headline) => {
|
$('article > h3').each((i, headline) => {
|
||||||
|
|
||||||
let title = $(headline).find('a span').text();
|
let title = $(headline).find('a').text();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
var snippet = $(headline).parent().find('p').text();
|
var snippet = $(headline).parent().find('p').text();
|
||||||
|
Loading…
Reference in New Issue
Block a user