remove hardcoded sleep() in Google Image

This commit is contained in:
Nikolai Tschacher 2019-06-17 00:03:13 +02:00
parent 43d5732de7
commit 0c9f353cb2
2 changed files with 26 additions and 9 deletions

24
examples/gimage.js Normal file
View File

@ -0,0 +1,24 @@
const se_scraper = require('./../src/node_scraper.js');
// Example: run a single Google Image scrape job for a few keywords and
// pretty-print whatever the scraper returns.
(async () => {
    // Options handed to the ScrapeManager constructor.
    const browser_config = {
        debug_level: 1,
        output_file: '',
    };

    // Description of the scrape job: which engine, which keywords, how many pages.
    const scrape_job = {
        search_engine: 'google_image',
        keywords: ['manaslu', 'everest', 'pitcairn'],
        num_pages: 1,
    };

    const scraper = new se_scraper.ScrapeManager(browser_config);
    await scraper.start();

    try {
        const results = await scraper.scrape(scrape_job);
        console.dir(results, {depth: null, colors: true});
    } finally {
        // Always shut the scraper down once it has been started, even when
        // scraping or printing throws, so nothing is left running.
        await scraper.quit();
    }
})().catch((err) => {
    // Report failures explicitly instead of leaving a floating promise that
    // ends in an unhandled rejection, and signal failure to the caller.
    console.error(err);
    process.exitCode = 1;
});

View File

@ -268,6 +268,8 @@ class GoogleImageScraper extends Scraper {
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
// this waitForNavigation makes hardcoded sleeps not necessary
await this.page.waitForNavigation();
}
async next_page() {
@ -275,16 +277,7 @@ class GoogleImageScraper extends Scraper {
}
async wait_for_results() {
// await this.page.waitForFunction(() => {
// var textnode = document.querySelector('.rg_bx .a-no-hover-decoration div:first-child');
// if (textnode) {
// return textnode.innerHTML.length > 0
// }
// return false;
// }, {timeout: this.STANDARD_TIMEOUT});
await this.page.waitForSelector('.rg_bx .a-no-hover-decoration div', {timeout: this.STANDARD_TIMEOUT});
await this.page.waitFor(500);
}
async detected() {