From 0c9f353cb202a06de122dd923a98bc08e9a98045 Mon Sep 17 00:00:00 2001
From: Nikolai Tschacher
Date: Mon, 17 Jun 2019 00:03:13 +0200
Subject: [PATCH] remove hardcoded sleep() in Google Image

---
Review notes (free text placed after the three-dash marker; it is not part
of the commit message):

* Line breaks and indentation of this patch were restored from a
  whitespace-collapsed copy. A 4-space indent is assumed, and the inner
  spacing of the removed commented-out lines is a guess. Verify the context
  and removed lines against src/modules/google.js before applying.
* examples/gimage.js (new file): builds a ScrapeManager from browser_config,
  starts it, runs one 'google_image' job for three keywords with
  num_pages set to 1, prints the results with console.dir and then quits.
  NOTE(review): quit() is not reached if start() or scrape() rejects, since
  the async wrapper has no try/finally or catch.
* GoogleImageScraper, first hunk: after pressing Enter the method now awaits
  this.page.waitForNavigation(). The start of this method lies outside the
  hunk context, so only its tail is visible here.
  NOTE(review): presumably this.page is a Puppeteer Page (TODO confirm). The
  wait is registered only after keyboard.press("Enter") has been awaited;
  Puppeteer's documentation starts waitForNavigation() together with the
  triggering action via Promise.all. Confirm that the navigation cannot
  complete before the wait is registered.
* GoogleImageScraper.wait_for_results, second hunk: removes the
  commented-out waitForFunction block, one blank line and the fixed
  waitFor(500) delay. The waitForSelector call on
  '.rg_bx .a-no-hover-decoration div' with this.STANDARD_TIMEOUT is kept.

 examples/gimage.js    | 24 ++++++++++++++++++++++++
 src/modules/google.js | 11 ++---------
 2 files changed, 26 insertions(+), 9 deletions(-)
 create mode 100644 examples/gimage.js

diff --git a/examples/gimage.js b/examples/gimage.js
new file mode 100644
index 0000000..baf354a
--- /dev/null
+++ b/examples/gimage.js
@@ -0,0 +1,24 @@
+const se_scraper = require('./../src/node_scraper.js');
+
+(async () => {
+    let browser_config = {
+        debug_level: 1,
+        output_file: '',
+    };
+
+    let scrape_job = {
+        search_engine: 'google_image',
+        keywords: ['manaslu', 'everest', 'pitcairn'],
+        num_pages: 1,
+    };
+
+    var scraper = new se_scraper.ScrapeManager(browser_config);
+
+    await scraper.start();
+
+    var results = await scraper.scrape(scrape_job);
+
+    console.dir(results, {depth: null, colors: true});
+
+    await scraper.quit();
+})();
diff --git a/src/modules/google.js b/src/modules/google.js
index 8c390a5..a8dde02 100644
--- a/src/modules/google.js
+++ b/src/modules/google.js
@@ -268,6 +268,8 @@ class GoogleImageScraper extends Scraper {
         await this.sleep(50);
         await input.focus();
         await this.page.keyboard.press("Enter");
+        // this waitForNavigation makes hardcoded sleeps not necessary
+        await this.page.waitForNavigation();
     }
 
     async next_page() {
@@ -275,16 +277,7 @@ class GoogleImageScraper extends Scraper {
     }
 
     async wait_for_results() {
-        // await this.page.waitForFunction(() => {
-        //     var textnode = document.querySelector('.rg_bx .a-no-hover-decoration div:first-child');
-        //     if (textnode) {
-        //         return textnode.innerHTML.length > 0
-        //     }
-        //     return false;
-        // }, {timeout: this.STANDARD_TIMEOUT});
-
         await this.page.waitForSelector('.rg_bx .a-no-hover-decoration div', {timeout: this.STANDARD_TIMEOUT});
-        await this.page.waitFor(500);
     }
 
     async detected() {