diff --git a/package.json b/package.json index 341ac46..330d68f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "se-scraper", - "version": "1.3.5", + "version": "1.3.7", "description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu", "homepage": "https://scrapeulous.com/", "main": "index.js", diff --git a/src/modules/se_scraper.js b/src/modules/se_scraper.js index 4cf8973..e7b7a3a 100644 --- a/src/modules/se_scraper.js +++ b/src/modules/se_scraper.js @@ -220,7 +220,11 @@ module.exports = class Scraper { console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`); // expect that user filled out necessary captcha } else { - break; + if (this.config.throw_on_detection === true) { + throw( e ); + } else { + break; + } } } else { // some other error, quit scraping process if stuff is broken @@ -228,7 +232,11 @@ module.exports = class Scraper { console.error('You have 30 seconds to fix this.'); await this.sleep(30000); } else { - break; + if (this.config.throw_on_detection === true) { + throw( e ); + } else { + break; + } } } diff --git a/src/modules/user_agents.js b/src/modules/user_agents.js index 72a1df2..382ae1d 100644 --- a/src/modules/user_agents.js +++ b/src/modules/user_agents.js @@ -1,8 +1,4 @@ -function random_user_agent(user_agents) { - return user_agents[Math.floor(Math.random() * user_agents.length)]; -} - // updated: 17 June 2019 // https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ const user_agents = [ @@ -104,6 +100,11 @@ const user_agents = [ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36', ]; + +function random_user_agent(ua_list = []) { + return user_agents[Math.floor(Math.random() * user_agents.length)]; +} + module.exports = { random_user_agent: random_user_agent, user_agents: user_agents, diff --git a/src/node_scraper.js b/src/node_scraper.js index b158163..f95d7a3 100644 --- a/src/node_scraper.js +++ b/src/node_scraper.js @@ -109,6 +109,7 @@ class ScrapeManager { // get_browser, handle_metadata, close_browser //custom_func: resolve('examples/pluggable.js'), custom_func: '', + throw_on_detection: true, // use a proxy for all connections // example: 'socks5://78.94.172.42:1080' // example: 'http://118.174.233.10:48400'