added option to throw on detection

This commit is contained in:
Nikolai Tschacher 2019-06-17 15:02:44 +02:00
parent caa93df3b0
commit ebe9ba8ea9
4 changed files with 17 additions and 7 deletions

View File

@ -1,6 +1,6 @@
{ {
"name": "se-scraper", "name": "se-scraper",
"version": "1.3.5", "version": "1.3.7",
"description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu", "description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
"homepage": "https://scrapeulous.com/", "homepage": "https://scrapeulous.com/",
"main": "index.js", "main": "index.js",

View File

@ -220,7 +220,11 @@ module.exports = class Scraper {
console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`); console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`);
// expect that user filled out necessary captcha // expect that user filled out necessary captcha
} else { } else {
break; if (this.config.throw_on_detection === true) {
throw( e );
} else {
break;
}
} }
} else { } else {
// some other error, quit scraping process if stuff is broken // some other error, quit scraping process if stuff is broken
@ -228,7 +232,11 @@ module.exports = class Scraper {
console.error('You have 30 seconds to fix this.'); console.error('You have 30 seconds to fix this.');
await this.sleep(30000); await this.sleep(30000);
} else { } else {
break; if (this.config.throw_on_detection === true) {
throw( e );
} else {
break;
}
} }
} }

View File

@ -1,8 +1,4 @@
/**
 * Return one entry of the given list, chosen uniformly at random.
 *
 * @param {string[]} user_agents - Candidate user agent strings.
 * @returns {string} The randomly selected entry.
 */
function random_user_agent(user_agents) {
    const count = user_agents.length;
    // Math.random() is in [0, 1), so the floored product is a valid index.
    const position = Math.floor(Math.random() * count);
    return user_agents[position];
}
// updated: 17 June 2019 // updated: 17 June 2019
// https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ // https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
const user_agents = [ const user_agents = [
@ -104,6 +100,11 @@ const user_agents = [
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36',
]; ];
/**
 * Pick a user agent string at random.
 *
 * @param {string[]} [ua_list=[]] - Optional list of candidates. When it is a
 *     non-empty array the pick is made from it; otherwise the module-level
 *     `user_agents` list is used.
 * @returns {string|undefined} A randomly chosen entry, or `undefined` if the
 *     list that ends up being used is empty.
 */
function random_user_agent(ua_list = []) {
    // The argument used to be accepted but never read, so a caller-supplied
    // list was silently ignored. Honour it when it is usable and fall back to
    // the built-in list for missing, empty or non-array arguments so existing
    // no-argument calls keep working.
    const pool = Array.isArray(ua_list) && ua_list.length > 0 ? ua_list : user_agents;
    return pool[Math.floor(Math.random() * pool.length)];
}
module.exports = { module.exports = {
random_user_agent: random_user_agent, random_user_agent: random_user_agent,
user_agents: user_agents, user_agents: user_agents,

View File

@ -109,6 +109,7 @@ class ScrapeManager {
// get_browser, handle_metadata, close_browser // get_browser, handle_metadata, close_browser
//custom_func: resolve('examples/pluggable.js'), //custom_func: resolve('examples/pluggable.js'),
custom_func: '', custom_func: '',
throw_on_detection: true,
// use a proxy for all connections // use a proxy for all connections
// example: 'socks5://78.94.172.42:1080' // example: 'socks5://78.94.172.42:1080'
// example: 'http://118.174.233.10:48400' // example: 'http://118.174.233.10:48400'