mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-20 17:47:49 +02:00
added option to throw on detection
This commit is contained in:
parent
caa93df3b0
commit
ebe9ba8ea9
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "se-scraper",
|
||||
"version": "1.3.5",
|
||||
"version": "1.3.7",
|
||||
"description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
|
||||
"homepage": "https://scrapeulous.com/",
|
||||
"main": "index.js",
|
||||
|
@ -220,7 +220,11 @@ module.exports = class Scraper {
|
||||
console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`);
|
||||
// expect that user filled out necessary captcha
|
||||
} else {
|
||||
break;
|
||||
if (this.config.throw_on_detection === true) {
|
||||
throw( e );
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// some other error, quit scraping process if stuff is broken
|
||||
@ -228,7 +232,11 @@ module.exports = class Scraper {
|
||||
console.error('You have 30 seconds to fix this.');
|
||||
await this.sleep(30000);
|
||||
} else {
|
||||
break;
|
||||
if (this.config.throw_on_detection === true) {
|
||||
throw( e );
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,4 @@
|
||||
|
||||
/**
 * Pick one entry at random from the given list.
 *
 * @param {Array} user_agents - List to choose from.
 * @returns {*} The element at a randomly selected index of `user_agents`
 *     (`undefined` when the list is empty).
 */
function random_user_agent(user_agents) {
    // Math.random() is in [0, 1), so the floored product is a valid index.
    const index = Math.floor(Math.random() * user_agents.length);
    return user_agents[index];
}
|
||||
|
||||
// updated: 17 June 2019
|
||||
// https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
|
||||
const user_agents = [
|
||||
@ -104,6 +100,11 @@ const user_agents = [
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36',
|
||||
];
|
||||
|
||||
|
||||
/**
 * Return a randomly chosen user agent string.
 *
 * @param {string[]} [ua_list=[]] - Optional list to pick from. If it is not a
 *     non-empty array, the module-level `user_agents` list is used instead.
 * @returns {string|undefined} One entry of the selected list; `undefined`
 *     only when the selected list is empty.
 */
function random_user_agent(ua_list = []) {
    // Honour a caller-supplied list; fall back to the built-in one so that
    // calls without an argument keep working.
    const pool = Array.isArray(ua_list) && ua_list.length > 0 ? ua_list : user_agents;
    return pool[Math.floor(Math.random() * pool.length)];
}
|
||||
|
||||
module.exports = {
|
||||
random_user_agent: random_user_agent,
|
||||
user_agents: user_agents,
|
||||
|
@ -109,6 +109,7 @@ class ScrapeManager {
|
||||
// get_browser, handle_metadata, close_browser
|
||||
//custom_func: resolve('examples/pluggable.js'),
|
||||
custom_func: '',
|
||||
throw_on_detection: true,
|
||||
// use a proxy for all connections
|
||||
// example: 'socks5://78.94.172.42:1080'
|
||||
// example: 'http://118.174.233.10:48400'
|
||||
|
Loading…
x
Reference in New Issue
Block a user