mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-20 17:47:49 +02:00
added option to throw on detection
This commit is contained in:
parent
caa93df3b0
commit
ebe9ba8ea9
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "se-scraper",
|
"name": "se-scraper",
|
||||||
"version": "1.3.5",
|
"version": "1.3.7",
|
||||||
"description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
|
"description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
|
||||||
"homepage": "https://scrapeulous.com/",
|
"homepage": "https://scrapeulous.com/",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
|
@ -220,7 +220,11 @@ module.exports = class Scraper {
|
|||||||
console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`);
|
console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`);
|
||||||
// expect that user filled out necessary captcha
|
// expect that user filled out necessary captcha
|
||||||
} else {
|
} else {
|
||||||
break;
|
if (this.config.throw_on_detection === true) {
|
||||||
|
throw( e );
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// some other error, quit scraping process if stuff is broken
|
// some other error, quit scraping process if stuff is broken
|
||||||
@ -228,7 +232,11 @@ module.exports = class Scraper {
|
|||||||
console.error('You have 30 seconds to fix this.');
|
console.error('You have 30 seconds to fix this.');
|
||||||
await this.sleep(30000);
|
await this.sleep(30000);
|
||||||
} else {
|
} else {
|
||||||
break;
|
if (this.config.throw_on_detection === true) {
|
||||||
|
throw( e );
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,8 +1,4 @@
|
|||||||
|
|
||||||
function random_user_agent(user_agents) {
|
|
||||||
return user_agents[Math.floor(Math.random() * user_agents.length)];
|
|
||||||
}
|
|
||||||
|
|
||||||
// updated: 17 June 2019
|
// updated: 17 June 2019
|
||||||
// https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
|
// https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
|
||||||
const user_agents = [
|
const user_agents = [
|
||||||
@ -104,6 +100,11 @@ const user_agents = [
|
|||||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36',
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36',
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
||||||
|
function random_user_agent(ua_list = []) {
|
||||||
|
return user_agents[Math.floor(Math.random() * user_agents.length)];
|
||||||
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
random_user_agent: random_user_agent,
|
random_user_agent: random_user_agent,
|
||||||
user_agents: user_agents,
|
user_agents: user_agents,
|
||||||
|
@ -109,6 +109,7 @@ class ScrapeManager {
|
|||||||
// get_browser, handle_metadata, close_browser
|
// get_browser, handle_metadata, close_browser
|
||||||
//custom_func: resolve('examples/pluggable.js'),
|
//custom_func: resolve('examples/pluggable.js'),
|
||||||
custom_func: '',
|
custom_func: '',
|
||||||
|
throw_on_detection: true,
|
||||||
// use a proxy for all connections
|
// use a proxy for all connections
|
||||||
// example: 'socks5://78.94.172.42:1080'
|
// example: 'socks5://78.94.172.42:1080'
|
||||||
// example: 'http://118.174.233.10:48400'
|
// example: 'http://118.174.233.10:48400'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user