mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2024-11-22 07:33:07 +01:00
using random user agents now from https://github.com/intoli/user-agents
This commit is contained in:
parent
59154694f2
commit
fcbe66b56b
7
TODO.md
7
TODO.md
@ -53,11 +53,14 @@
|
||||
### 16.7.2019
|
||||
|
||||
- resolve issues
|
||||
- fix this https://github.com/NikolaiT/se-scraper/issues/37
|
||||
- fix this https://github.com/NikolaiT/se-scraper/issues/37 [done]
|
||||
|
||||
- use puppeteer stealth plugin
|
||||
- user random user agents plugin
|
||||
|
||||
- user random user agents plugin: https://github.com/intoli/user-agents
|
||||
|
||||
- add screenshot capability (make the screen after parsing)
|
||||
- store as b64
|
||||
|
||||
### TODO:
|
||||
1. fix googlenewsscraper waiting for results and parsing. remove the static sleep [done]
|
||||
|
@ -4,17 +4,20 @@ const se_scraper = require('./../src/node_scraper.js');
|
||||
let browser_config = {
|
||||
debug_level: 1,
|
||||
test_evasion: false,
|
||||
headless: false,
|
||||
block_assets: false,
|
||||
random_user_agent: false,
|
||||
log_http_headers: false,
|
||||
html_output: false,
|
||||
log_http_headers: true,
|
||||
random_user_agent: true,
|
||||
};
|
||||
|
||||
let scrape_job = {
|
||||
search_engine: 'google',
|
||||
keywords: ['mercedes reifen'],
|
||||
keywords: ['too tired all the time'],
|
||||
num_pages: 1,
|
||||
google_settings: {
|
||||
"gl": "us",
|
||||
"hl": "en",
|
||||
"start": 0,
|
||||
"num": 10
|
||||
}
|
||||
};
|
||||
|
||||
var scraper = new se_scraper.ScrapeManager(browser_config);
|
||||
|
38
package-lock.json
generated
38
package-lock.json
generated
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "se-scraper",
|
||||
"version": "1.3.15",
|
||||
"version": "1.4.0",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
@ -357,6 +357,11 @@
|
||||
"integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==",
|
||||
"dev": true
|
||||
},
|
||||
"docopt": {
|
||||
"version": "0.6.2",
|
||||
"resolved": "https://registry.npmjs.org/docopt/-/docopt-0.6.2.tgz",
|
||||
"integrity": "sha1-so6eIiDaXsSffqW7JKR3h0Be6xE="
|
||||
},
|
||||
"dom-serializer": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz",
|
||||
@ -395,6 +400,15 @@
|
||||
"domelementtype": "1"
|
||||
}
|
||||
},
|
||||
"dot-json": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/dot-json/-/dot-json-1.1.0.tgz",
|
||||
"integrity": "sha512-PiQZW9/C8xILPYK2bOye/cbPZrakNEkt28jFb8RlPCwsoMAHYYw9T8JoACxgttHL9Y2AmdqVvibbZJHtLgeqTQ==",
|
||||
"requires": {
|
||||
"docopt": "~0.6.2",
|
||||
"underscore-keypath": "~0.0.22"
|
||||
}
|
||||
},
|
||||
"duplexer3": {
|
||||
"version": "0.1.4",
|
||||
"resolved": "https://registry.npmjs.org/duplexer3/-/duplexer3-0.1.4.tgz",
|
||||
@ -794,6 +808,11 @@
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.14.tgz",
|
||||
"integrity": "sha512-mmKYbW3GLuJeX+iGP+Y7Gp1AiGHGbXHCOh/jZmrawMmsE7MS4znI3RL2FsjbqOyMayHInjOeykW7PEajUk1/xw=="
|
||||
},
|
||||
"lodash.clonedeep": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||
"integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8="
|
||||
},
|
||||
"log-symbols": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-2.2.0.tgz",
|
||||
@ -1343,6 +1362,14 @@
|
||||
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.1.tgz",
|
||||
"integrity": "sha512-5/4etnCkd9c8gwgowi5/om/mYO5ajCaOgdzj/oW+0eQV9WxKBDZw5+ycmKmeaTXjInS/W0BzpGLo2xR2aBwZdg=="
|
||||
},
|
||||
"underscore-keypath": {
|
||||
"version": "0.0.22",
|
||||
"resolved": "https://registry.npmjs.org/underscore-keypath/-/underscore-keypath-0.0.22.tgz",
|
||||
"integrity": "sha1-SKUoOSu278QkvhyqVtpLX6zPJk0=",
|
||||
"requires": {
|
||||
"underscore": "*"
|
||||
}
|
||||
},
|
||||
"url-parse-lax": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz",
|
||||
@ -1351,6 +1378,15 @@
|
||||
"prepend-http": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"user-agents": {
|
||||
"version": "1.0.321",
|
||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.321.tgz",
|
||||
"integrity": "sha512-2oR/KDESLUTQLEKymUP6/p+jTSKW6fVEEGKXO0EF8jjHIj3QNDBTUGbvp3kpWvuO3k65xkrjHzatUWun1t0FAg==",
|
||||
"requires": {
|
||||
"dot-json": "^1.1.0",
|
||||
"lodash.clonedeep": "^4.5.0"
|
||||
}
|
||||
},
|
||||
"util-deprecate": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||
|
@ -27,7 +27,8 @@
|
||||
"lodash": "^4.17.14",
|
||||
"proxy-chain": "^0.2.7",
|
||||
"puppeteer": "^1.17.0",
|
||||
"puppeteer-cluster": "^0.13.0"
|
||||
"puppeteer-cluster": "^0.13.0",
|
||||
"user-agents": "^1.0.321"
|
||||
},
|
||||
"devDependencies": {
|
||||
"chai": "^4.2.0",
|
||||
|
@ -1,115 +0,0 @@
|
||||
|
||||
// updated: 17 June 2019
|
||||
// https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
|
||||
const user_agents = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/60.0.3112.78 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36 OPR/60.0.3255.109',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36 OPR/60.0.3255.95',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 YaBrowser/19.4.2.702 Yowser/2.5 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36 OPR/60.0.3255.151',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/60.0.3112.78 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 YaBrowser/19.4.3.370 Yowser/2.5 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
|
||||
'Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Mobile/15E148 Safari/604.1',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/73.0.3683.86 Chrome/73.0.3683.86 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 YaBrowser/19.6.0.1574 Yowser/2.5 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/74.0.3729.169 Chrome/74.0.3729.169 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
|
||||
'Mozilla/5.0 (iPad; CPU OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Mobile/15E148 Safari/604.1',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36',
|
||||
];
|
||||
|
||||
|
||||
function random_user_agent(config) {
|
||||
if (config.user_agents && config.user_agents.length > 0) {
|
||||
return config.user_agents[Math.floor(Math.random() * config.user_agents.length)];
|
||||
} else {
|
||||
return user_agents[Math.floor(Math.random() * user_agents.length)];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
random_user_agent: random_user_agent,
|
||||
user_agents: user_agents,
|
||||
};
|
@ -1,13 +1,14 @@
|
||||
const zlib = require('zlib');
|
||||
var fs = require('fs');
|
||||
var os = require("os");
|
||||
|
||||
const UserAgent = require('user-agents');
|
||||
const google = require('./modules/google.js');
|
||||
const amazon = require('./modules/amazon.js');
|
||||
const bing = require('./modules/bing.js');
|
||||
const baidu = require('./modules/baidu.js');
|
||||
const infospace = require('./modules/infospace.js');
|
||||
const youtube = require('./modules/youtube.js');
|
||||
const ua = require('./modules/user_agents.js');
|
||||
const duckduckgo = require('./modules/duckduckgo.js');
|
||||
const tickersearch = require('./modules/ticker_search.js');
|
||||
const { Cluster } = require('./puppeteer-cluster/dist/index.js');
|
||||
@ -131,8 +132,6 @@ class ScrapeManager {
|
||||
// check if headless chrome escapes common detection techniques
|
||||
// this is a quick test and should be used for debugging
|
||||
test_evasion: false,
|
||||
// you may pass your own list of user agents
|
||||
user_agents: [],
|
||||
apply_evasion_techniques: true,
|
||||
// settings for puppeteer-cluster
|
||||
puppeteer_cluster_config: {
|
||||
@ -228,7 +227,8 @@ class ScrapeManager {
|
||||
}
|
||||
|
||||
if (this.config.random_user_agent) {
|
||||
user_agent = ua.random_user_agent(this.config);
|
||||
const userAgent = new UserAgent();
|
||||
user_agent = userAgent.toString();
|
||||
}
|
||||
|
||||
if (user_agent) {
|
||||
@ -302,10 +302,11 @@ class ScrapeManager {
|
||||
|
||||
// Give the per browser options each a random user agent when random user agent is set
|
||||
while (perBrowserOptions.length < this.numClusters) {
|
||||
const userAgent = new UserAgent();
|
||||
perBrowserOptions.push({
|
||||
headless: this.config.headless,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: default_chrome_flags.slice().concat(`--user-agent=${ua.random_user_agent(this.config)}`)
|
||||
args: default_chrome_flags.slice().concat(`--user-agent=${userAgent.toString()}`)
|
||||
})
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user