From b4a86fcc518fdf85d9e4a2e01bd25adf20bc389b Mon Sep 17 00:00:00 2001 From: HugoPoi Date: Thu, 12 Dec 2019 15:16:37 +0100 Subject: [PATCH] refactor(proxy): remove proxy option not working replace by proxies --- package-lock.json | 406 +------------------------------------------- package.json | 1 - src/node_scraper.js | 91 ++++------ 3 files changed, 36 insertions(+), 462 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1648a56..d997296 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,15 +22,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-12.7.5.tgz", "integrity": "sha512-9fq4jZVhPNW8r+UYKnxF1e2HkDWOWKM5bC2/7c9wPV835I0aOrVbS/Hw/pWPk2uKrNXQqg9Z959Kz+IYDd5p3w==" }, - "accepts": { - "version": "1.3.7", - "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", - "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==", - "requires": { - "mime-types": "~2.1.24", - "negotiator": "0.6.2" - } - }, "agent-base": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-4.3.0.tgz", @@ -74,11 +65,6 @@ "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", "integrity": "sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=" }, - "array-flatten": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", - "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" - }, "assertion-error": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", @@ -95,38 +81,6 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=" }, - "body-parser": { - "version": "1.19.0", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", - "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==", - "requires": { - "bytes": "3.1.0", - "content-type": "~1.0.4", - "debug": "2.6.9", - "depd": "~1.1.2", - "http-errors": "1.7.2", - "iconv-lite": "0.4.24", - "on-finished": "~2.3.0", - "qs": "6.7.0", - "raw-body": "2.4.0", - "type-is": "~1.6.17" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "requires": { - "ms": "2.0.0" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - } - } - }, "boolbase": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", @@ -152,11 +106,6 @@ "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" }, - "bytes": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", - "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==" - }, "cacheable-request": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-6.0.0.tgz", @@ -297,7 +246,7 @@ }, "concat-stream": { "version": "1.6.2", - "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", + "resolved": "http://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==", "requires": { "buffer-from": "^1.0.0", @@ -308,7 +257,7 @@ "dependencies": { "readable-stream": { "version": "2.3.6", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", "requires": { "core-util-is": "~1.0.0", @@ -322,7 +271,7 @@ }, "string_decoder": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "resolved": "http://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", "requires": { "safe-buffer": "~5.1.0" @@ -330,29 +279,6 @@ } } }, - "content-disposition": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz", - "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==", - "requires": { - "safe-buffer": "5.1.2" - } - }, - "content-type": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", - "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==" - }, - "cookie": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz", - "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==" - }, - "cookie-signature": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", - "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" - }, "core-util-is": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", @@ -437,16 +363,6 @@ "object-keys": "^1.0.12" } }, - "depd": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", - "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=" - }, - "destroy": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", - "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" - }, "diff": { "version": "3.5.0", "resolved": "https://registry.npmjs.org/diff/-/diff-3.5.0.tgz", @@ -503,22 +419,12 @@ "resolved": "https://registry.npmjs.org/duplexer3/-/duplexer3-0.1.4.tgz", "integrity": "sha1-7gHdHKwO08vH/b6jfcCo8c4ALOI=" }, - "ee-first": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", - "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" - }, "emoji-regex": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-7.0.3.tgz", "integrity": "sha512-CwBLREIQ7LvYFB0WyRvwhq5N5qPhc6PMjD6bYggFlI5YyDgl+0vxq5VHbMOFqLg7hfWzmu8T5Z1QofhmTIhItA==", "dev": true }, - "encodeurl": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", - "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=" - }, "end-of-stream": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.1.tgz", @@ -564,17 +470,12 @@ }, "es6-promisify": { "version": "5.0.0", - "resolved": "https://registry.npmjs.org/es6-promisify/-/es6-promisify-5.0.0.tgz", + "resolved": "http://registry.npmjs.org/es6-promisify/-/es6-promisify-5.0.0.tgz", "integrity": "sha1-UQnWLz5W6pZ8S2NQWu8IKRyKUgM=", "requires": { "es6-promise": "^4.0.3" } }, - "escape-html": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", - "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" - }, "escape-string-regexp": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", @@ -587,11 +488,6 @@ "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", "dev": true }, - "etag": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", - "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=" - }, "execa": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/execa/-/execa-1.0.0.tgz", @@ -607,58 +503,6 @@ "strip-eof": "^1.0.0" } }, - "express": { - "version": "4.17.1", - "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz", - "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==", - "requires": { - "accepts": "~1.3.7", - "array-flatten": "1.1.1", - "body-parser": "1.19.0", - "content-disposition": "0.5.3", - "content-type": "~1.0.4", - "cookie": "0.4.0", - "cookie-signature": "1.0.6", - "debug": "2.6.9", - "depd": "~1.1.2", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "finalhandler": "~1.1.2", - "fresh": "0.5.2", - "merge-descriptors": "1.0.1", - "methods": "~1.1.2", - "on-finished": "~2.3.0", - "parseurl": "~1.3.3", - "path-to-regexp": "0.1.7", - "proxy-addr": "~2.0.5", - "qs": "6.7.0", - "range-parser": "~1.2.1", - "safe-buffer": "5.1.2", - "send": "0.17.1", - "serve-static": "1.14.1", - "setprototypeof": "1.1.1", - "statuses": "~1.5.0", - "type-is": "~1.6.18", - "utils-merge": "1.0.1", - "vary": "~1.1.2" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "requires": { - "ms": "2.0.0" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - } - } - }, "extract-zip": { "version": "1.6.7", "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-1.6.7.tgz", @@ -693,35 +537,6 @@ "pend": "~1.2.0" } }, - "finalhandler": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz", - "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==", - "requires": { - "debug": "2.6.9", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "on-finished": "~2.3.0", - "parseurl": "~1.3.3", - "statuses": "~1.5.0", - "unpipe": "~1.0.0" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "requires": { - "ms": "2.0.0" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - } - } - }, "find-up": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-3.0.0.tgz", @@ -753,16 +568,6 @@ "for-in": "^1.0.1" } }, - "forwarded": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", - "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" - }, - "fresh": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", - "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=" - }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -876,18 +681,6 @@ "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.0.2.tgz", "integrity": "sha512-laeSTWIkuFa6lUgZAt+ic9RwOSEwbi9VDQNcCvMFO4sZiDc2Ha8DaZVCJnfpLLQCcS8rvCnIWYmz0POLxt7Dew==" }, - "http-errors": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz", - "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==", - "requires": { - "depd": "~1.1.2", - "inherits": "2.0.3", - "setprototypeof": "1.1.1", - "statuses": ">= 1.5.0 < 2", - "toidentifier": "1.0.0" - } - }, "https-proxy-agent": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-3.0.1.tgz", @@ -907,14 +700,6 @@ } } }, - "iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", - "requires": { - "safer-buffer": ">= 2.1.2 < 3" - } - }, "inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -935,11 +720,6 @@ "integrity": "sha512-wPVv/y/QQ/Uiirj/vh3oP+1Ww+AWehmi1g5fFWGPF6IpCBCDVrhgHRMvrLfdYcwDh3QJbGXDW4JAuzxElLSqKA==", "dev": true }, - "ipaddr.js": { - "version": "1.9.0", - "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.0.tgz", - "integrity": "sha512-M4Sjn6N/+O6/IXSJseKqHoFc+5FdGJ22sXqnjTpdZweHK64MzEPAyQZyEU3R/KRv2GLoa7nNtg/C2Ev6m7z+eA==" - }, "is-buffer": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.3.tgz", @@ -1112,11 +892,6 @@ "p-defer": "^1.0.0" } }, - "media-typer": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", - "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=" - }, "mem": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/mem/-/mem-4.3.0.tgz", @@ -1138,34 +913,11 @@ "kind-of": "^3.0.2" } }, - "merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" - }, - "methods": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", - "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=" - }, "mime": { "version": "2.4.4", "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.4.tgz", "integrity": "sha512-LRxmNwziLPT828z+4YkNzloCFC2YM4wrB99k+AV5ZbEyfGNWfG8SO1FUXLmLDBSo89NrJZ4DIWeLjy1CHGhMGA==" }, - "mime-db": { - "version": "1.40.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", - "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==" - }, - "mime-types": { - "version": "2.1.24", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", - "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", - "requires": { - "mime-db": "1.40.0" - } - }, "mimic-fn": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", @@ -1275,11 +1027,6 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" }, - "negotiator": { - "version": "0.6.2", - "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", - "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==" - }, "nice-try": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", @@ -1352,14 +1099,6 @@ "es-abstract": "^1.5.1" } }, - "on-finished": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", - "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", - "requires": { - "ee-first": "1.1.1" - } - }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -1434,11 +1173,6 @@ "@types/node": "*" } }, - "parseurl": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", - "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==" - }, "path-exists": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", @@ -1456,11 +1190,6 @@ "integrity": "sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=", "dev": true }, - "path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" - }, "pathval": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.0.tgz", @@ -1487,15 +1216,6 @@ "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==" }, - "proxy-addr": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.5.tgz", - "integrity": "sha512-t/7RxHXPH6cJtP0pRG6smSr9QJidhB+3kXu0KgXnbGYMgzEnUxRQ4/LDdfOwZEMyIh3/xHb8PX3t+lfL9z+YVQ==", - "requires": { - "forwarded": "~0.1.2", - "ipaddr.js": "1.9.0" - } - }, "proxy-from-env": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.0.0.tgz", @@ -1602,27 +1322,6 @@ } } }, - "qs": { - "version": "6.7.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", - "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==" - }, - "range-parser": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", - "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==" - }, - "raw-body": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz", - "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==", - "requires": { - "bytes": "3.1.0", - "http-errors": "1.7.2", - "iconv-lite": "0.4.24", - "unpipe": "1.0.0" - } - }, "readable-stream": { "version": "3.4.0", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.4.0.tgz", @@ -1666,81 +1365,18 @@ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" }, - "safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" - }, "semver": { "version": "5.7.0", "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.0.tgz", "integrity": "sha512-Ya52jSX2u7QKghxeoFGpLwCtGlt7j0oY9DYb5apt9nPlJ42ID+ulTXESnt/qAQcoSERyZ5sl3LDIOw0nAn/5DA==", "dev": true }, - "send": { - "version": "0.17.1", - "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz", - "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==", - "requires": { - "debug": "2.6.9", - "depd": "~1.1.2", - "destroy": "~1.0.4", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "fresh": "0.5.2", - "http-errors": "~1.7.2", - "mime": "1.6.0", - "ms": "2.1.1", - "on-finished": "~2.3.0", - "range-parser": "~1.2.1", - "statuses": "~1.5.0" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "requires": { - "ms": "2.0.0" - }, - "dependencies": { - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - } - } - }, - "mime": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", - "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==" - } - } - }, - "serve-static": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz", - "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==", - "requires": { - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "parseurl": "~1.3.3", - "send": "0.17.1" - } - }, "set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", "integrity": "sha1-BF+XgtARrppoA93TgrJDkrPYkPc=", "dev": true }, - "setprototypeof": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz", - "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==" - }, "shallow-clone": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz", @@ -1799,11 +1435,6 @@ "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", "dev": true }, - "statuses": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", - "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=" - }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -1864,26 +1495,12 @@ "resolved": "https://registry.npmjs.org/to-readable-stream/-/to-readable-stream-1.0.0.tgz", "integrity": "sha512-Iq25XBt6zD5npPhlLVXGFN3/gyR2/qODcKNNyTMd4vbm39HUaOiAM4PMq0eMVC/Tkxz+Zjdsc55g9yyz+Yq00Q==" }, - "toidentifier": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz", - "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==" - }, "type-detect": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", "dev": true }, - "type-is": { - "version": "1.6.18", - "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", - "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", - "requires": { - "media-typer": "0.3.0", - "mime-types": "~2.1.24" - } - }, "typedarray": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", @@ -1902,11 +1519,6 @@ "underscore": "*" } }, - "unpipe": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", - "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" - }, "url-parse-lax": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz", @@ -1929,16 +1541,6 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" }, - "utils-merge": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", - "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=" - }, - "vary": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", - "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" - }, "which": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", diff --git a/package.json b/package.json index 3355ddf..78fec21 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,6 @@ "dependencies": { "cheerio": "^1.0.0-rc.3", "debug": "^4.1.1", - "express": "^4.17.1", "got": "^9.6.0", "lodash": "^4.17.14", "puppeteer": "^2.0.0", diff --git a/src/node_scraper.js b/src/node_scraper.js index 3b35dd9..5ebd6dc 100644 --- a/src/node_scraper.js +++ b/src/node_scraper.js @@ -1,7 +1,8 @@ 'use strict'; -var fs = require('fs'); -var os = require("os"); +const fs = require('fs'); +const os = require('os'); +const _ = require('lodash'); const UserAgent = require('user-agents'); const google = require('./modules/google.js'); @@ -63,7 +64,7 @@ class ScrapeManager { this.scraper = null; this.context = context; - this.config = { + this.config = _.defaults(config, { // the user agent to scrape with user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3835.0 Safari/537.36', // if random_user_agent is set to True, a random user agent is chosen @@ -90,7 +91,22 @@ class ScrapeManager { // whether to start the browser in headless mode headless: true, // specify flags passed to chrome here - chrome_flags: [], + // About our defaults values https://peter.sh/experiments/chromium-command-line-switches/ + chrome_flags: [ + '--disable-infobars', + '--window-position=0,0', + '--ignore-certifcate-errors', + '--ignore-certifcate-errors-spki-list', + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--disable-gpu', + '--window-size=1920,1040', + '--start-fullscreen', + '--hide-scrollbars', + '--disable-notifications', + ], // the number of pages to scrape for each keyword num_pages: 1, // path to output file, data will be stored in JSON @@ -115,10 +131,8 @@ class ScrapeManager { //custom_func: resolve('examples/pluggable.js'), custom_func: null, throw_on_detection: false, - // use a proxy for all connections - // example: 'socks5://78.94.172.42:1080' - // example: 'http://118.174.233.10:48400' - proxy: '', + // List of proxies to use ['socks5://78.94.172.42:1080', 'http://localhost:1080'] + proxies: [], // a file with one proxy per line. Example: // socks5://78.94.172.42:1080 // http://118.174.233.10:48400 @@ -138,14 +152,7 @@ class ScrapeManager { concurrency: Cluster.CONCURRENCY_BROWSER, maxConcurrency: 1, } - }; - - this.config.proxies = []; - - // overwrite default config - for (var key in config) { - this.config[key] = config[key]; - } + }); if (config.sleep_range) { // parse an array @@ -160,6 +167,11 @@ class ScrapeManager { this.config.keywords = read_keywords_from_file(this.config.keyword_file); } + if (this.config.proxies && this.config.proxy_file) { + console.error('Either use a proxy_file or specify a proxy for all connections. Do not use both options.'); + return false; + } + if (fs.existsSync(this.config.proxy_file)) { this.config.proxies = read_keywords_from_file(this.config.proxy_file); log(this.config, 1, `${this.config.proxies.length} proxies read from file.`); @@ -193,54 +205,16 @@ class ScrapeManager { } } - // See here: https://peter.sh/experiments/chromium-command-line-switches/ - var default_chrome_flags = [ - '--disable-infobars', - '--window-position=0,0', - '--ignore-certifcate-errors', - '--ignore-certifcate-errors-spki-list', - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-accelerated-2d-canvas', - '--disable-gpu', - '--window-size=1920,1040', - '--start-fullscreen', - '--hide-scrollbars', - '--disable-notifications', - ]; - - var chrome_flags = default_chrome_flags.slice(); // copy that - - if (Array.isArray(this.config.chrome_flags) && this.config.chrome_flags.length) { - chrome_flags = this.config.chrome_flags; - } - - var user_agent = null; - - if (this.config.user_agent) { - user_agent = this.config.user_agent; - } + const chrome_flags = _.clone(this.config.chrome_flags); if (this.config.random_user_agent) { const userAgent = new UserAgent({ deviceCategory: 'desktop' }); - user_agent = userAgent.toString(); + this.config.user_agent = userAgent.toString(); } - if (user_agent) { + if (this.config.user_agent) { chrome_flags.push( - `--user-agent=${user_agent}` - ) - } - - if (this.config.proxy) { - if (this.config.proxies && this.config.proxies.length > 0) { - console.error('Either use a proxy_file or specify a proxy for all connections. Do not use both options.'); - return false; - } - - chrome_flags.push( - '--proxy-server=' + this.config.proxy, + `--user-agent=${this.config.user_agent}` ) } @@ -259,7 +233,6 @@ class ScrapeManager { } else { // if no custom start_browser functionality was given // use puppeteer-cluster for scraping - const { Cluster } = require('./puppeteer-cluster/dist/index.js'); this.numClusters = this.config.puppeteer_cluster_config.maxConcurrency; var perBrowserOptions = [];