From 9af1630e19cad72c12e1a81836a563e4ccde2843 Mon Sep 17 00:00:00 2001 From: Nikolai Tschacher Date: Mon, 24 Dec 2018 14:25:02 +0100 Subject: [PATCH] initial --- .gitignore | 61 ++++ .idea/encodings.xml | 4 + .idea/misc.xml | 9 + .idea/modules.xml | 8 + .idea/workspace.xml | 424 +++++++++++++++++++++++++ GoogleScraperPup.iml | 9 + README.md | 290 ++++++++++++++++++ TODO.txt | 8 + index.js | 63 ++++ keywords.txt | 3 + package-lock.json | 499 ++++++++++++++++++++++++++++++ package.json | 26 ++ results.json | 1 + run.js | 34 +++ src/modules/baidu.js | 101 ++++++ src/modules/bing.js | 178 +++++++++++ src/modules/duckduckgo.js | 86 ++++++ src/modules/functions.js | 31 ++ src/modules/google.js | 611 +++++++++++++++++++++++++++++++++++++ src/modules/infospace.js | 170 +++++++++++ src/modules/metadata.js | 31 ++ src/modules/user_agents.js | 85 ++++++ src/modules/youtube.js | 113 +++++++ src/node_scraper.js | 200 ++++++++++++ test/tests.js | 185 +++++++++++ 25 files changed, 3230 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/encodings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/workspace.xml create mode 100644 GoogleScraperPup.iml create mode 100644 README.md create mode 100644 TODO.txt create mode 100644 index.js create mode 100644 keywords.txt create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 results.json create mode 100644 run.js create mode 100644 src/modules/baidu.js create mode 100644 src/modules/bing.js create mode 100644 src/modules/duckduckgo.js create mode 100644 src/modules/functions.js create mode 100644 src/modules/google.js create mode 100644 src/modules/infospace.js create mode 100644 src/modules/metadata.js create mode 100644 src/modules/user_agents.js create mode 100644 src/modules/youtube.js create mode 100644 src/node_scraper.js create mode 100644 test/tests.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad46b30 --- /dev/null +++ b/.gitignore @@ -0,0 +1,61 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# TypeScript v1 declaration files +typings/ + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env + +# next.js build output +.next diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..15a15b2 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..7e5bdf8 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..412b843 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..4dbd9aa --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,424 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + aws + should_turn_down + tools + .body + random_sleep + waitFor + waitForNav + + + + + + + true + + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +