forked from extern/se-scraper
Compare commits
106 Commits
add-code-o ... master
Commit SHA1 hashes:

5a0eea201d, 0278b24f0d, 33fa371716, 6b806dedfe, 5633b10e50, c58d4fa74d, 4f467abf1e, 89dc5c3ebb,
4b33ef9b19, 28332528ea, b685fb4def, 394b567db6, cac6b87e92, 1c1db88545, 8f6317cea7, f192e4ebb4,
3ab8e46126, 392c43390e, 77c1bb8372, 8f40057534, 301695cd2b, d362e4ae2c, bcd181111b, b4a86fcc51,
9e6a555663, ca9f5f7f50, 1694ee92d0, da69913272, 4a3a0e6fd4, 4953d9da7a, 5e47c27c70, 95a5ee56d8,
52a2ec7b33, 07f3dceba1, b25f7a4285, 4b581bd03f, 21378dab02, 77d6c4f04a, b513bb0f5b, 855a874f9e,
dde1711d9d, 7ba7ee9226, e661241f6f, 98414259fe, 19a172c654, 0f7e89c272, ca941cee45, 4c77aeba76,
0427d9f915, 87fcdd35d5, 4ca50ab2b9, 8e629f6266, a369bd07f9, dde2b14fc0, 0db6e068da, 50bda275a6,
a61fade2c9, 78fe12390b, fcbe66b56b, 59154694f2, 60a9d52924, 1fc7f0d1c8, baaff5824e, dab25f9068,
a413cb54ef, bbebe3ce60, 09c1255400, 5e8ff1cb34, c1a036e8da, d1e9b21269, 593f3a95e5, d9ac9f4162,
a0e63aa4b0, a3ebe357a4, 0d7f6dcd11, 80d23a9d57, ebe9ba8ea9, caa93df3b0, 0c9f353cb2, 43d5732de7,
06d500f75c, 784e887787, db5fbb23d2, 5bf7c94b9a, d4d06f7d67, 35943e7449, 7e06944fa1, 6825c97790,
3d69f4e249, 1593759556, 775dcfa077, b82c769bb1, 1bed9c5854, 7a8c6f13f0, 51d617442d, dd1f36076e,
62b3b688b4, 7b52b4e62f, 7239e23cba, 8cbf37eaba, abf4458e46, 79d32a315a, 089e410ec6, 393b9c0450,
fb3f2836e4, 53c9ebf467

.gitignore (vendored), 18 changed lines

```
@@ -1,3 +1,19 @@
# ignore static tests

test/static_tests/html/
test/static_tests/html/*

.idea

# ignore data

examples/data/
examples/data/*

examples/results/
examples/results/*


# Logs
logs
*.log

@@ -63,3 +79,5 @@ typings/

.idea/
GoogleScraperPup.iml

.http-mitm-proxy
```

.gitmodules (vendored), new file, 0 lines

Dockerfile, new file, 73 lines

```dockerfile
FROM node:10-slim

# Application parameters and variables
# ENV NODE_ENV=production
ENV HOST=0.0.0.0
ENV PORT=3000
ENV application_directory=/se-scraper
ENV puppeteer_cluster_directory=/se-scraper/src/puppeteer-cluster

# Create app directory
WORKDIR $application_directory

RUN apt-get update && \
    apt-get install -y \
    gconf-service \
    libasound2 \
    libatk1.0-0 \
    libc6 \
    libcairo2 \
    libcups2 \
    libdbus-1-3 \
    libexpat1 \
    libfontconfig1 \
    libgcc1 \
    libgconf-2-4 \
    libgdk-pixbuf2.0-0 \
    libglib2.0-0 \
    libgtk-3-0 \
    libnspr4 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libstdc++6 \
    libx11-6 \
    libx11-xcb1 \
    libxcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxi6 \
    libxrandr2 \
    libxrender1 \
    libxss1 \
    libxtst6 \
    ca-certificates \
    fonts-liberation \
    libappindicator1 \
    libnss3 \
    lsb-release \
    xdg-utils \
    wget

# Bundle app source
COPY . .
WORKDIR $puppeteer_cluster_directory
RUN npm install \
    && npm run build

WORKDIR $application_directory
# skip installing scripts for puppeteer dependencies
# we've already installed puppeteer above.
RUN npm install --ignore-scripts

# Cleanup
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

ADD https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 /usr/local/bin/dumb-init
RUN chmod +x /usr/local/bin/dumb-init

EXPOSE $PORT

CMD ["dumb-init", "node", "server/server.js"]
```
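
The container starts `server/server.js` on port 3000 (see `EXPOSE $PORT` and the `CMD` line). Besides the `curl` call shown in the README's Docker section further down, a job can also be submitted from Node. The following is only a sketch: it assumes a container listening on `localhost:3000` and reuses the JSON body format from that README section; host, port and payload values are illustrative.

```js
// Sketch: POST a scrape job to a running se-scraper container (assumed to listen on localhost:3000).
const http = require('http');

const payload = JSON.stringify({
    browser_config: { random_user_agent: true },
    scrape_config: { search_engine: 'google', keywords: ['test'], num_pages: 1 },
});

const req = http.request({
    host: 'localhost',
    port: 3000,
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
}, (res) => {
    let body = '';
    res.on('data', (chunk) => body += chunk);
    res.on('end', () => console.dir(JSON.parse(body), { depth: null, colors: true }));
});

req.on('error', console.error);
req.write(payload);
req.end();
```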

LICENSE, new file, 201 lines

```text
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright 2019 Nikolai Tschacher

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

README.md, 625 lines changed

@@ -1,78 +1,307 @@
# Search Engine Scraper
# [The maintained successor of se-scraper is the general purpose crawling infrastructure](https://github.com/NikolaiT/Crawling-Infrastructure)

This node module supports scraping several search engines.
## Search Engine Scraper - se-scraper

Right now scraping the search engines
[npm](https://www.npmjs.com/package/se-scraper)
[Donate](https://www.paypal.me/incolumitas)
[Known Vulnerabilities](https://snyk.io/test/github/NikolaiT/se-scraper)

This node module allows you to scrape search engines concurrently with different proxies.

If you don't have extensive technical experience or don't want to purchase proxies, you can use [my scraping service](https://scrapeulous.com/).

#### Table of Contents
- [Installation](#installation)
- [Docker](#docker-support)
- [Minimal Example](#minimal-example)
- [Quickstart](#quickstart)
- [Contribute](#contribute)
- [Using Proxies](#proxies)
- [Custom Scrapers](#custom-scrapers)
- [Examples](#examples)
- [Scraping Model](#scraping-model)
- [Technical Notes](#technical-notes)
- [Advanced Usage](#advanced-usage)
- [Special Query String Parameters for Search Engines](#query-string-parameters)

Se-scraper supports the following search engines:
* Google
* Google News
* Google News App version (https://news.google.com)
* Google Image
* Bing
* Baidu
* Youtube
* Bing News
* Infospace
* Duckduckgo
* Yandex
* Webcrawler

is supported.
This module uses puppeteer and a modified version of [puppeteer-cluster](https://github.com/thomasdondorf/puppeteer-cluster/). It was created by the developer of [GoogleScraper](https://github.com/NikolaiT/GoogleScraper), a module with 1800 stars on Github.

Additionally **se-scraper** supports investment ticker search from the following sites:
## Installation

* Reuters
* cnbc
* Marketwatch
You need a working installation of **node** and the **npm** package manager.

This module uses puppeteer. It was created by the developer of https://github.com/NikolaiT/GoogleScraper, a module with 1800 stars on Github.

### Quickstart

**Note**: If you don't want puppeteer to download a complete chromium browser, add this variable to your environment:
For example, if you are using Ubuntu 18.04, you can install node and npm with the following commands:

```bash
export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=1
sudo apt update;

sudo apt install nodejs;

# recent version of npm
curl -sL https://deb.nodesource.com/setup_10.x -o nodesource_setup.sh;
sudo bash nodesource_setup.sh;
sudo apt install npm;
```

Then install with
Chrome and puppeteer [need some additional libraries to run on ubuntu](https://techoverflow.net/2018/06/05/how-to-fix-puppetteer-error-).

This command will install dependencies:

```bash
# install all that is needed by the chromium browser. Maybe not everything is needed
sudo apt-get install gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget;
```

Install **se-scraper** by entering the following command in your terminal:

```bash
npm install se-scraper
```

then create a file with the following contents and start scraping.
If you **don't** want puppeteer to download a complete chromium browser, add this variable to your environment. Then this module is not guaranteed to run out of the box.

```bash
export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=1
```

### Docker Support

I will maintain a public docker image of se-scraper. Pull the docker image with the command:

```bash
docker pull tschachn/se-scraper
```

Confirm that the docker image was correctly pulled:

```bash
docker image ls
```

It should show something like this:

```
tschachn/se-scraper   latest   897e1aeeba78   21 minutes ago   1.29GB
```

You can check the [latest tag here](https://hub.docker.com/r/tschachn/se-scraper/tags). In the example below, the latest tag is **latest**. This will most likely remain **latest** in the future.

Run the docker image and map the internal port 3000 to the external
port 3000:

```bash
$ docker run -p 3000:3000 tschachn/se-scraper:latest

Running on http://0.0.0.0:3000
```

When the image is running, you may start scrape jobs via the HTTP API:

```bash
curl -XPOST http://0.0.0.0:3000 -H 'Content-Type: application/json' \
-d '{
    "browser_config": {
        "random_user_agent": true
    },
    "scrape_config": {
        "search_engine": "google",
        "keywords": ["test"],
        "num_pages": 1
    }
}'
```

Many thanks go to [slotix](https://github.com/NikolaiT/se-scraper/pull/21) for his tremendous help in setting up a docker image.


## Minimal Example

Create a file named `minimal.js` with the following contents

```js
const se_scraper = require('se-scraper');

let config = {
    search_engine: 'google',
    debug: false,
    verbose: false,
    keywords: ['news', 'scraping scrapeulous.com'],
    num_pages: 3,
    output_file: 'data.json',
};
(async () => {
    let scrape_job = {
        search_engine: 'google',
        keywords: ['lets go boys'],
        num_pages: 1,
    };

    function callback(err, response) {
        if (err) { console.error(err) }
        console.dir(response, {depth: null, colors: true});
    }
    var results = await se_scraper.scrape({}, scrape_job);

    se_scraper.scrape(config, callback);
    console.dir(results, {depth: null, colors: true});
})();
```

### Technical Notes
Start scraping by firing up the command `node minimal.js`
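
Because the hunk above interleaves the old callback-based snippet (`se_scraper.scrape(config, callback)`) with the new promise-based one, the pasted code will not run as shown. A consolidated sketch of just the new minimal example, assembled from the added lines above, would be:

```js
// Sketch of the new, promise-based minimal example (assembled from the added lines in the hunk above).
const se_scraper = require('se-scraper');

(async () => {
    let scrape_job = {
        search_engine: 'google',
        keywords: ['lets go boys'],
        num_pages: 1,
    };

    // an empty browser_config uses the defaults
    var results = await se_scraper.scrape({}, scrape_job);
    console.dir(results, { depth: null, colors: true });
})();
```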

## Quickstart

Create a file named `run.js` with the following contents

```js
const se_scraper = require('se-scraper');

(async () => {
    let browser_config = {
        debug_level: 1,
        output_file: 'examples/results/data.json',
    };

    let scrape_job = {
        search_engine: 'google',
        keywords: ['news', 'se-scraper'],
        num_pages: 1,
        // add some cool google search settings
        google_settings: {
            gl: 'us', // The gl parameter determines the Google country to use for the query.
            hl: 'en', // The hl parameter determines the Google UI language to return results.
            start: 0, // Determines the results offset to use, defaults to 0.
            num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
        },
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);

    await scraper.start();

    var results = await scraper.scrape(scrape_job);

    console.dir(results, {depth: null, colors: true});

    await scraper.quit();
})();
```

Start scraping by firing up the command `node run.js`
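
The `ScrapeManager` started above can also be reused for several scrape jobs before it is quit, which is what the "Reuse existing browser" example listed further down (`examples/multiple_search_engines.js`) does. A rough sketch of that pattern, with illustrative engines and keywords:

```js
// Sketch: run several scrape jobs against the same ScrapeManager before quitting.
const se_scraper = require('se-scraper');

(async () => {
    var scraper = new se_scraper.ScrapeManager({ debug_level: 1 });
    await scraper.start();

    for (let search_engine of ['google', 'bing', 'baidu']) {
        let results = await scraper.scrape({
            search_engine: search_engine,
            keywords: ['news'], // illustrative keyword
            num_pages: 1,
        });
        console.dir(results, { depth: null, colors: true });
    }

    await scraper.quit();
})();
```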

## Contribute

I really need and love your help! However, scraping is a dirty business and it often takes me a lot of time to find failing selectors or missing JS logic. So if any search engine does not yield the results of your liking, please create a **static test case** similar to [this static test of google](test/static_tests/google.js) that fails. I will try to correct se-scraper then.

That's how you would proceed:

1. Copy the [static google test case](test/static_tests/google.js)
2. Remove all unnecessary testing code
3. Save a search to file where se-scraper does not work correctly.
4. Implement the static test case using the saved search html where se-scraper currently fails.
5. Submit a new issue with the failing test case as pull request
6. I will fix it! (or better: you submit a pull request directly)

## Proxies

**se-scraper** will create one browser instance per proxy. So the maximal amount of concurrency is equivalent to the number of proxies plus one (your own IP).

```js
const se_scraper = require('se-scraper');

(async () => {
    let browser_config = {
        debug_level: 1,
        output_file: 'examples/results/proxyresults.json',
        proxy_file: '/home/nikolai/.proxies', // one proxy per line
        log_ip_address: true,
    };

    let scrape_job = {
        search_engine: 'google',
        keywords: ['news', 'scrapeulous.com', 'incolumitas.com', 'i work too much', 'what to do?', 'javascript is hard'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);
    await scraper.start();

    var results = await scraper.scrape(scrape_job);
    console.dir(results, {depth: null, colors: true});
    await scraper.quit();
})();
```

With a proxy file such as

```text
socks5://53.34.23.55:55523
socks4://51.11.23.22:22222
```

This will scrape with **three** browser instances, each having its own IP address. Unfortunately, it is currently not possible to scrape with different proxies per tab. Chromium does not support that.
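
If the scraper should never fall back to your own IP address, the `use_proxies_only` flag that appears in the Advanced Usage configuration further down can be combined with `proxy_file`. A small sketch (the proxy file path is illustrative):

```js
// Sketch: scrape exclusively through proxies, never through the default IP.
let browser_config = {
    proxy_file: '/home/nikolai/.proxies', // one proxy per line, as above
    // when this is set to true, se-scraper will not use your default IP address
    use_proxies_only: true,
    log_ip_address: true,
};
```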


## Custom Scrapers

You can define your own scraper class and use it within se-scraper.

[Check out this example](examples/custom_scraper.js) that defines a custom scraper for Ecosia.


## Examples

* [Reuse existing browser](examples/multiple_search_engines.js) yields [these results](examples/results/multiple_search_engines.json)
* [Simple example scraping google](examples/quickstart.js) yields [these results](examples/results/data.json)
* [Scrape with one proxy per browser](examples/proxies.js) yields [these results](examples/results/proxyresults.json)
* [Scrape 100 keywords on Bing with multiple tabs in one browser](examples/multiple_tabs.js) produces [this](examples/results/bing.json)
* [Inject your own scraping logic](examples/pluggable.js)
* [For the Lulz: Scraping google dorks for SQL injection vulnerabilities and confirming them.](examples/for_the_lulz.js)
* [Scrape google maps/locations](examples/google_maps.js) yields [these results](examples/results/maps.json)


## Scraping Model

**se-scraper** scrapes search engines only. In order to introduce concurrency into this library, it is necessary to define the scraping model. Then we can decide how we divide and conquer.

#### Scraping Resources

What are common scraping resources?

1. **Memory and CPU**. Necessary to launch multiple browser instances.
2. **Network Bandwidth**. It is not often the bottleneck.
3. **IP Addresses**. Websites often block IP addresses after a certain amount of requests from the same IP address. This can be circumvented by using proxies.
4. Spoofable identifiers such as browser fingerprint or user agents. Those will be handled by **se-scraper**.

#### Concurrency Model

**se-scraper** should be able to run without any concurrency at all. This is the default case. No concurrency means only one browser/tab is searching at a time.

For concurrent use, we will make use of a modified [puppeteer-cluster library](https://github.com/thomasdondorf/puppeteer-cluster).

One scrape job is properly defined by

* 1 search engine such as `google`
* `M` pages
* `N` keywords/queries
* `K` proxies and `K+1` browser instances (because when we have no proxies available, we will scrape with our dedicated IP)

Then **se-scraper** will create `K+1` dedicated browser instances, each with a unique IP address. Each browser will get `N/(K+1)` keywords and will issue `N/(K+1) * M` total requests to the search engine.
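
As a concrete, purely illustrative instance of this formula: with `N = 100` keywords, `K = 4` proxies and `M = 2` pages, se-scraper launches `K+1 = 5` browser instances, assigns `100/5 = 20` keywords to each, and each browser issues `20 * 2 = 40` requests to the search engine.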

The problem is that the [puppeteer-cluster library](https://github.com/thomasdondorf/puppeteer-cluster) only allows identical options for subsequent new browser instances. Therefore, it is not trivial to launch a cluster of browsers with distinct proxy settings. Right now, every browser has the same options. It's not possible to set options on a per-browser basis.

Solution:

1. Create an [upstream proxy router](https://github.com/GoogleChrome/puppeteer/issues/678).
2. Modify the [puppeteer-cluster library](https://github.com/thomasdondorf/puppeteer-cluster) to accept a list of proxy strings and then pop() from this list at every new call to `workerInstance()` in https://github.com/thomasdondorf/puppeteer-cluster/blob/master/src/Cluster.ts. I wrote an [issue here](https://github.com/thomasdondorf/puppeteer-cluster/issues/107). **I ended up doing this**.


## Technical Notes

Scraping is done with a headless chromium browser using the automation library puppeteer. Puppeteer is a Node library which provides a high-level API to control headless Chrome or Chromium over the DevTools Protocol.

No multithreading is supported for now. Only one scraping worker per `scrape()` call.

We will soon support parallelization. **se-scraper** will support an architecture similar to:

1. https://antoinevastel.com/crawler/2018/09/20/parallel-crawler-puppeteer.html
2. https://docs.browserless.io/blog/2018/06/04/puppeteer-best-practices.html

If you need to deploy scraping to the cloud (AWS or Azure), you can contact me at hire@incolumitas.com
If you need to deploy scraping to the cloud (AWS or Azure), you can contact me at **hire@incolumitas.com**

The chromium browser is started with the following flags to prevent
scraping detection.

@@ -90,11 +319,12 @@ var ADDITIONAL_CHROME_FLAGS = [

```js
    '--disable-gpu',
    '--window-size=1920x1080',
    '--hide-scrollbars',
    '--disable-notifications',
];
```

Furthermore, to avoid loading unnecessary resources and to speed up
scraping a great deal, we instruct chrome to not load images and css:
scraping a great deal, we instruct chrome to not load images and css and media:

```js
await page.setRequestInterception(true);

@@ -109,10 +339,11 @@ page.on('request', (req) => {
});
```
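
The hunk above omits the middle of the `page.on('request', ...)` handler. For illustration only, and not copied from se-scraper's source, the usual puppeteer request-interception pattern the text describes looks roughly like this:

```js
// Illustrative sketch of the asset-blocking handler described above (not the exact code from src/).
await page.setRequestInterception(true);

page.on('request', (req) => {
    const blocked = ['image', 'stylesheet', 'font', 'media'];
    if (blocked.includes(req.resourceType())) {
        req.abort();
    } else {
        req.continue();
    }
});
```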

### Making puppeteer and headless chrome undetectable
#### Making puppeteer and headless chrome undetectable

Consider the following resources:

* https://antoinevastel.com/bot%20detection/2019/07/19/detecting-chrome-headless-v3.html
* https://intoli.com/blog/making-chrome-headless-undetectable/
* https://intoli.com/blog/not-possible-to-block-chrome-headless/
* https://news.ycombinator.com/item?id=16179602

@@ -136,19 +367,20 @@ let config = {

It will create a screenshot named `headless-test-result.png` in the directory where the scraper was started that shows whether all tests have passed.
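
The screenshot is produced by the `test_evasion` switch that appears in the configuration listings below. A minimal sketch of turning it on; the listings below show the flag in both the browser and the scrape configuration, so treat the placement here as illustrative:

```js
// Sketch: enable the quick headless-detection self-test described above.
let browser_config = {
    // check if headless chrome escapes common detection techniques
    // this is a quick test and should be used for debugging
    test_evasion: true,
    apply_evasion_techniques: true,
};
```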

### Advanced Usage
## Advanced Usage

Use se-scraper by calling it with a script such as the one below.
Use **se-scraper** by calling it with a script such as the one below.

```js
const se_scraper = require('se-scraper');
const resolve = require('path').resolve;

let config = {
// those options need to be provided on startup
// and cannot be given to se-scraper on scrape() calls
let browser_config = {
    // the user agent to scrape with
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3835.0 Safari/537.36',
    // if random_user_agent is set to True, a random user agent is chosen
    random_user_agent: true,
    random_user_agent: false,
    // whether to select manual settings in visible mode
    set_manual_settings: false,
    // log ip address data

@@ -157,19 +389,29 @@
    log_http_headers: false,
    // how long to sleep between requests. a random sleep interval within the range [a,b]
    // is drawn before every request. empty string for no sleeping.
    sleep_range: '[1,1]',
    sleep_range: '',
    // which search engine to scrape
    search_engine: 'google',
    compress: false, // compress
    debug: false,
    verbose: false,
    keywords: ['scrapeulous.com'],
    // whether debug information should be printed
    // level 0: print nothing
    // level 1: print most important info
    // ...
    // level 4: print all shit nobody wants to know
    debug_level: 1,
    keywords: ['nodejs rocks',],
    // whether to start the browser in headless mode
    headless: true,
    // specify flags passed to chrome here
    chrome_flags: [],
    // the number of pages to scrape for each keyword
    num_pages: 1,
    // path to output file, data will be stored in JSON
    output_file: '',
    // whether to also passthru all the html output of the serp pages
    html_output: false,
    // whether to return a screenshot of serp pages as b64 data
    screen_output: false,
    // whether to prevent images, css, fonts and media from being loaded
    // will speed up scraping a great deal
    block_assets: true,

@@ -178,223 +420,92 @@
    // get_browser, handle_metadata, close_browser
    //custom_func: resolve('examples/pluggable.js'),
    custom_func: '',
    throw_on_detection: false,
    // use a proxy for all connections
    // example: 'socks5://78.94.172.42:1080'
    // example: 'http://118.174.233.10:48400'
    proxy: '',
    // a file with one proxy per line. Example:
    // socks5://78.94.172.42:1080
    // http://118.174.233.10:48400
    proxy_file: '',
    // whether to use proxies only
    // when this is set to true, se-scraper will not use
    // your default IP address
    use_proxies_only: false,
    // check if headless chrome escapes common detection techniques
    // this is a quick test and should be used for debugging
    test_evasion: false,
    apply_evasion_techniques: true,
    // settings for puppeteer-cluster
    puppeteer_cluster_config: {
        timeout: 30 * 60 * 1000, // max timeout set to 30 minutes
        monitor: false,
        concurrency: Cluster.CONCURRENCY_BROWSER,
        maxConcurrency: 1,
    }
};

function callback(err, response) {
    if (err) { console.error(err) }
(async () => {
    // scrape config can change on each scrape() call
    let scrape_config = {
        // which search engine to scrape
        search_engine: 'google',
        // an array of keywords to scrape
        keywords: ['cat', 'mouse'],
        // the number of pages to scrape for each keyword
        num_pages: 2,

    /* response object has the following properties:
        // OPTIONAL PARAMS BELOW:
        google_settings: {
            gl: 'us', // The gl parameter determines the Google country to use for the query.
            hl: 'fr', // The hl parameter determines the Google UI language to return results.
            start: 0, // Determines the results offset to use, defaults to 0.
            num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
        },
        // instead of keywords you can specify a keyword_file. this overwrites the keywords array
        keyword_file: '',
        // how long to sleep between requests. a random sleep interval within the range [a,b]
        // is drawn before every request. empty string for no sleeping.
        sleep_range: '',
        // path to output file, data will be stored in JSON
        output_file: 'output.json',
        // whether to prevent images, css, fonts from being loaded
        // will speed up scraping a great deal
        block_assets: false,
        // check if headless chrome escapes common detection techniques
        // this is a quick test and should be used for debugging
        test_evasion: false,
        apply_evasion_techniques: true,
        // log ip address data
        log_ip_address: false,
        // log http headers
        log_http_headers: false,
    };

    response.results - json object with the scraping results
    response.metadata - json object with metadata information
    response.statusCode - status code of the scraping process
    */
    let results = await se_scraper.scrape(browser_config, scrape_config);
    console.dir(results, {depth: null, colors: true});
})();
```

console.dir(response.results, {depth: null, colors: true});
[Output for the above script on my machine.](examples/results/advanced.json)
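
The returned object is keyed by keyword and then by page number, as the example output further down shows. A small sketch of walking over it, with field names taken from that output; depending on the API version the structure is either the return value of `scrape()` itself or its `.results` property:

```js
// Sketch: walk the keyword -> page -> SERP structure shown in the example output below.
let by_keyword = results.results || results;
for (let keyword in by_keyword) {
    for (let page in by_keyword[keyword]) {
        let serp = by_keyword[keyword][page];
        console.log(keyword, 'page', page, serp.num_results);
        for (let entry of serp.results) {
            console.log(`#${entry.rank}: ${entry.title} -> ${entry.link}`);
        }
    }
}
```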

### Query String Parameters

You can add your custom query string parameters to the configuration object by specifying a `google_settings` key. In general: `{{search engine}}_settings`.

For example you can customize your google search with the following config:

```js
let scrape_config = {
    search_engine: 'google',
    // use specific search engine parameters for various search engines
    google_settings: {
        google_domain: 'google.com',
        gl: 'us', // The gl parameter determines the Google country to use for the query.
        hl: 'us', // The hl parameter determines the Google UI language to return results.
        start: 0, // Determines the results offset to use, defaults to 0.
        num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
    },
}

se_scraper.scrape(config, callback);
```

Supported options for the `search_engine` config key:

```javascript
'google'
'google_news_old'
'google_news'
'google_image'
'bing'
'bing_news'
'infospace'
'webcrawler'
'baidu'
'youtube'
'duckduckgo_news'
'reuters'
'cnbc'
'marketwatch'
```
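
The same `{{search engine}}_settings` pattern works for the other engines. For Bing, the `examples/bing_multiple_browser_multiple_pages.js` file added later in this diff uses `bing_settings`; a trimmed sketch of that part (the keyword is illustrative):

```js
// Sketch: Bing-specific query string parameters, taken from the Bing example in this diff.
let scrape_config = {
    search_engine: 'bing',
    keywords: ['test'], // illustrative keyword
    num_pages: 1,
    // https://docs.microsoft.com/en-us/rest/api/cognitiveservices-bingsearch/bing-web-api-v5-reference#query-parameters
    bing_settings: {
        cc: 'DE',     // The cc parameter determines the country to use for the query.
        mkt: 'de-DE', // The mkt parameter determines the UI language to return results.
        offset: 0,    // Determines the results offset to use, defaults to 0.
        count: 20,    // Determines the number of results to show, defaults to 10. Maximum is 100.
    },
};
```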

Output for the above script on my machine:

```text
{ 'scraping scrapeulous.com':
   { '1':
      { time: 'Tue, 29 Jan 2019 21:39:22 GMT',
        num_results: 'Ungefähr 145 Ergebnisse (0,18 Sekunden) ',
        no_results: false,
        effective_query: '',
        results:
         [ { link: 'https://scrapeulous.com/',
             title: 'Scrapeuloushttps://scrapeulous.com/Im CacheDiese Seite übersetzen',
             snippet: 'Scrapeulous.com allows you to scrape various search engines automatically ... or to find hidden links, Scrapeulous.com enables you to scrape a ever increasing ...',
             visible_link: 'https://scrapeulous.com/',
             date: '',
             rank: 1 },
           { link: 'https://scrapeulous.com/about/',
             title: 'About - Scrapeuloushttps://scrapeulous.com/about/Im CacheDiese Seite übersetzen',
             snippet: 'Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. The business requirement to scrape information from ...',
             visible_link: 'https://scrapeulous.com/about/',
             date: '',
             rank: 2 },
           { link: 'https://scrapeulous.com/howto/',
             title: 'Howto - Scrapeuloushttps://scrapeulous.com/howto/Im CacheDiese Seite übersetzen',
             snippet: 'We offer scraping large amounts of keywords for the Google Search Engine. Large means any number of keywords between 40 and 50000. Additionally, we ...',
             visible_link: 'https://scrapeulous.com/howto/',
             date: '',
             rank: 3 },
           { link: 'https://github.com/NikolaiT/se-scraper',
             title: 'GitHub - NikolaiT/se-scraper: Javascript scraping module based on ...https://github.com/NikolaiT/se-scraperIm CacheDiese Seite übersetzen',
             snippet: '24.12.2018 - Javascript scraping module based on puppeteer for many different search ... for many different search engines... https://scrapeulous.com/.',
             visible_link: 'https://github.com/NikolaiT/se-scraper',
             date: '24.12.2018 - ',
             rank: 4 },
           { link: 'https://github.com/NikolaiT/GoogleScraper/blob/master/README.md',
             title: 'GoogleScraper/README.md at master · NikolaiT/GoogleScraper ...https://github.com/NikolaiT/GoogleScraper/blob/.../README.mdIm CacheÄhnliche SeitenDiese Seite übersetzen',
             snippet: 'GoogleScraper - Scraping search engines professionally. Scrapeulous.com - Scraping Service. GoogleScraper is a open source tool and will remain a open ...',
             visible_link: 'https://github.com/NikolaiT/GoogleScraper/blob/.../README.md',
             date: '',
             rank: 5 },
           { link: 'https://googlescraper.readthedocs.io/',
             title: 'Welcome to GoogleScraper\'s documentation! — GoogleScraper ...https://googlescraper.readthedocs.io/Im CacheDiese Seite übersetzen',
             snippet: 'Welcome to GoogleScraper\'s documentation!¶. Contents: GoogleScraper - Scraping search engines professionally · Scrapeulous.com - Scraping Service ...',
             visible_link: 'https://googlescraper.readthedocs.io/',
             date: '',
             rank: 6 },
           { link: 'https://incolumitas.com/pages/scrapeulous/',
             title: 'Coding, Learning and Business Ideas – Scrapeulous.com - Incolumitashttps://incolumitas.com/pages/scrapeulous/Im CacheDiese Seite übersetzen',
             snippet: 'A scraping service for scientists, marketing professionals, analysts or SEO folk. In autumn 2018, I created a scraping service called scrapeulous.com. There you ...',
             visible_link: 'https://incolumitas.com/pages/scrapeulous/',
             date: '',
             rank: 7 },
           { link: 'https://incolumitas.com/',
             title: 'Coding, Learning and Business Ideashttps://incolumitas.com/Im CacheDiese Seite übersetzen',
             snippet: 'Scraping Amazon Reviews using Headless Chrome Browser and Python3. Posted on Mi ... GoogleScraper Tutorial - How to scrape 1000 keywords with Google.',
             visible_link: 'https://incolumitas.com/',
             date: '',
             rank: 8 },
           { link: 'https://en.wikipedia.org/wiki/Search_engine_scraping',
             title: 'Search engine scraping - Wikipediahttps://en.wikipedia.org/wiki/Search_engine_scrapingIm CacheDiese Seite übersetzen',
             snippet: 'Search engine scraping is the process of harvesting URLs, descriptions, or other information from search engines such as Google, Bing or Yahoo. This is a ...',
             visible_link: 'https://en.wikipedia.org/wiki/Search_engine_scraping',
             date: '',
             rank: 9 },
           { link: 'https://readthedocs.org/projects/googlescraper/downloads/pdf/latest/',
             title: 'GoogleScraper Documentation - Read the Docshttps://readthedocs.org/projects/googlescraper/downloads/.../latest...Im CacheDiese Seite übersetzen',
             snippet: '23.12.2018 - Contents: 1 GoogleScraper - Scraping search engines professionally. 1. 1.1 ... For this reason, I created the web service scrapeulous.com.',
             visible_link: 'https://readthedocs.org/projects/googlescraper/downloads/.../latest...',
             date: '23.12.2018 - ',
             rank: 10 } ] },
     '2':
      { time: 'Tue, 29 Jan 2019 21:39:24 GMT',
        num_results: 'Seite 2 von ungefähr 145 Ergebnissen (0,20 Sekunden) ',
        no_results: false,
        effective_query: '',
        results:
         [ { link: 'https://pypi.org/project/CountryGoogleScraper/',
             title: 'CountryGoogleScraper · PyPIhttps://pypi.org/project/CountryGoogleScraper/Im CacheDiese Seite übersetzen',
             snippet: 'A module to scrape and extract links, titles and descriptions from various search ... Look [here to get an idea how to use asynchronous mode](http://scrapeulous.',
             visible_link: 'https://pypi.org/project/CountryGoogleScraper/',
             date: '',
             rank: 1 },
           { link: 'https://www.youtube.com/watch?v=a6xn6rc9GbI',
             title: 'scrapeulous intro - YouTubehttps://www.youtube.com/watch?v=a6xn6rc9GbIDiese Seite übersetzen',
             snippet: 'scrapeulous intro. Scrapeulous Scrapeulous. Loading... Unsubscribe from ... on Dec 16, 2018. Introduction ...',
             visible_link: 'https://www.youtube.com/watch?v=a6xn6rc9GbI',
             date: '',
             rank: 3 },
           { link: 'https://www.reddit.com/r/Python/comments/2tii3r/scraping_260_search_queries_in_bing_in_a_matter/',
             title: 'Scraping 260 search queries in Bing in a matter of seconds using ...https://www.reddit.com/.../scraping_260_search_queries_in_bing...Im CacheDiese Seite übersetzen',
             snippet: '24.01.2015 - Scraping 260 search queries in Bing in a matter of seconds using asyncio and aiohttp. (scrapeulous.com). submitted 3 years ago by ...',
             visible_link: 'https://www.reddit.com/.../scraping_260_search_queries_in_bing...',
             date: '24.01.2015 - ',
             rank: 4 },
           { link: 'https://twitter.com/incolumitas_?lang=de',
             title: 'Nikolai Tschacher (@incolumitas_) | Twitterhttps://twitter.com/incolumitas_?lang=deIm CacheÄhnliche SeitenDiese Seite übersetzen',
             snippet: 'Learn how to scrape millions of url from yandex and google or bing with: http://scrapeulous.com/googlescraper-market-analysis.html … 0 replies 0 retweets 0 ...',
             visible_link: 'https://twitter.com/incolumitas_?lang=de',
             date: '',
             rank: 5 },
           { link: 'http://blog.shodan.io/hostility-in-the-python-package-index/',
             title: 'Hostility in the Cheese Shop - Shodan Blogblog.shodan.io/hostility-in-the-python-package-index/Im CacheDiese Seite übersetzen',
             snippet: '22.02.2015 - https://zzz.scrapeulous.com/r? According to the author of the website, these hostile packages are used as honeypots. Honeypots are usually ...',
             visible_link: 'blog.shodan.io/hostility-in-the-python-package-index/',
             date: '22.02.2015 - ',
             rank: 6 },
           { link: 'https://libraries.io/github/NikolaiT/GoogleScraper',
             title: 'NikolaiT/GoogleScraper - Libraries.iohttps://libraries.io/github/NikolaiT/GoogleScraperIm CacheDiese Seite übersetzen',
             snippet: 'A Python module to scrape several search engines (like Google, Yandex, Bing, ... https://scrapeulous.com/ ... You can install GoogleScraper comfortably with pip:',
             visible_link: 'https://libraries.io/github/NikolaiT/GoogleScraper',
             date: '',
             rank: 7 },
           { link: 'https://pydigger.com/pypi/CountryGoogleScraper',
             title: 'CountryGoogleScraper - PyDiggerhttps://pydigger.com/pypi/CountryGoogleScraperDiese Seite übersetzen',
             snippet: '19.10.2016 - Look [here to get an idea how to use asynchronous mode](http://scrapeulous.com/googlescraper-260-keywords-in-a-second.html). ### Table ...',
             visible_link: 'https://pydigger.com/pypi/CountryGoogleScraper',
             date: '19.10.2016 - ',
             rank: 8 },
           { link: 'https://hub.docker.com/r/cimenx/data-mining-penandtest/',
             title: 'cimenx/data-mining-penandtest - Docker Hubhttps://hub.docker.com/r/cimenx/data-mining-penandtest/Im CacheDiese Seite übersetzen',
             snippet: 'Container. OverviewTagsDockerfileBuilds · http://scrapeulous.com/googlescraper-260-keywords-in-a-second.html. Docker Pull Command. Owner. profile ...',
             visible_link: 'https://hub.docker.com/r/cimenx/data-mining-penandtest/',
             date: '',
             rank: 9 },
           { link: 'https://www.revolvy.com/page/Search-engine-scraping',
             title: 'Search engine scraping | Revolvyhttps://www.revolvy.com/page/Search-engine-scrapingIm CacheDiese Seite übersetzen',
             snippet: 'Search engine scraping is the process of harvesting URLs, descriptions, or other information from search engines such as Google, Bing or Yahoo. This is a ...',
             visible_link: 'https://www.revolvy.com/page/Search-engine-scraping',
             date: '',
             rank: 10 } ] } } }
```

TODO.md, new file, 88 lines

### 24.12.2018
- fix interface to scrape() [DONE]
- add to Github


### 24.1.2018
- fix issue #3: add functionality to add keyword file

### 27.1.2019
- Add functionality to block images and CSS from loading as described here:
  https://www.scrapehero.com/how-to-increase-web-scraping-speed-using-puppeteer/
  https://www.scrapehero.com/how-to-build-a-web-scraper-using-puppeteer-and-node-js/

### 29.1.2019
- implement proxy support functionality
- implement proxy check

- implement scraping more than 1 page
  - do it for google
  - and bing
- implement duckduckgo scraping


### 30.1.2019
- modify all scrapers to use the generic class where it makes sense
  - Bing, Baidu, Google, Duckduckgo

### 7.2.2019
- add num_requests to test cases [done]

### 25.2.2019
- https://antoinevastel.com/crawler/2018/09/20/parallel-crawler-puppeteer.html
- add support for browsing with multiple browsers, use this neat library:
  - https://github.com/thomasdondorf/puppeteer-cluster [done]


### 28.2.2019
- write test case for multiple browsers/proxies
- write test case and example for multiple tabs with bing
- make README.md nicer. https://github.com/thomasdondorf/puppeteer-cluster/blob/master/README.md as template


### 11.6.2019
- TODO: fix amazon scraping
- change api of remaining test cases [done]
- TODO: implement custom search engine parameters on scrape()

### 12.6.2019
- remove unnecessary sleep() calls and replace with waitFor selectors


### 16.7.2019

- resolve issues
  - fix this https://github.com/NikolaiT/se-scraper/issues/37 [done]

- use puppeteer stealth plugin: https://www.npmjs.com/package/puppeteer-extra-plugin-stealth

- we will need to look at the concurrency impl of puppeteer-cluster [no typescript support :(), I will not support this right now]

- use random user agents plugin: https://github.com/intoli/user-agents [done]

- add screenshot capability (take the screenshot after parsing)
  - store as b64 [done]


### 12.8.2019

- add static test case for bing [done]
- add options that minimize the `html_output`:
  `clean_html_output` will remove all JS and CSS from the html
  `clean_data_images` removes all data images from the html
  [done]


### 13.8.2019
- Write test case for clean html output [done]
- Consider better compression algorithm. [done] There is the brotli algorithm, but this is only supported
  in very recent versions of nodejs
- what else can we remove from the dom [done] Removing comment nodes now! They are large in BING.
- remove all whitespace and \n and \t from html

### TODO:
1. fix googlenewsscraper waiting for results and parsing. remove the static sleep [done]
2. when using multiple browsers and random user agent, pass a random user agent to each perBrowserOptions

3. dont create a new tab when opening a new scraper

TODO.txt, deleted file, 45 lines

```text
24.12.2018
- fix interface to scrape() [DONE]
- add to Github


24.1.2018

- fix issue #3: add functionality to add keyword file

27.1.2019

- Add functionality to block images and CSS from loading as described here:

https://www.scrapehero.com/how-to-increase-web-scraping-speed-using-puppeteer/
https://www.scrapehero.com/how-to-build-a-web-scraper-using-puppeteer-and-node-js/

29.1.2019

- implement proxy support functionality
- implement proxy check

- implement scraping more than 1 page
- do it for google
- and bing

- implement duckduckgo scraping


30.1.2019

- modify all scrapers to use the generic class where it makes sense
- Bing, Baidu, Google, Duckduckgo

7.2.2019
- add num_requests to test cases [done]



TODO:
- add captcha service solving support
- check if news instances run the same browser and if we can have one proxy per tab workers

- write test case for:
- pluggable
- full metadata (log http headers, log ip address)
```

examples/bing_de.json, new file, 4645 lines (file diff suppressed because it is too large)

examples/bing_multiple_browser_multiple_pages.js, new file, 85 lines

```js
var fs = require('fs');
var path = require('path');
var os = require("os");

const se_scraper = require('./../index.js');
var filepath_de = path.join(__dirname, '/data/keywords_de.txt');

function read_keywords_from_file(fpath) {
    let kws = fs.readFileSync(fpath).toString().split(os.EOL);
    // clean keywords
    kws = kws.filter((kw) => {
        return kw.trim().length > 0;
    });
    return kws;
}

let keywords_de = read_keywords_from_file(filepath_de);

const Cluster = {
    CONCURRENCY_PAGE: 1, // shares cookies, etc.
    CONCURRENCY_CONTEXT: 2, // no cookie sharing (uses contexts)
    CONCURRENCY_BROWSER: 3, // no cookie sharing and individual processes (uses contexts)
};

// those options need to be provided on startup
// and cannot be given to se-scraper on scrape() calls
let browser_config = {
    // the user agent to scrape with
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    // if random_user_agent is set to True, a random user agent is chosen
    random_user_agent: true,
    verbose: true,
    // whether to start the browser in headless mode
    headless: true,
    is_local: false,
    throw_on_detection: false,
    puppeteer_cluster_config: {
        headless: true,
        timeout: 2 * 60 * 1000, // max timeout set to 2 minutes
        monitor: false,
        concurrency: 3, // Cluster.CONCURRENCY_BROWSER: one browser per worker
        maxConcurrency: 3, // scrape with 3 browsers
    }
};

(async () => {
    // scrape config can change on each scrape() call
    let scrape_config_bing_de = {
        // which search engine to scrape
        search_engine: 'bing',
        // an array of keywords to scrape
        keywords: keywords_de,
        // the number of pages to scrape for each keyword
        num_pages: 10,

        // OPTIONAL PARAMS BELOW:
        // https://docs.microsoft.com/en-us/rest/api/cognitiveservices-bingsearch/bing-web-api-v5-reference#query-parameters
        bing_settings: {
            cc: 'DE', // The cc parameter determines the country to use for the query.
            mkt: 'de-DE', // The mkt parameter determines the UI language to return results.
            offset: 0, // Determines the results offset to use, defaults to 0.
            count: 20, // Determines the number of results to show, defaults to 10. Maximum is 100.
        },
        // how long to sleep between requests. a random sleep interval within the range [a,b]
        // is drawn before every request. empty string for no sleeping.
        sleep_range: '',
        // path to output file, data will be stored in JSON
        output_file: 'examples/bing_de.json',
        // whether to prevent images, css, fonts from being loaded
        // will speed up scraping a great deal
        block_assets: true,
        // check if headless chrome escapes common detection techniques
        // this is a quick test and should be used for debugging
        test_evasion: false,
        apply_evasion_techniques: true,
        // log ip address data
        log_ip_address: false,
        // log http headers
        log_http_headers: false,
    };

    let results = await se_scraper.scrape(browser_config, scrape_config_bing_de);
    console.dir(results.metadata, {depth: null, colors: true});

})();
```
25  examples/cleaned_html.js (new file)
@@ -0,0 +1,25 @@
const se_scraper = require('./../index.js');
const fs = require('fs');

(async () => {

    let kw = 'news iran';

    let scrape_job = {
        search_engine: 'baidu',
        keywords: [kw],
        num_pages: 1,
        html_output: true,
        // whether to strip JS and CSS from the html_output
        // has only an effect if `html_output` is true
        clean_html_output: true,
        // remove all data images from the html
        clean_data_images: true,
    };

    var response = await se_scraper.scrape({}, scrape_job);

    console.dir(response, {depth: null, colors: true});

    fs.writeFileSync('example_cleaned.html', response.results[kw]['1']['html']);
})();
119  examples/custom_scraper.js (new file)
@@ -0,0 +1,119 @@
|
||||
const se_scraper = require('./../index.js');
|
||||
|
||||
/*
|
||||
* This example shows how you can define your custom scraper class and use it
|
||||
* within se-scraper.
|
||||
*/
|
||||
class EcosiaScraper extends se_scraper.Scraper {
|
||||
|
||||
constructor(...args) {
|
||||
super(...args);
|
||||
}
|
||||
|
||||
async parse_async(html) {
|
||||
// In this example we use vanilla javascript to parse out the
|
||||
// interesting information from the search engine
|
||||
|
||||
// you may also use a external library such as cheerio.
|
||||
|
||||
return await this.page.evaluate(() => {
|
||||
var results = {
|
||||
num_results: '',
|
||||
no_results: false,
|
||||
effective_query: '',
|
||||
results: [],
|
||||
};
|
||||
|
||||
document.querySelectorAll('.results .result').forEach((result) => {
|
||||
var serp = {};
|
||||
var title = result.querySelector('.result-title');
|
||||
if (title) {
|
||||
serp.title = title.innerText;
|
||||
serp.link = title.getAttribute('href');
|
||||
}
|
||||
|
||||
var green = result.querySelector('.result-url');
|
||||
if (green) {
|
||||
serp.green = green.getAttribute('href');
|
||||
}
|
||||
|
||||
var snippet = result.querySelector('.result-snippet');
|
||||
|
||||
if (snippet) {
|
||||
serp.snippet = snippet.innerText;
|
||||
}
|
||||
|
||||
results.results.push(serp);
|
||||
});
|
||||
|
||||
var num_res = document.querySelector('.card-title-result-count');
|
||||
if (num_res) {
|
||||
results.num_results = num_res.innerText;
|
||||
}
|
||||
|
||||
results.no_results = document.querySelector('.empty-result') != null;
|
||||
|
||||
var effective = document.querySelector('.query-context-text .result-title');
|
||||
|
||||
if (effective) {
|
||||
results.effective_query = effective.innerText;
|
||||
}
|
||||
|
||||
return results;
|
||||
});
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
let startUrl = 'https://www.ecosia.org/';
|
||||
|
||||
await this.page.goto(startUrl);
|
||||
|
||||
try {
|
||||
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[name="q"]');
|
||||
await this.set_input_value(`input[name="q"]`, keyword);
|
||||
await this.sleep(50);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('.pagination-next', {timeout: 1000});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
await next_page_link.click();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('.results .result', { timeout: this.STANDARD_TIMEOUT });
|
||||
}
|
||||
|
||||
async detected() {
|
||||
// check whether scraping was detected.
|
||||
}
|
||||
}
|
||||
|
||||
(async () => {
|
||||
|
||||
let scrape_job = {
|
||||
search_engine: EcosiaScraper,
|
||||
keywords: ['lets go boys'],
|
||||
num_pages: 2,
|
||||
};
|
||||
|
||||
var results = await se_scraper.scrape({headless: true}, scrape_job);
|
||||
|
||||
console.dir(results, {depth: null, colors: true});
|
||||
|
||||
})();
|
11  examples/delete_comments.js (new file)
@@ -0,0 +1,11 @@
var nodeIterator = document.createNodeIterator(
    document.body,
    NodeFilter.SHOW_COMMENT,
    { acceptNode: function(node) { return NodeFilter.FILTER_ACCEPT; } }
);

// Remove all comment nodes
while (nodeIterator.nextNode()) {
    var commentNode = nodeIterator.referenceNode;
    commentNode.remove();
}
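This file is plain browser-side DOM code. A minimal sketch of running it from puppeteer (assuming a `page` object from an already launched browser, as in the other examples in this directory):

await page.evaluate(() => {
    const nodeIterator = document.createNodeIterator(
        document.body,
        NodeFilter.SHOW_COMMENT,
        { acceptNode: () => NodeFilter.FILTER_ACCEPT }
    );
    // remove every comment node from the live DOM
    while (nodeIterator.nextNode()) {
        nodeIterator.referenceNode.remove();
    }
});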
97  examples/for_the_lulz.js (new file)
@@ -0,0 +1,97 @@
|
||||
|
||||
/*
|
||||
* Do not run this, this is probably illegal in your country ;)
|
||||
*/
|
||||
|
||||
const se_scraper = require('./../index.js');
|
||||
|
||||
|
||||
// generate some google dorks
|
||||
|
||||
function genGoogleDorks(iter=4) {
|
||||
let lulz_keywords = [];
|
||||
['seite', 'inicio', 'index'].forEach((x) => {
|
||||
for (var i = 0; i < iter; i++) {
|
||||
lulz_keywords.push(
|
||||
'inurl:"' + x + '.php?id=' + Math.floor(Math.random() * 100) + '"'
|
||||
)
|
||||
}
|
||||
});
|
||||
return lulz_keywords;
|
||||
}
|
||||
|
||||
const lulz_keywords = genGoogleDorks();
|
||||
console.log(lulz_keywords);
|
||||
|
||||
|
||||
// those options need to be provided on startup
|
||||
// and cannot give to se-scraper on scrape() calls
|
||||
let browser_config = {
|
||||
// if random_user_agent is set to True, a random user agent is chosen
|
||||
random_user_agent: true,
|
||||
headless: true,
|
||||
is_local: false,
|
||||
throw_on_detection: false,
|
||||
puppeteer_cluster_config: {
|
||||
headless: true,
|
||||
timeout: 2 * 60 * 1000, // max timeout set to 2 minutes
|
||||
monitor: false,
|
||||
concurrency: 3, // one scraper per tab
|
||||
maxConcurrency: 4, // scrape with 4 tabs
|
||||
}
|
||||
};
|
||||
|
||||
(async () => {
|
||||
// scrape config can change on each scrape() call
|
||||
let lulz_config = {
|
||||
// which search engine to scrape
|
||||
search_engine: 'google',
|
||||
// an array of keywords to scrape
|
||||
keywords: lulz_keywords,
|
||||
// the number of pages to scrape for each keyword
|
||||
num_pages: 3,
|
||||
// how long to sleep between requests. a random sleep interval within the range [a,b]
|
||||
// is drawn before every request. empty string for no sleeping.
|
||||
sleep_range: '',
|
||||
// path to output file, data will be stored in JSON
|
||||
output_file: 'goodboys.json',
|
||||
// whether to prevent images, css, fonts from being loaded
|
||||
// will speed up scraping a great deal
|
||||
block_assets: true,
|
||||
// check if headless chrome escapes common detection techniques
|
||||
// this is a quick test and should be used for debugging
|
||||
test_evasion: false,
|
||||
apply_evasion_techniques: true,
|
||||
// log ip address data
|
||||
log_ip_address: false,
|
||||
// log http headers
|
||||
log_http_headers: false,
|
||||
};
|
||||
|
||||
let results = await se_scraper.scrape(browser_config, lulz_config);
|
||||
|
||||
const all_links = [];
|
||||
|
||||
for (var kw in results) {
|
||||
for (var page in results[kw]) {
|
||||
for (var res of results[kw][page]['results']) {
|
||||
all_links.push(res.link);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(all_links);
|
||||
|
||||
for (var link of all_links) {
|
||||
try {
|
||||
const response = await got(link.replace(/(id=\d+)/g, "$1'"));
|
||||
let html = response.body;
|
||||
if (html.includes('error') || html.includes('mysql')) {
|
||||
console.log('Got a mysql injection in ' + url);
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(error.response.statusCode);
|
||||
}
|
||||
}
|
||||
|
||||
})();
|
23  examples/gimage.js (new file)
@@ -0,0 +1,23 @@
const se_scraper = require('./../src/node_scraper.js');

(async () => {
    let browser_config = {
        output_file: '',
    };

    let scrape_job = {
        search_engine: 'google_image',
        keywords: ['manaslu', 'everest', 'pitcairn'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);

    await scraper.start();

    var results = await scraper.scrape(scrape_job);

    console.dir(results, {depth: null, colors: true});

    await scraper.quit();
})();
26  examples/gnold.js (new file)
@@ -0,0 +1,26 @@
const se_scraper = require('./../src/node_scraper.js');

(async () => {
    let browser_config = {
        output_file: 'examples/results/gnold.json',
        google_news_old_settings: {
            gl: 'us', // The gl parameter determines the Google country to use for the query.
            hl: 'fr', // The hl parameter determines the Google UI language to return results.
            start: 0, // Determines the results offset to use, defaults to 0.
            num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
        },
    };

    let scrape_job = {
        search_engine: 'google_news_old',
        keywords: ['news world'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);
    await scraper.start();

    var results = await scraper.scrape(scrape_job);
    console.dir(results, {depth: null, colors: true});
    await scraper.quit();
})();
30  examples/google_maps.js (new file)
@@ -0,0 +1,30 @@
const se_scraper = require('./../src/node_scraper.js');

(async () => {
    let browser_config = {
        output_file: 'examples/results/maps.json',
        test_evasion: false,
        block_assets: false,
        headless: false,

        google_maps_settings: {
            scrape_in_detail: false,
        }
    };

    let scrape_job = {
        search_engine: 'google_maps',
        keywords: ['Berlin Zahnarzt'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);

    await scraper.start();

    var results = await scraper.scrape(scrape_job);

    console.dir(results, {depth: null, colors: true});

    await scraper.quit();
})();
21  examples/minimal.js (new file)
@@ -0,0 +1,21 @@
const se_scraper = require('./../index.js');

(async () => {

    let kws = [
        'https://www.linkedin.com/in/aakanksha-majhi-b24a8449',
        'https://www.linkedin.com/in/aakash-srivastava-7374a830',
        'https://www.linkedin.com/in/aakash-tiwari-019b8569',
    ];

    let scrape_job = {
        search_engine: 'google',
        keywords: kws,
        num_pages: 1,
    };

    var results = await se_scraper.scrape({}, scrape_job);

    console.dir(results, {depth: null, colors: true});

})();
35  examples/multiple_browsers.js (new file)
@@ -0,0 +1,35 @@
const se_scraper = require('./../src/node_scraper.js');

(async () => {
    let browser_config = {
        search_engine: 'google',
        random_user_agent: true,
        is_local: false,
        html_output: false,
        throw_on_detection: false,
        headless: true,
        puppeteer_cluster_config: {
            headless: true,
            timeout: 30 * 60 * 1000, // max timeout set to 30 minutes
            monitor: false,
            concurrency: 3, // 3 == CONCURRENCY_BROWSER
            maxConcurrency: 3, // 3 browsers will scrape
        },
    };

    let scrape_job = {
        search_engine: 'google',
        keywords: ['news', 'mountain', 'what', 'are good', 'keyword', 'who', 'nice'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);

    await scraper.start();

    var results = await scraper.scrape(scrape_job);

    console.dir(results, {depth: null, colors: true});

    await scraper.quit();
})();
@ -1,35 +1,29 @@
|
||||
const se_scraper = require('../index.js');
|
||||
const se_scraper = require('./../src/node_scraper.js');
|
||||
|
||||
async function multiple_search_engines() {
|
||||
(async () => {
|
||||
let browser_config = {
|
||||
random_user_agent: true,
|
||||
write_meta_data: true,
|
||||
sleep_range: '[1,1]',
|
||||
headless: true,
|
||||
output_file: `examples/results/multiple_search_engines.json`
|
||||
};
|
||||
|
||||
var searchEnginesList = ['google', 'bing'];
|
||||
let scrape_job = {
|
||||
search_engine: 'google',
|
||||
keywords: ['news', 'se-scraper'],
|
||||
num_pages: 1,
|
||||
};
|
||||
|
||||
for (let index = 0; index < searchEnginesList.length; index++) {
|
||||
const searchEngine = searchEnginesList[index];
|
||||
let config = {
|
||||
random_user_agent: true,
|
||||
write_meta_data: true,
|
||||
sleep_range: '[1,1]',
|
||||
search_engine: searchEngine,
|
||||
debug: false,
|
||||
verbose: false,
|
||||
// the list of keywords to scrape
|
||||
keywords: ['scrapeulous.com',],
|
||||
// whether to start the browser in headless mode
|
||||
headless: true,
|
||||
output_file: `${searchEngine}.json`
|
||||
};
|
||||
var scraper = new se_scraper.ScrapeManager(browser_config);
|
||||
await scraper.start();
|
||||
|
||||
await se_scraper.scrape(config, (err, response) => {
|
||||
if (err) {
|
||||
console.error(err)
|
||||
}
|
||||
console.dir(response.results, {
|
||||
depth: null,
|
||||
colors: true
|
||||
});
|
||||
});
|
||||
for (var se of ['google', 'bing']) {
|
||||
scrape_job.search_engine = se;
|
||||
var results = await scraper.scrape(scrape_job);
|
||||
console.dir(results, {depth: null, colors: true});
|
||||
}
|
||||
}
|
||||
|
||||
multiple_search_engines();
|
||||
await scraper.quit();
|
||||
})();
|
||||
|
||||
|
134  examples/multiple_tabs.js (new file)
@@ -0,0 +1,134 @@
|
||||
const se_scraper = require('./../index.js');
|
||||
|
||||
const Cluster = {
|
||||
CONCURRENCY_PAGE: 1, // shares cookies, etc.
|
||||
CONCURRENCY_CONTEXT: 2, // no cookie sharing (uses contexts)
|
||||
CONCURRENCY_BROWSER: 3, // no cookie sharing and individual processes (uses contexts)
|
||||
};
|
||||
|
||||
let keywords = ['New York',
|
||||
'Los Angeles',
|
||||
'Chicago',
|
||||
'Houston',
|
||||
'Philadelphia',
|
||||
'Phoenix',
|
||||
'San Antonio',
|
||||
'San Diego',
|
||||
'Dallas',
|
||||
'San Jose',
|
||||
'Austin',
|
||||
'Indianapolis',
|
||||
'Jacksonville',
|
||||
'San Francisco',
|
||||
'Columbus',
|
||||
'Charlotte',
|
||||
'Fort Worth',
|
||||
'Detroit',
|
||||
'El Paso',
|
||||
'Memphis',
|
||||
'Seattle',
|
||||
'Denver',
|
||||
'Washington',
|
||||
'Boston',
|
||||
'Nashville-Davidson',
|
||||
'Baltimore',
|
||||
'Oklahoma City',
|
||||
'Louisville/Jefferson County',
|
||||
'Portland',
|
||||
'Las Vegas',
|
||||
'Milwaukee',
|
||||
'Albuquerque',
|
||||
'Tucson',
|
||||
'Fresno',
|
||||
'Sacramento',
|
||||
'Long Beach',
|
||||
'Kansas City',
|
||||
'Mesa',
|
||||
'Virginia Beach',
|
||||
'Atlanta',
|
||||
'Colorado Springs',
|
||||
'Omaha',
|
||||
'Raleigh',
|
||||
'Miami',
|
||||
'Oakland',
|
||||
'Minneapolis',
|
||||
'Tulsa',
|
||||
'Cleveland',
|
||||
'Wichita',
|
||||
'Arlington',
|
||||
'New Orleans',
|
||||
'Bakersfield',
|
||||
'Tampa',
|
||||
'Honolulu',
|
||||
'Aurora',
|
||||
'Anaheim',
|
||||
'Santa Ana',
|
||||
'St. Louis',
|
||||
'Riverside',
|
||||
'Corpus Christi',
|
||||
'Lexington-Fayette',
|
||||
'Pittsburgh',
|
||||
'Anchorage',
|
||||
'Stockton',
|
||||
'Cincinnati',
|
||||
'St. Paul',
|
||||
'Toledo',
|
||||
'Greensboro',
|
||||
'Newark',
|
||||
'Plano',
|
||||
'Henderson',
|
||||
'Lincoln',
|
||||
'Buffalo',
|
||||
'Jersey City',
|
||||
'Chula Vista',
|
||||
'Fort Wayne',
|
||||
'Orlando',
|
||||
'St. Petersburg',
|
||||
'Chandler',
|
||||
'Laredo',
|
||||
'Norfolk',
|
||||
'Durham',
|
||||
'Madison',
|
||||
'Lubbock',
|
||||
'Irvine',
|
||||
'Winston-Salem',
|
||||
'Glendale',
|
||||
'Garland',
|
||||
'Hialeah',
|
||||
'Reno',
|
||||
'Chesapeake',
|
||||
'Gilbert',
|
||||
'Baton Rouge',
|
||||
'Irving',
|
||||
'Scottsdale',
|
||||
'North Las Vegas',
|
||||
'Fremont',
|
||||
'Boise City',
|
||||
'Richmond',
|
||||
'San Bernardino'];
|
||||
|
||||
let config = {
|
||||
search_engine: 'bing',
|
||||
debug: false,
|
||||
verbose: true,
|
||||
keywords: keywords,
|
||||
num_pages: 1, // how many pages per keyword
|
||||
output_file: 'examples/results/bing.json',
|
||||
log_ip_address: false,
|
||||
headless: true,
|
||||
puppeteer_cluster_config: {
|
||||
timeout: 10 * 60 * 1000, // max timeout set to 10 minutes
|
||||
monitor: false,
|
||||
concurrency: Cluster.CONCURRENCY_PAGE, // one scraper per tab
|
||||
maxConcurrency: 7, // scrape with 7 tabs
|
||||
}
|
||||
};
|
||||
|
||||
function callback(err, response) {
|
||||
if (err) {
|
||||
console.error(err)
|
||||
}
|
||||
console.dir(response, {depth: null, colors: true});
|
||||
}
|
||||
|
||||
se_scraper.scrape(config, callback);
|
76  examples/per_page_proxy.js (new file)
@@ -0,0 +1,76 @@
|
||||
const puppeteer = require('puppeteer');
|
||||
const ProxyChain = require('proxy-chain');
|
||||
|
||||
const ROUTER_PROXY = 'http://127.0.0.1:8000';
|
||||
|
||||
// SEE: https://github.com/GoogleChrome/puppeteer/issues/678
|
||||
// Idea is: Setup a local router proxy that assigns requests identified by unique user-agent strings
|
||||
// distinct upstream proxies. With this way it is possible to use one proxy per chromium tab.
|
||||
// downside: not fast and efficient
|
||||
|
||||
const uas = [
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
|
||||
];
|
||||
|
||||
const proxies = ['http://142.93.57.147:3128', 'http://85.132.31.115:8181'];
|
||||
|
||||
(async () => {
|
||||
const browser = await puppeteer.launch({
|
||||
headless: false,
|
||||
args: [`--proxy-server=${ROUTER_PROXY}`],
|
||||
});
|
||||
const page1 = await browser.newPage();
|
||||
const page2 = await browser.newPage();
|
||||
|
||||
try {
|
||||
await page1.setUserAgent(uas[0]);
|
||||
await page1.goto('https://www.whatsmyip.org/');
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
}
|
||||
|
||||
try {
|
||||
await page2.setUserAgent(uas[1]);
|
||||
await page2.goto('https://www.whatsmyip.org/');
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
}
|
||||
|
||||
//await browser.close();
|
||||
})();
|
||||
|
||||
const server = new ProxyChain.Server({
|
||||
// Port where the server the server will listen. By default 8000.
|
||||
port: 8000,
|
||||
|
||||
// Enables verbose logging
|
||||
verbose: true,
|
||||
|
||||
prepareRequestFunction: ({
|
||||
request,
|
||||
username,
|
||||
password,
|
||||
hostname,
|
||||
port,
|
||||
isHttp,
|
||||
}) => {
|
||||
var upstreamProxyUrl;
|
||||
|
||||
if (request.headers['user-agent'] === uas[0]) {
|
||||
upstreamProxyUrl = proxies[0];
|
||||
}
|
||||
|
||||
if (request.headers['user-agent'] === uas[1]) {
|
||||
upstreamProxyUrl = proxies[1];
|
||||
}
|
||||
|
||||
console.log('Using proxy: ' + upstreamProxyUrl);
|
||||
|
||||
return { upstreamProxyUrl };
|
||||
},
|
||||
});
|
||||
|
||||
server.listen(() => {
|
||||
console.log(`Router Proxy server is listening on port ${8000}`);
|
||||
});
|
@ -9,17 +9,13 @@ module.exports = class Pluggable {
|
||||
'--disable-gpu',
|
||||
'--window-size=1920x1080',
|
||||
'--hide-scrollbars',
|
||||
'--user-agent=Chrome',
|
||||
'--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3835.0 Safari/537.36',
|
||||
],
|
||||
userAgent = 'Chrome',
|
||||
headless = true,
|
||||
} = options;
|
||||
|
||||
this.chromeFlags = chromeFlags;
|
||||
this.userAgent = userAgent;
|
||||
this.headless = headless;
|
||||
|
||||
this.chromeFlags.push(this.userAgent);
|
||||
}
|
||||
|
||||
async close_browser() {
|
||||
@ -65,4 +61,9 @@ module.exports = class Pluggable {
|
||||
|
||||
return this.browser;
|
||||
}
|
||||
|
||||
async do_work(page) {
|
||||
// do some scraping work and return results and num_requests
|
||||
|
||||
}
|
||||
};
|
31  examples/pluggable_example.js (new file)
@@ -0,0 +1,31 @@
const se_scraper = require('./../src/node_scraper.js');
const resolve = require('path').resolve;

(async () => {
    let browser_config = {
        test_evasion: false,
        log_http_headers: true,
        log_ip_address: true,
        random_user_agent: false,
        apply_evasion_techniques: false,
        screen_output: false,
        custom_func: resolve('./examples/pluggable.js'),
        headless: false,
    };

    let scrape_job = {
        search_engine: 'google',
        keywords: ['news usa'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);

    await scraper.start();

    var results = await scraper.scrape(scrape_job);

    console.dir(results, {depth: null, colors: true});

    await scraper.quit();
})();
29  examples/proxies.js (new file)
@@ -0,0 +1,29 @@
const se_scraper = require('./../src/node_scraper.js');

(async () => {
    let browser_config = {
        output_file: 'examples/results/proxyresults.json',
        log_ip_address: true,
        // a file with one proxy per line. Example:
        // socks5://78.94.172.42:1080
        // http://118.174.233.10:48400
        proxy_file: '/home/nikolai/.proxies', // one proxy per line
        // whether to use proxies only
        // when this is set to true, se-scraper will not use
        // your default IP address in a browser
        use_proxies_only: true,
    };

    let scrape_job = {
        search_engine: 'google',
        keywords: ['news', 'some stuff', 'i work too much', 'what to do?', 'javascript is hard'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);
    await scraper.start();

    var results = await scraper.scrape(scrape_job);
    console.dir(results, {depth: null, colors: true});
    await scraper.quit();
})();
@ -1,17 +1,36 @@
|
||||
const se_scraper = require('./../index.js');
|
||||
const se_scraper = require('./../src/node_scraper.js');
|
||||
|
||||
let config = {
|
||||
search_engine: 'duckduckgo',
|
||||
debug: false,
|
||||
verbose: false,
|
||||
keywords: ['news'],
|
||||
num_pages: 2,
|
||||
output_file: 'data.json',
|
||||
};
|
||||
(async () => {
|
||||
let browser_config = {
|
||||
test_evasion: false,
|
||||
log_http_headers: false,
|
||||
log_ip_address: false,
|
||||
random_user_agent: false,
|
||||
apply_evasion_techniques: true,
|
||||
screen_output: false,
|
||||
html_output: false,
|
||||
clean_html_output: true,
|
||||
};
|
||||
|
||||
function callback(err, response) {
|
||||
if (err) { console.error(err) }
|
||||
console.dir(response, {depth: null, colors: true});
|
||||
}
|
||||
let scrape_job = {
|
||||
search_engine: 'google',
|
||||
keywords: ['buy a nice car'],
|
||||
num_pages: 1,
|
||||
google_settings: {
|
||||
"gl": "us",
|
||||
"hl": "en",
|
||||
"start": 0,
|
||||
"num": 10
|
||||
}
|
||||
};
|
||||
|
||||
se_scraper.scrape(config, callback);
|
||||
var scraper = new se_scraper.ScrapeManager(browser_config);
|
||||
|
||||
await scraper.start();
|
||||
|
||||
var results = await scraper.scrape(scrape_job);
|
||||
|
||||
console.dir(results, {depth: null, colors: true});
|
||||
|
||||
await scraper.quit();
|
||||
})();
|
||||
|
30  examples/reusing.js (new file)
@@ -0,0 +1,30 @@
const se_scraper = require('./../src/node_scraper.js');

(async () => {
    let browser_config = {
        output_file: 'examples/results/data.json',
    };

    let scrape_job = {
        search_engine: 'google',
        keywords: ['news', 'se-scraper'],
        num_pages: 1,
    };

    let scrape_job2 = {
        search_engine: 'bing',
        keywords: ['test', 'what a wonderful world'],
        num_pages: 1,
    };

    var scraper = new se_scraper.ScrapeManager(browser_config);
    await scraper.start();

    var results = await scraper.scrape(scrape_job);
    console.dir(results, {depth: null, colors: true});

    var results2 = await scraper.scrape(scrape_job2);
    console.dir(results2, {depth: null, colors: true});

    await scraper.quit();
})();
87  examples/test_cluster.js (new file)
@@ -0,0 +1,87 @@
|
||||
const { Cluster } = require('../../puppeteer-cluster/dist/index.js');
|
||||
var fs = require('fs');
|
||||
var os = require("os");
|
||||
|
||||
const PROXY_FILE = '/home/nikolai/.proxies';
|
||||
|
||||
function read_items_from_file(fname) {
|
||||
let kws = fs.readFileSync(fname).toString().split(os.EOL);
|
||||
// clean keywords
|
||||
kws = kws.filter((kw) => {
|
||||
return kw.trim().length > 0;
|
||||
});
|
||||
return kws;
|
||||
}
|
||||
|
||||
(async () => {
|
||||
|
||||
let browserArgs = [
|
||||
'--disable-infobars',
|
||||
'--window-position=0,0',
|
||||
'--ignore-certifcate-errors',
|
||||
'--ignore-certifcate-errors-spki-list',
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-accelerated-2d-canvas',
|
||||
'--disable-gpu',
|
||||
'--window-size=1920x1080',
|
||||
'--hide-scrollbars',
|
||||
];
|
||||
|
||||
let proxies = read_items_from_file(PROXY_FILE);
|
||||
|
||||
console.dir(proxies);
|
||||
|
||||
// each new call to workerInstance() will
|
||||
// left pop() one element from this list
|
||||
// maxConcurrency should be equal to perBrowserOptions.length
|
||||
|
||||
// the first browser config with home IP
|
||||
let perBrowserOptions = [{
|
||||
headless: false,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: browserArgs
|
||||
}];
|
||||
|
||||
for (var proxy of proxies) {
|
||||
perBrowserOptions.push({
|
||||
headless: false,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: browserArgs.concat(`--proxy-server=${proxy}`)
|
||||
})
|
||||
}
|
||||
|
||||
const cluster = await Cluster.launch({
|
||||
monitor: true,
|
||||
timeout: 12 * 60 * 60 * 1000, // 12 hours in ms
|
||||
concurrency: Cluster.CONCURRENCY_BROWSER,
|
||||
maxConcurrency: perBrowserOptions.length,
|
||||
puppeteerOptions: {
|
||||
headless: false,
|
||||
args: browserArgs,
|
||||
ignoreHTTPSErrors: true,
|
||||
},
|
||||
perBrowserOptions: perBrowserOptions
|
||||
});
|
||||
|
||||
// Event handler to be called in case of problems
|
||||
cluster.on('taskerror', (err, data) => {
|
||||
console.log(`Error crawling ${data}: ${err.message}`);
|
||||
});
|
||||
|
||||
|
||||
await cluster.task(async ({ page, data: url }) => {
|
||||
await page.goto(url, {waitUntil: 'domcontentloaded', timeout: 20000});
|
||||
const pageTitle = await page.evaluate(() => document.title);
|
||||
console.log(`Page title of ${url} is ${pageTitle}`);
|
||||
console.log(await page.content());
|
||||
});
|
||||
|
||||
for(var i = 0; i < perBrowserOptions.length; i++) {
|
||||
await cluster.queue('http://ipinfo.io/json');
|
||||
}
|
||||
|
||||
await cluster.idle();
|
||||
await cluster.close();
|
||||
})();
|
40  examples/test_promise.js (new file)
@@ -0,0 +1,40 @@
|
||||
class Test {
|
||||
constructor(options = {}) {
|
||||
const {
|
||||
config = {},
|
||||
} = options;
|
||||
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
run(vars) {
|
||||
|
||||
console.log(this.config)
|
||||
}
|
||||
}
|
||||
|
||||
let o1 = new Test({config: {a: Math.random()}});
|
||||
let o2 = new Test({config: {a: Math.random()}});
|
||||
|
||||
o1.run()
|
||||
o2.run()
|
||||
|
||||
// (async () => {
|
||||
//
|
||||
// let prom = [];
|
||||
//
|
||||
// for (var i = 0; i < 3; i++) {
|
||||
// var obj = new Test({
|
||||
// config: {a: Math.random()},
|
||||
// });
|
||||
// prom.push(new Promise(resolve => {
|
||||
// setTimeout(() => { new Test({
|
||||
// config: {a: Math.random()},
|
||||
// }).run(); resolve() }, 1000);
|
||||
// }));
|
||||
// }
|
||||
//
|
||||
// let res = await Promise.all(prom);
|
||||
// console.log(res);
|
||||
//
|
||||
// })();
|
29  examples/test_proxyflag.js (new file)
@@ -0,0 +1,29 @@
const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch({
        args: [
            // SET PROXY HERE
            '--proxy-server=socks5://IP:PORT',
            '--disable-infobars',
            '--window-position=0,0',
            '--ignore-certificate-errors',
            '--ignore-certificate-errors-spki-list',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--disable-gpu',
            '--window-size=1920x1080',
            '--hide-scrollbars',
            '--disable-notifications',
            '--no-sandbox',
            '--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36'
        ],
        headless: true
    });
    var page = await browser.newPage();
    await page.setViewport({width: 1920, height: 926});
    await page.goto('http://ipinfo.io/json');
    console.log(await page.content());
    await browser.close();
})();
Binary file not shown. (Before size: 43 KiB)
90  index.js
@@ -1,81 +1,23 @@
|
||||
const handler = require('./src/node_scraper.js');
|
||||
var fs = require('fs');
|
||||
var os = require("os");
|
||||
const se_scraper = require('./src/node_scraper.js');
|
||||
var Scraper = require('./src/modules/se_scraper');
|
||||
|
||||
exports.scrape = async function(config, callback) {
|
||||
async function scrape(browser_config, scrape_config) {
|
||||
// scrape config overwrites the browser_config
|
||||
Object.assign(browser_config, scrape_config);
|
||||
|
||||
// options for scraping
|
||||
event = {
|
||||
// the user agent to scrape with
|
||||
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
|
||||
// if random_user_agent is set to True, a random user agent is chosen
|
||||
random_user_agent: true,
|
||||
// whether to select manual settings in visible mode
|
||||
set_manual_settings: false,
|
||||
// log ip address data
|
||||
log_ip_address: false,
|
||||
// log http headers
|
||||
log_http_headers: false,
|
||||
// how long to sleep between requests. a random sleep interval within the range [a,b]
|
||||
// is drawn before every request. empty string for no sleeping.
|
||||
sleep_range: '[1,1]',
|
||||
// which search engine to scrape
|
||||
search_engine: 'google',
|
||||
compress: false, // compress
|
||||
debug: false,
|
||||
verbose: false,
|
||||
keywords: ['scrapeulous.com'],
|
||||
// whether to start the browser in headless mode
|
||||
headless: true,
|
||||
// the number of pages to scrape for each keyword
|
||||
num_pages: 1,
|
||||
// path to output file, data will be stored in JSON
|
||||
output_file: '',
|
||||
// whether to prevent images, css, fonts and media from being loaded
|
||||
// will speed up scraping a great deal
|
||||
block_assets: true,
|
||||
// path to js module that extends functionality
|
||||
// this module should export the functions:
|
||||
// get_browser, handle_metadata, close_browser
|
||||
//custom_func: resolve('examples/pluggable.js'),
|
||||
custom_func: '',
|
||||
// use a proxy for all connections
|
||||
// example: 'socks5://78.94.172.42:1080'
|
||||
// example: 'http://118.174.233.10:48400'
|
||||
proxy: '',
|
||||
// check if headless chrome escapes common detection techniques
|
||||
// this is a quick test and should be used for debugging
|
||||
test_evasion: false,
|
||||
};
|
||||
var scraper = new se_scraper.ScrapeManager(browser_config);
|
||||
|
||||
// overwrite default config
|
||||
for (var key in config) {
|
||||
event[key] = config[key];
|
||||
}
|
||||
await scraper.start();
|
||||
|
||||
if (fs.existsSync(event.keyword_file)) {
|
||||
event.keywords = read_keywords_from_file(event.keyword_file);
|
||||
}
|
||||
var results = await scraper.scrape(scrape_config);
|
||||
|
||||
if (!callback) {
|
||||
// called when results are ready
|
||||
callback = function (err, response) {
|
||||
if (err) {
|
||||
console.error(err)
|
||||
}
|
||||
await scraper.quit();
|
||||
|
||||
console.dir(response.results, {depth: null, colors: true});
|
||||
}
|
||||
}
|
||||
|
||||
await handler.handler(event, undefined, callback );
|
||||
};
|
||||
|
||||
function read_keywords_from_file(fname) {
|
||||
let kws = fs.readFileSync(fname).toString().split(os.EOL);
|
||||
// clean keywords
|
||||
kws = kws.filter((kw) => {
|
||||
return kw.trim().length > 0;
|
||||
});
|
||||
return kws;
|
||||
return results;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
scrape: scrape,
|
||||
ScrapeManager: se_scraper.ScrapeManager,
|
||||
Scraper: Scraper,
|
||||
};
|
||||
|
2044  package-lock.json (generated; diff suppressed because it is too large)
29  package.json
@@ -1,16 +1,17 @@
|
||||
{
|
||||
"name": "se-scraper",
|
||||
"version": "1.1.13",
|
||||
"description": "A simple library using puppeteer to scrape several search engines such as Google, Duckduckgo and Bing.",
|
||||
"version": "1.5.7",
|
||||
"description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo",
|
||||
"homepage": "https://scrapeulous.com/",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "mocha"
|
||||
"test": "mocha test test/modules"
|
||||
},
|
||||
"keywords": [
|
||||
"scraping",
|
||||
"search-engines",
|
||||
"google",
|
||||
"bing",
|
||||
"web-scraping"
|
||||
],
|
||||
"author": "Nikolai Tschacher <hire@incolumitas.com> (https://incolumitas.com/)",
|
||||
@ -20,9 +21,25 @@
|
||||
},
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"chai": "^4.2.0",
|
||||
"cheerio": "^1.0.0-rc.2",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"debug": "^4.1.1",
|
||||
"got": "^9.6.0",
|
||||
"puppeteer": "^1.12.2"
|
||||
"lodash": "^4.17.14",
|
||||
"puppeteer": "^2.0.0",
|
||||
"puppeteer-cluster": "^0.18.0",
|
||||
"puppeteer-extra": "^2.1.3",
|
||||
"puppeteer-extra-plugin-stealth": "^2.2.2",
|
||||
"user-agents": "^1.0.378",
|
||||
"winston": "^3.2.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"bluebird": "^3.7.2",
|
||||
"chai": "^4.2.0",
|
||||
"chai-string": "^1.5.0",
|
||||
"express": "^4.17.1",
|
||||
"http-mitm-proxy": "^0.8.2",
|
||||
"key-cert": "^1.0.1",
|
||||
"mocha": "^6.1.4",
|
||||
"ua-parser-js": "^0.7.21"
|
||||
}
|
||||
}
|
||||
|
105  run.js
@@ -1,35 +1,22 @@
|
||||
const se_scraper = require('./index.js');
|
||||
const resolve = require('path').resolve;
|
||||
|
||||
let config = {
|
||||
// those options need to be provided on startup
|
||||
// and cannot give to se-scraper on scrape() calls
|
||||
let browser_config = {
|
||||
// the user agent to scrape with
|
||||
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
|
||||
// if random_user_agent is set to True, a random user agent is chosen
|
||||
random_user_agent: true,
|
||||
// how long to sleep between requests. a random sleep interval within the range [a,b]
|
||||
// is drawn before every request. empty string for no sleeping.
|
||||
sleep_range: '[1,2]',
|
||||
// which search engine to scrape
|
||||
search_engine: 'google',
|
||||
// whether debug information should be printed
|
||||
// debug info is useful for developers when debugging
|
||||
debug: false,
|
||||
// whether verbose program output should be printed
|
||||
// this output is informational
|
||||
verbose: true,
|
||||
// an array of keywords to scrape
|
||||
keywords: ['news'],
|
||||
// alternatively you can specify a keyword_file. this overwrites the keywords array
|
||||
keyword_file: '',
|
||||
// the number of pages to scrape for each keyword
|
||||
num_pages: 1,
|
||||
random_user_agent: false,
|
||||
// whether to start the browser in headless mode
|
||||
headless: true,
|
||||
// path to output file, data will be stored in JSON
|
||||
output_file: 'data.json',
|
||||
// whether to prevent images, css, fonts from being loaded
|
||||
// will speed up scraping a great deal
|
||||
block_assets: true,
|
||||
headless: false,
|
||||
// whether debug information should be printed
|
||||
// level 0: print nothing
|
||||
// level 1: print most important info
|
||||
// ...
|
||||
// level 4: print all shit nobody wants to know
|
||||
debug_level: 1,
|
||||
// specify flags passed to chrome here
|
||||
chrome_flags: [],
|
||||
// path to js module that extends functionality
|
||||
// this module should export the functions:
|
||||
// get_browser, handle_metadata, close_browser
|
||||
@ -40,26 +27,56 @@ let config = {
|
||||
// example: 'socks5://78.94.172.42:1080'
|
||||
// example: 'http://118.174.233.10:48400'
|
||||
proxy: '',
|
||||
// check if headless chrome escapes common detection techniques
|
||||
// this is a quick test and should be used for debugging
|
||||
test_evasion: false,
|
||||
// log ip address data
|
||||
log_ip_address: true,
|
||||
// log http headers
|
||||
log_http_headers: true,
|
||||
// a file with one proxy per line. Example:
|
||||
// socks5://78.94.172.42:1080
|
||||
// http://118.174.233.10:48400
|
||||
proxy_file: '',
|
||||
puppeteer_cluster_config: {
|
||||
timeout: 10 * 60 * 1000, // max timeout set to 10 minutes
|
||||
monitor: false,
|
||||
concurrency: 1, // one scraper per tab
|
||||
maxConcurrency: 1, // scrape with 1 tab
|
||||
}
|
||||
};
|
||||
|
||||
function callback(err, response) {
|
||||
if (err) { console.error(err) }
|
||||
(async () => {
|
||||
// scrape config can change on each scrape() call
|
||||
let scrape_config = {
|
||||
// which search engine to scrape
|
||||
search_engine: 'duckduckgo',
|
||||
// an array of keywords to scrape
|
||||
keywords: ['cloud service'],
|
||||
// the number of pages to scrape for each keyword
|
||||
num_pages: 1,
|
||||
|
||||
/* response object has the following properties:
|
||||
// OPTIONAL PARAMS BELOW:
|
||||
// google_settings: {
|
||||
// gl: 'us', // The gl parameter determines the Google country to use for the query.
|
||||
// hl: 'fr', // The hl parameter determines the Google UI language to return results.
|
||||
// start: 0, // Determines the results offset to use, defaults to 0.
|
||||
// num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
|
||||
// },
|
||||
// instead of keywords you can specify a keyword_file. this overwrites the keywords array
|
||||
keyword_file: '',
|
||||
// how long to sleep between requests. a random sleep interval within the range [a,b]
|
||||
// is drawn before every request. empty string for no sleeping.
|
||||
sleep_range: '',
|
||||
// path to output file, data will be stored in JSON
|
||||
output_file: '',
|
||||
// whether to prevent images, css, fonts from being loaded
|
||||
// will speed up scraping a great deal
|
||||
block_assets: false,
|
||||
// check if headless chrome escapes common detection techniques
|
||||
// this is a quick test and should be used for debugging
|
||||
test_evasion: false,
|
||||
apply_evasion_techniques: true,
|
||||
// log ip address data
|
||||
log_ip_address: false,
|
||||
// log http headers
|
||||
log_http_headers: false,
|
||||
};
|
||||
|
||||
response.results - json object with the scraping results
|
||||
response.metadata - json object with metadata information
|
||||
response.statusCode - status code of the scraping process
|
||||
*/
|
||||
let results = await se_scraper.scrape(browser_config, scrape_config);
|
||||
console.dir(results, {depth: null, colors: true});
|
||||
})();
|
||||
|
||||
// console.dir(response.results, {depth: null, colors: true});
|
||||
}
|
||||
|
||||
se_scraper.scrape(config, callback);
|
||||
|
@ -2,7 +2,9 @@
|
||||
<module type="WEB_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/test/static_tests/html" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
|
55  src/concurrency-implementation.js (new file)
@@ -0,0 +1,55 @@
|
||||
const { Browser } = require('puppeteer-cluster/dist/concurrency/builtInConcurrency');
|
||||
const debug = require('debug')('se-scraper:CustomConcurrency');
|
||||
const { timeoutExecute } = require('puppeteer-cluster/dist/util');
|
||||
|
||||
const BROWSER_TIMEOUT = 5000;
|
||||
|
||||
class CustomConcurrency extends Browser {
|
||||
|
||||
async init() {}
|
||||
async close() {}
|
||||
|
||||
async workerInstance() {
|
||||
const options = this.options.perBrowserOptions.shift();
|
||||
debug('Launch puppeteer instance with options=%o', options);
|
||||
let chrome = await this.puppeteer.launch(options);
|
||||
let page;
|
||||
let context;
|
||||
|
||||
return {
|
||||
jobInstance: async () => {
|
||||
await timeoutExecute(BROWSER_TIMEOUT, (async () => {
|
||||
context = await chrome.createIncognitoBrowserContext();
|
||||
page = await context.newPage();
|
||||
})());
|
||||
|
||||
return {
|
||||
resources: {
|
||||
page,
|
||||
},
|
||||
|
||||
close: async () => {
|
||||
await timeoutExecute(BROWSER_TIMEOUT, context.close());
|
||||
},
|
||||
};
|
||||
},
|
||||
|
||||
close: async () => {
|
||||
await chrome.close();
|
||||
},
|
||||
|
||||
repair: async () => {
|
||||
debug('Starting repair');
|
||||
try {
|
||||
// will probably fail, but just in case the repair was not necessary
|
||||
await chrome.close();
|
||||
} catch (e) {}
|
||||
|
||||
// just relaunch as there is only one page per browser
|
||||
chrome = await this.puppeteer.launch(options);
|
||||
},
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = CustomConcurrency;
|
@ -1,78 +0,0 @@
|
||||
const cheerio = require('cheerio');
|
||||
const Scraper = require('./se_scraper');
|
||||
|
||||
class BaiduScraper extends Scraper {
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// perform queries
|
||||
const results = [];
|
||||
$('#content_left .result').each((i, link) => {
|
||||
results.push({
|
||||
link: $(link).find('h3 a').attr('href'),
|
||||
title: $(link).find('h3').text(),
|
||||
snippet: $(link).find('.c-abstract').text(),
|
||||
visible_link: $(link).find('.f13').text(),
|
||||
})
|
||||
});
|
||||
|
||||
const cleaned = [];
|
||||
for (var i=0; i < results.length; i++) {
|
||||
let res = results[i];
|
||||
if (res.link && res.link.trim()) {
|
||||
res.rank = this.result_rank++;
|
||||
cleaned.push(res);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
no_results: false,
|
||||
num_results: $('.nums_text').text(),
|
||||
results: cleaned,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
try {
|
||||
await this.page.goto('https://www.baidu.com/');
|
||||
await this.page.waitForSelector('input[name="wd"]', { timeout: 5000 });
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[name="wd"]');
|
||||
// overwrites last text in input
|
||||
await input.click({ clickCount: 3 });
|
||||
await input.type(keyword);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
await next_page_link.click();
|
||||
await this.page.waitForNavigation();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
// TODO: very very bad, but nobody uses baidu, or does someone?
|
||||
await this.sleep(2000);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
BaiduScraper: BaiduScraper,
|
||||
};
|
@ -3,163 +3,238 @@ const Scraper = require('./se_scraper');
|
||||
|
||||
class BingScraper extends Scraper {
|
||||
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
async parse_async(html) {
|
||||
|
||||
// perform queries
|
||||
const results = [];
|
||||
$('#b_content #b_results .b_algo').each((i, link) => {
|
||||
results.push({
|
||||
link: $(link).find('h2 a').attr('href'),
|
||||
title: $(link).find('h2').text(),
|
||||
snippet: $(link).find('.b_caption p').text(),
|
||||
visible_link: $(link).find('cite').text(),
|
||||
})
|
||||
});
|
||||
let results = await this.page.evaluate(() => {
|
||||
|
||||
// 'Including results for', 'Einschließlich Ergebnisse'
|
||||
let no_results = this.no_results(
|
||||
['There are no results', 'Es gibt keine Ergebnisse'],
|
||||
$('#b_results').text()
|
||||
);
|
||||
let _text = (el, s) => {
|
||||
let n = el.querySelector(s);
|
||||
|
||||
let effective_query = $('#sp_requery a').first().text() || '';
|
||||
if (n) {
|
||||
return n.innerText;
|
||||
} else {
|
||||
return '';
|
||||
}
|
||||
};
|
||||
|
||||
const cleaned = [];
|
||||
for (var i=0; i < results.length; i++) {
|
||||
let res = results[i];
|
||||
if (res.link && res.link.trim() && res.title && res.title.trim()) {
|
||||
res.rank = this.result_rank++;
|
||||
cleaned.push(res);
|
||||
}
|
||||
}
|
||||
let _attr = (el, s, attr) => {
|
||||
let n = el.querySelector(s);
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
no_results: no_results,
|
||||
effective_query: effective_query,
|
||||
num_results: $('#b_content .sb_count').text(),
|
||||
results: cleaned,
|
||||
}
|
||||
}
|
||||
if (n) {
|
||||
return n.getAttribute(attr);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
async load_start_page() {
|
||||
try {
|
||||
await this.page.goto('https://www.bing.com/');
|
||||
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
let results = {
|
||||
num_results: '',
|
||||
no_results: false,
|
||||
effective_query: '',
|
||||
results: [],
|
||||
ads: [],
|
||||
right_side_ads: [],
|
||||
};
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[name="q"]');
|
||||
await this.set_input_value(`input[name="q"]`, keyword);
|
||||
await this.sleep(50);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
let num_results_el = document.querySelector('#b_content .sb_count');
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
await next_page_link.click();
|
||||
await this.page.waitForNavigation();
|
||||
if (num_results_el) {
|
||||
results.num_results = num_results_el.innerText;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
let organic_results = document.querySelectorAll('#b_content #b_results .b_algo');
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('#b_content', { timeout: 5000 });
|
||||
await this.sleep(500);
|
||||
}
|
||||
organic_results.forEach((el) => {
|
||||
|
||||
async detected() {
|
||||
// TODO: I was actually never detected by bing. those are good guys.
|
||||
}
|
||||
let serp_obj = {
|
||||
link: _attr(el, 'h2 a', 'href'),
|
||||
title: _text(el, 'h2'),
|
||||
snippet: _text(el, '.b_caption p'),
|
||||
visible_link: _text(el, 'cite'),
|
||||
};
|
||||
|
||||
results.results.push(serp_obj);
|
||||
});
|
||||
|
||||
// check if no results
|
||||
results.no_results = (results.results.length === 0);
|
||||
|
||||
// parse bing ads
|
||||
let ads = document.querySelectorAll('#b_results .b_ad .sb_add');
|
||||
|
||||
ads.forEach((el) => {
|
||||
|
||||
let ad_obj = {
|
||||
title: _text(el, 'h2 a'),
|
||||
snippet: _text(el, '.b_caption p'),
|
||||
visible_link: _text(el, '.b_adurl cite'),
|
||||
tracking_link: _attr(el, 'h2 a', 'href'),
|
||||
};
|
||||
|
||||
results.ads.push(ad_obj);
|
||||
});
|
||||
|
||||
// right side ads
|
||||
let right_side_ads = document.querySelectorAll('#b_context .b_ad .sb_add');
|
||||
|
||||
right_side_ads.forEach((el) => {
|
||||
|
||||
let ad_obj = {
|
||||
title: _text(el, 'h2 a'),
|
||||
snippet: _text(el, '.b_caption p'),
|
||||
visible_link: _text(el, '.b_adurl cite'),
|
||||
tracking_link: _attr(el, 'h2 a', 'href'),
|
||||
};
|
||||
|
||||
results.right_side_ads.push(ad_obj);
|
||||
});
|
||||
|
||||
|
||||
let effective_query_el = document.querySelector('#sp_requery a');
|
||||
|
||||
if (effective_query_el) {
|
||||
results.effective_query = effective_query_el.innerText;
|
||||
}
|
||||
|
||||
return results;
|
||||
});
|
||||
|
||||
results.results = this.clean_results(results.results, ['title', 'link']);
|
||||
results.ads = this.clean_results(results.ads, ['title', 'visible_link', 'tracking_link']);
|
||||
results.time = (new Date()).toUTCString();
|
||||
return results;
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
let startUrl = this.build_start_url('https://www.bing.com/search?') || 'https://www.bing.com/';
|
||||
|
||||
if (this.config.bing_settings) {
|
||||
startUrl = `https://www.${this.config.bing_settings.bing_domain}/search?`;
|
||||
if (this.config.bing_settings.bing_domain) {
|
||||
startUrl = `https://www.${this.config.bing_settings.bing_domain}/search?`;
|
||||
} else {
|
||||
startUrl = `https://www.bing.com/search?`;
|
||||
}
|
||||
|
||||
for (var key in this.config.bing_settings) {
|
||||
if (key !== 'bing_domain') {
|
||||
startUrl += `${key}=${this.config.bing_settings[key]}&`
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await this.page.goto(startUrl);
|
||||
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[name="q"]');
|
||||
await this.set_input_value(`input[name="q"]`, keyword);
|
||||
await this.sleep(50);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.last_response = await Promise.all([
|
||||
next_page_link.click(), // The promise resolves after navigation has finished
|
||||
this.page.waitForNavigation(), // Clicking the link will indirectly cause a navigation
|
||||
]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('#b_content', { timeout: this.STANDARD_TIMEOUT });
|
||||
}
|
||||
|
||||
async detected() {
|
||||
// TODO: I was actually never detected by bing. those are good boys.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class BingNewsScraper extends Scraper {
|
||||
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// perform queries
|
||||
const results = [];
|
||||
$('#algocore .newsitem').each((i, link) => {
results.push({
link: $(link).attr('url'),
title: $(link).find('a.title').text(),
snippet: $(link).find('.snippet').text(),
date: $(link).find('.source span').last().text(),
})
});

// perform queries
const results = [];
$('#algocore .newsitem').each((i, link) => {
results.push({
link: $(link).attr('url'),
title: $(link).find('a.title').text(),
snippet: $(link).find('.snippet').text(),
date: $(link).find('.source span').last().text(),
})
});

const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
const cleaned = this.clean_results(results, ['title', 'link']);

return {
time: (new Date()).toUTCString(),
results: cleaned,
}
}
return {
time: (new Date()).toUTCString(),
results: cleaned,
}
}

async load_start_page() {
try {
await this.page.goto('https://www.bing.com/news/search?');
if (this.config.set_manual_settings === true) {
console.log('Sleeping 30 seconds. Set your settings now.');
await this.sleep(30000);
}
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
}
return true;
}
async load_start_page() {
let startUrl = 'https://www.bing.com/news/search?';

async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
try {
await this.page.goto(startUrl);
if (this.config.set_manual_settings === true) {
console.log('Sleeping 30 seconds. Set your settings now.');
await this.sleep(30000);
}
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
} catch (e) {
return false;
}

async next_page() {
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
await this.page.waitForNavigation();
return true;
}

return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}

async wait_for_results() {
await this.page.waitForSelector('#news', { timeout: 5000 });
await this.sleep(2000);
}
async next_page() {
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
return false;
}

async detected() {
// TODO: I was actually never detected by bing news.
}
this.last_response = await Promise.all([
next_page_link.click(), // The promise resolves after navigation has finished
this.page.waitForNavigation(), // Clicking the link will indirectly cause a navigation
]);

return true;
}

async wait_for_results() {
await this.page.waitForSelector('#news', { timeout: this.STANDARD_TIMEOUT });
}

async detected() {
// TODO: I was actually never detected by bing news.
}
}

module.exports = {
BingNewsScraper: BingNewsScraper,
BingScraper: BingScraper,
};
BingNewsScraper: BingNewsScraper,
BingScraper: BingScraper,
};
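// A minimal standalone sketch of the click + waitForNavigation pattern that the new
// BingNewsScraper.next_page() uses, shown in isolation so the intent of the
// Promise.all() change is easier to see. Assumes puppeteer is installed; the
// '.sb_pagN' selector comes from the diff, the URL and keyword are illustrative.
const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto('https://www.bing.com/news/search?q=test');

    const next_page_link = await page.$('.sb_pagN');
    if (next_page_link) {
        // Start waiting for the navigation before clicking, then resolve both
        // together; the navigation's response object can then be kept around
        // (this is what last_response stores in the scraper).
        const [response] = await Promise.all([
            page.waitForNavigation(), // resolves with the response of the next SERP page
            next_page_link.click(),   // the click indirectly triggers that navigation
        ]);
        console.log('next page status:', response && response.status());
    }
    await browser.close();
})();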
@ -1,15 +1,18 @@
|
||||
const cheerio = require('cheerio');
|
||||
const Scraper = require('./se_scraper');
|
||||
const debug = require('debug')('se-scraper:DuckduckgoScraper');
|
||||
|
||||
class DuckduckgoScraper extends Scraper {
|
||||
|
||||
parse(html) {
|
||||
debug('parse');
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// perform queries
|
||||
const results = [];
|
||||
$('.result__body').each((i, link) => {
|
||||
const organicSelector = ($('#links .result--sep').length > 0) ? `#links #rld-${this.page_num - 1} ~ .result .result__body` : '#links .result__body';
|
||||
$(organicSelector).each((i, link) => {
|
||||
results.push({
|
||||
link: $(link).find('.result__title .result__a').attr('href'),
|
||||
title: $(link).find('.result__title .result__a').text(),
|
||||
@ -19,35 +22,40 @@ class DuckduckgoScraper extends Scraper {
|
||||
});
|
||||
});
|
||||
|
||||
const ads = [];
|
||||
$('.results--ads .result').each((i, element) => {
|
||||
ads.push({
|
||||
visible_link: $(element).find('.result__url').text(),
|
||||
tracking_link: $(element).find('.result__title .result__a').attr('href'),
|
||||
title: $(element).find('.result__title .result__a').text(),
|
||||
snippet: $(element).find('.result__snippet').text(),
|
||||
})
|
||||
});
|
||||
|
||||
let effective_query = $('a.js-spelling-suggestion-link').attr('data-query') || '';
|
||||
|
||||
const cleaned = [];
|
||||
for (var i=0; i < results.length; i++) {
|
||||
let res = results[i];
|
||||
if (res.link && res.link.trim() && res.title && res.title.trim()) {
|
||||
res.rank = this.result_rank++;
|
||||
cleaned.push(res);
|
||||
}
|
||||
}
|
||||
const cleaned = this.clean_results(results, ['title', 'link']);
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
effective_query: effective_query,
|
||||
results: cleaned
|
||||
results: cleaned,
|
||||
ads: ads,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
try {
|
||||
await this.page.goto('https://duckduckgo.com/');
|
||||
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
debug('load_start_page');
|
||||
let startUrl = 'https://duckduckgo.com/';
|
||||
|
||||
this.last_response = await this.page.goto(startUrl);
|
||||
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
debug('search_keyword');
|
||||
const input = await this.page.$('input[name="q"]');
|
||||
await this.set_input_value(`input[name="q"]`, keyword);
|
||||
await this.sleep(50);
|
||||
@ -56,90 +64,20 @@ class DuckduckgoScraper extends Scraper {
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('a.result--more__btn', {timeout: 1000});
|
||||
debug('next_page');
|
||||
let next_page_link = await this.page.$('.result.result--more a', {timeout: this.STANDARD_TIMEOUT});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
await next_page_link.click();
|
||||
//await this.page.waitForNavigation();
|
||||
await this.page.waitForNavigation({ timeout: this.STANDARD_TIMEOUT });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('.serp__results', { timeout: 5000 });
|
||||
await this.sleep(250);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class DuckduckgoNewsScraper extends Scraper {
|
||||
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// perform queries
|
||||
const results = [];
|
||||
$('.result--news').each((i, link) => {
|
||||
results.push({
|
||||
link: $(link).find('.result__title .result__a').attr('href'),
|
||||
title: $(link).find('.result__title .result__a').text(),
|
||||
date: $(link).find('.result__timestamp').text(),
|
||||
snippet: $(link).find('.result__snippet').text(),
|
||||
});
|
||||
});
|
||||
|
||||
const cleaned = [];
|
||||
for (var i=0; i < results.length; i++) {
|
||||
let res = results[i];
|
||||
if (res.link && res.link.trim() && res.title && res.title.trim()) {
|
||||
res.rank = this.result_rank++;
|
||||
cleaned.push(res);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
results: cleaned
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
try {
|
||||
await page.goto('https://duckduckgo.com/?q=42&t=h_&iar=news&ia=news');
|
||||
await page.waitForSelector('input[name="q"]', { timeout: 5000 });
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[name="q"]');
|
||||
await this.set_input_value(`input[name="q"]`, keyword);
|
||||
await this.sleep(50);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
await next_page_link.click();
|
||||
await this.page.waitForNavigation();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('.serp__results', { timeout: 5000 });
|
||||
await this.sleep(1500);
|
||||
debug('wait_for_results');
|
||||
await this.page.waitForSelector('.result__body', { timeout: this.STANDARD_TIMEOUT });
|
||||
}
|
||||
|
||||
async detected() {
|
||||
@ -147,6 +85,5 @@ class DuckduckgoNewsScraper extends Scraper {
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
DuckduckgoNewsScraper: DuckduckgoNewsScraper,
|
||||
DuckduckgoScraper: DuckduckgoScraper,
|
||||
};
|
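// A small cheerio sketch of the new DuckDuckGo ads extraction from the diff above,
// runnable against a saved SERP file. The selectors are taken from the diff; the
// file path is an assumption for illustration only.
const cheerio = require('cheerio');
const fs = require('fs');

const html = fs.readFileSync('test/static_tests/html/duckduckgo1.html', 'utf8');
const $ = cheerio.load(html);

const ads = [];
$('.results--ads .result').each((i, element) => {
    ads.push({
        visible_link: $(element).find('.result__url').text(),
        tracking_link: $(element).find('.result__title .result__a').attr('href'),
        title: $(element).find('.result__title .result__a').text(),
        snippet: $(element).find('.result__snippet').text(),
    });
});

console.log(JSON.stringify(ads, null, 2));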
File diff suppressed because it is too large
@ -41,8 +41,11 @@ class InfospaceScraper extends Scraper {
}

async load_start_page() {

let startUrl = this.build_start_url('http://search.infospace.com/search/web?') || 'http://infospace.com/index.html';

try {
await this.page.goto('http://infospace.com/index.html');
this.last_response = await this.page.goto(startUrl);
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
@ -64,14 +67,13 @@ class InfospaceScraper extends Scraper {
return false;
}
await next_page_link.click();
await this.page.waitForNavigation();
this.last_response = await this.page.waitForNavigation();

return true;
}

async wait_for_results() {
await this.page.waitForSelector('.mainline-results', { timeout: 5000 }); // TODO: this is not the best selector.
await this.sleep(250);
}

async detected() {
@ -98,14 +100,7 @@ class WebcrawlerNewsScraper extends Scraper {
});
});

const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
const cleaned = this.clean_results(results, ['title', 'link']);

return {
time: (new Date()).toUTCString(),
@ -115,7 +110,7 @@ class WebcrawlerNewsScraper extends Scraper {

async load_start_page() {
try {
await this.page.goto('https://www.webcrawler.com/?qc=news');
this.last_response = await this.page.goto('https://www.webcrawler.com/?qc=news');
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
@ -144,7 +139,6 @@ class WebcrawlerNewsScraper extends Scraper {

async wait_for_results() {
await this.page.waitForSelector('.mainline-results', { timeout: 5000 });
await this.sleep(150);
}

async detected() {
@ -1,33 +1,31 @@
const cheerio = require('cheerio');

module.exports = {
get_ip_data: get_ip_data,
get_http_headers: get_http_headers,
get_ip_data: get_ip_data,
get_http_headers: get_http_headers,
};

async function get_ip_data(browser) {
const page = await browser.newPage();
await page.goto('https://ipinfo.io/json', {
waitLoad: true,
waitNetworkIdle: true // defaults to false
});
let json = await page.content({
timeout: 20000
});
const $ = cheerio.load(json);
let ipinfo_text = $('pre').text();
return JSON.parse(ipinfo_text);
async function get_ip_data(page) {
await page.goto('https://ipinfo.io/json', {
waitLoad: true,
waitNetworkIdle: true
});
let json = await page.content({
timeout: 20000
});
const $ = cheerio.load(json);
let ipinfo_text = $('pre').text();
return JSON.parse(ipinfo_text);
}

async function get_http_headers(browser) {
const page = await browser.newPage();
await page.goto('https://httpbin.org/get', {
waitLoad: true,
waitNetworkIdle: true // defaults to false
});
let headers = await page.content();
async function get_http_headers(page) {
await page.goto('https://httpbin.org/get', {
waitLoad: true,
waitNetworkIdle: true
});
let headers = await page.content();

const $ = cheerio.load(headers);
let headers_text = $('pre').text();
return JSON.parse(headers_text);
const $ = cheerio.load(headers);
let headers_text = $('pre').text();
return JSON.parse(headers_text);
}
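// Usage sketch for the refactored helpers above: both now receive an existing
// puppeteer page instead of opening their own tab from a browser handle. The
// require path matches the one used in node_scraper.js; everything else here
// is illustrative.
const puppeteer = require('puppeteer');
const meta = require('./modules/metadata.js');

(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();

    // Because the same page/tab is reused, any proxy or user agent configured
    // on it is exactly what ipinfo.io and httpbin.org report back.
    const ipinfo = await meta.get_ip_data(page);
    const headers = await meta.get_http_headers(page);

    console.log('external ip:', ipinfo.ip);
    console.log('user agent seen by httpbin:', headers.headers['User-Agent']);

    await browser.close();
})();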
@ -1,7 +1,6 @@
|
||||
const start_url = {
|
||||
'google': ''
|
||||
};
|
||||
|
||||
'use strict';
|
||||
const meta = require('./metadata.js');
|
||||
const debug = require('debug')('se-scraper:Scraper');
|
||||
/*
|
||||
Get useful JS knowledge and get awesome...
|
||||
|
||||
@ -11,21 +10,28 @@ const start_url = {
|
||||
|
||||
module.exports = class Scraper {
|
||||
constructor(options = {}) {
|
||||
debug('constructor');
|
||||
const {
|
||||
browser = null,
|
||||
config = {},
|
||||
context = {},
|
||||
pluggable = null,
|
||||
page = null,
|
||||
} = options;
|
||||
|
||||
this.page = page;
|
||||
this.last_response = null; // the last response object
|
||||
this.metadata = {
|
||||
scraping_detected: false,
|
||||
};
|
||||
this.pluggable = pluggable;
|
||||
this.browser = browser;
|
||||
this.config = config;
|
||||
this.logger = this.config.logger;
|
||||
this.context = context;
|
||||
|
||||
this.STANDARD_TIMEOUT = 8000;
|
||||
// longer timeout when using proxies
|
||||
this.PROXY_TIMEOUT = 15000;
|
||||
this.proxy = config.proxy;
|
||||
this.keywords = config.keywords;
|
||||
|
||||
this.STANDARD_TIMEOUT = 10000;
|
||||
this.SOLVE_CAPTCHA_TIME = 45000;
|
||||
|
||||
this.results = {};
|
||||
@ -34,20 +40,42 @@ module.exports = class Scraper {
|
||||
this.num_requests = 0;
|
||||
// keep track of the keywords searched
|
||||
this.num_keywords = 0;
|
||||
|
||||
let settings = this.config[`${this.config.search_engine}_settings`];
|
||||
if (settings) {
|
||||
if (typeof settings === 'string') {
|
||||
settings = JSON.parse(settings);
|
||||
this.config[`${this.config.search_engine}_settings`] = settings;
|
||||
}
|
||||
}
|
||||
}
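// Sketch of what the settings-parsing block in the constructor above accepts:
// the per-engine settings may arrive either as an object or as a JSON string
// and are normalized to an object. The keys shown (num, hl, gl) are typical
// query parameters and are assumptions for illustration, not library defaults.
const config_with_object = {
    search_engine: 'google',
    google_settings: { num: 10, hl: 'en', gl: 'us' },
};

const config_with_string = {
    search_engine: 'google',
    // equivalent form, e.g. when the config comes from an environment variable
    google_settings: '{"num": 10, "hl": "en", "gl": "us"}',
};

// after the constructor runs, both configs expose a plain object:
// this.config.google_settings.num === 10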
|
||||
|
||||
async run() {
|
||||
async run({page, data, worker}) {
|
||||
|
||||
let do_continue = await this.load_search_engine();
|
||||
debug('worker=%o', worker, this.config.keywords);
|
||||
|
||||
if (page) {
|
||||
this.page = page;
|
||||
}
|
||||
|
||||
await this.page.setViewport({ width: 1920, height: 1040 });
|
||||
let do_continue = true;
|
||||
|
||||
if (this.config.scrape_from_file.length <= 0) {
|
||||
do_continue = await this.load_search_engine();
|
||||
}
|
||||
|
||||
if (!do_continue) {
|
||||
console.error('Failed to load the search engine: load_search_engine()');
|
||||
return this.results;
|
||||
} else {
|
||||
await this.scraping_loop();
|
||||
}
|
||||
|
||||
await this.scraping_loop();
|
||||
|
||||
return this.results;
|
||||
return {
|
||||
results: this.results,
|
||||
metadata: this.metadata,
|
||||
num_requests: this.num_requests,
|
||||
}
|
||||
}
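// The new run({page, data, worker}) signature above matches the task callback
// of puppeteer-cluster, which node_scraper.js uses to drive the scrapers. A
// hedged sketch of that wiring in isolation (URL and concurrency values are
// placeholders):
const { Cluster } = require('puppeteer-cluster');

(async () => {
    const cluster = await Cluster.launch({
        concurrency: Cluster.CONCURRENCY_CONTEXT,
        maxConcurrency: 2,
    });

    await cluster.task(async ({ page, data, worker }) => {
        // `data` is whatever was queued below; in se-scraper the cluster hands
        // `page` straight on to Scraper.run({page, data, worker}) as defined above.
        await page.goto(data);
        return await page.title();
    });

    cluster.queue('https://example.com');
    await cluster.idle();
    await cluster.close();
})();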
|
||||
|
||||
/**
|
||||
@ -58,10 +86,10 @@ module.exports = class Scraper {
|
||||
*/
|
||||
async load_search_engine() {
|
||||
|
||||
this.page = await this.browser.newPage();
|
||||
|
||||
// prevent detection by evading common detection techniques
|
||||
await evadeChromeHeadlessDetection(this.page);
|
||||
if (this.config.apply_evasion_techniques === true) {
|
||||
// prevent detection by evading common detection techniques
|
||||
await evadeChromeHeadlessDetection(this.page);
|
||||
}
|
||||
|
||||
// block some assets to speed up scraping
|
||||
if (this.config.block_assets === true) {
|
||||
@ -79,12 +107,35 @@ module.exports = class Scraper {
|
||||
|
||||
if (this.config.test_evasion === true) {
|
||||
// Navigate to the page that will perform the tests.
|
||||
const testUrl = 'https://intoli.com/blog/' +
|
||||
'not-possible-to-block-chrome-headless/chrome-headless-test.html';
|
||||
const testUrl = 'https://bot.sannysoft.com';
|
||||
await this.page.goto(testUrl);
|
||||
|
||||
// Save a screenshot of the results.
|
||||
await this.page.screenshot({path: 'headless-test-result.png'});
|
||||
await this.page.screenshot({path: 'headless-evasion-result.png'});
|
||||
}
|
||||
|
||||
if (this.config.log_http_headers === true) {
|
||||
this.metadata.http_headers = await meta.get_http_headers(this.page);
|
||||
debug('this.metadata.http_headers=%O', this.metadata.http_headers);
|
||||
}
|
||||
|
||||
if (this.config.log_ip_address === true) {
|
||||
let ipinfo = await meta.get_ip_data(this.page);
|
||||
this.metadata.ipinfo = ipinfo;
|
||||
debug('this.metadata.ipinfo', this.metadata.ipinfo);
|
||||
}
|
||||
|
||||
// check that our proxy is working by confirming
|
||||
// that ipinfo.io sees the proxy IP address
|
||||
if (this.proxy && this.config.log_ip_address === true) {
|
||||
debug(`${this.metadata.ipinfo.ip} vs ${this.proxy}`);
|
||||
|
||||
// if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
|
||||
if (!this.proxy.includes(this.metadata.ipinfo.ip)) {
|
||||
throw new Error(`Proxy output ip ${this.proxy} does not match the provided one`);
|
||||
} else {
|
||||
this.logger.info(`Using valid Proxy: ${this.proxy}`);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return await this.load_start_page();
|
||||
@ -98,37 +149,39 @@ module.exports = class Scraper {
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async scraping_loop() {
|
||||
for (let keyword of this.config.keywords) {
|
||||
for (var keyword of this.keywords) {
|
||||
this.num_keywords++;
|
||||
this.keyword = keyword;
|
||||
this.results[keyword] = {};
|
||||
this.result_rank = 1;
|
||||
|
||||
if (this.pluggable.before_keyword_scraped) {
|
||||
await this.pluggable.before_keyword_scraped({
|
||||
num_keywords: this.num_keywords,
|
||||
num_requests: this.num_requests,
|
||||
keyword: keyword,
|
||||
page: this.page,
|
||||
config: this.config,
|
||||
context: this.context,
|
||||
});
|
||||
}
|
||||
|
||||
let page_num = 1;
|
||||
|
||||
try {
|
||||
|
||||
await this.search_keyword(keyword);
|
||||
if (this.pluggable && this.pluggable.before_keyword_scraped) {
|
||||
await this.pluggable.before_keyword_scraped({
|
||||
results: this.results,
|
||||
num_keywords: this.num_keywords,
|
||||
num_requests: this.num_requests,
|
||||
keyword: keyword,
|
||||
});
|
||||
}
|
||||
|
||||
this.page_num = 1;
|
||||
|
||||
// load scraped page from file if `scrape_from_file` is given
|
||||
if (this.config.scrape_from_file.length <= 0) {
|
||||
await this.search_keyword(keyword);
|
||||
} else {
|
||||
this.last_response = await this.page.goto(this.config.scrape_from_file);
|
||||
}
|
||||
|
||||
// when searching the keyword fails, num_requests will not
|
||||
// be incremented.
|
||||
this.num_requests++;
|
||||
|
||||
do {
|
||||
|
||||
if (this.config.verbose === true) {
|
||||
console.log(`${this.config.search_engine} scrapes keyword "${keyword}" on page ${page_num}`);
|
||||
}
|
||||
this.logger.info(`${this.config.search_engine_name} scrapes keyword "${keyword}" on page ${this.page_num}`);
|
||||
|
||||
await this.wait_for_results();
|
||||
|
||||
@ -138,13 +191,66 @@ module.exports = class Scraper {
|
||||
|
||||
let html = await this.page.content();
|
||||
let parsed = this.parse(html);
|
||||
this.results[keyword][page_num] = parsed ? parsed : await this.parse_async(html);
|
||||
this.results[keyword][this.page_num] = parsed ? parsed : await this.parse_async(html);
|
||||
|
||||
page_num += 1;
|
||||
if (this.config.screen_output) {
|
||||
this.results[keyword][this.page_num].screenshot = await this.page.screenshot({
|
||||
encoding: 'base64',
|
||||
fullPage: false,
|
||||
});
|
||||
}
|
||||
|
||||
if (this.config.html_output) {
|
||||
|
||||
if (this.config.clean_html_output) {
|
||||
await this.page.evaluate(() => {
|
||||
// remove script and style tags
|
||||
Array.prototype.slice.call(document.getElementsByTagName('script')).forEach(
|
||||
function(item) {
|
||||
item.remove();
|
||||
});
|
||||
Array.prototype.slice.call(document.getElementsByTagName('style')).forEach(
|
||||
function(item) {
|
||||
item.remove();
|
||||
});
|
||||
|
||||
// remove all comment nodes
|
||||
var nodeIterator = document.createNodeIterator(
|
||||
document.body,
|
||||
NodeFilter.SHOW_COMMENT,
|
||||
{ acceptNode: function(node) { return NodeFilter.FILTER_ACCEPT; } }
|
||||
);
|
||||
while(nodeIterator.nextNode()){
|
||||
var commentNode = nodeIterator.referenceNode;
|
||||
commentNode.remove();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (this.config.clean_data_images) {
|
||||
await this.page.evaluate(() => {
|
||||
Array.prototype.slice.call(document.getElementsByTagName('img')).forEach(
|
||||
function(item) {
|
||||
let src = item.getAttribute('src');
|
||||
if (src && src.startsWith('data:')) {
|
||||
item.setAttribute('src', '');
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
let html_contents = await this.page.content();
|
||||
// https://stackoverflow.com/questions/27841112/how-to-remove-white-space-between-html-tags-using-javascript
|
||||
// TODO: not sure if this is safe!
|
||||
html_contents = html_contents.replace(/>\s+</g,'><');
|
||||
this.results[keyword][this.page_num].html = html_contents;
|
||||
}
|
||||
|
||||
this.page_num += 1;
|
||||
|
||||
// only load the next page when we will pass the next iteration
|
||||
// step from the while loop
|
||||
if (page_num <= this.config.num_pages) {
|
||||
if (this.page_num <= this.config.num_pages) {
|
||||
|
||||
let next_page_loaded = await this.next_page();
|
||||
|
||||
@ -155,36 +261,66 @@ module.exports = class Scraper {
|
||||
}
|
||||
}
|
||||
|
||||
} while (page_num <= this.config.num_pages);
|
||||
} while (this.page_num <= this.config.num_pages);
|
||||
|
||||
} catch (e) {
|
||||
|
||||
console.error(`Problem with scraping ${keyword} in search engine ${this.config.search_engine}: ${e}`);
|
||||
this.logger.warn(`Problem with scraping ${keyword} in search engine ${this.config.search_engine_name}: ${e.message}`);
|
||||
debug('this.last_response=%O', this.last_response);
|
||||
|
||||
if (await this.detected() === true) {
|
||||
console.error(`${this.config.search_engine} DETECTED the scraping!`);
|
||||
if (this.config.take_screenshot_on_error) {
|
||||
await this.page.screenshot({ path: `debug_se_scraper_${this.config.search_engine_name}_${keyword}.png` });
|
||||
}
|
||||
|
||||
this.metadata.scraping_detected = await this.detected();
|
||||
|
||||
if (this.metadata.scraping_detected === true) {
|
||||
this.logger.warn(`${this.config.search_engine_name} detected the scraping!`);
|
||||
|
||||
if (this.config.is_local === true) {
|
||||
await this.sleep(this.SOLVE_CAPTCHA_TIME);
|
||||
console.error(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`);
|
||||
this.logger.info(`You have ${this.SOLVE_CAPTCHA_TIME}ms to enter the captcha.`);
|
||||
// expect that user filled out necessary captcha
|
||||
} else {
|
||||
break;
|
||||
if (this.config.throw_on_detection === true) {
|
||||
throw( e );
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// some other error, quit scraping process if stuff is broken
|
||||
if (this.config.is_local === true) {
|
||||
console.error('You have 30 seconds to fix this.');
|
||||
await this.sleep(30000);
|
||||
if (this.config.throw_on_detection === true) {
|
||||
throw( e );
|
||||
} else {
|
||||
break;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic function to append queryArgs to a search engine url.
|
||||
*
|
||||
* @param: The baseUrl to use for the build process.
|
||||
*/
|
||||
build_start_url(baseUrl) {
|
||||
let settings = this.config[`${this.config.search_engine}_settings`];
|
||||
|
||||
if (settings) {
|
||||
for (var key in settings) {
|
||||
baseUrl += `${key}=${settings[key]}&`
|
||||
}
|
||||
|
||||
this.logger.info('Using startUrl: ' + baseUrl);
|
||||
|
||||
return baseUrl;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
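// Hedged standalone mirror of the build_start_url() loop above, to show the
// start URL that a per-engine settings object produces. The engine parameters
// (count, mkt) are illustrative values, not defaults of the library.
function build_start_url_example(baseUrl, settings) {
    for (var key in settings) {
        baseUrl += `${key}=${settings[key]}&`;
    }
    return baseUrl;
}

console.log(build_start_url_example('https://www.bing.com/search?', { count: 20, mkt: 'en-US' }));
// -> https://www.bing.com/search?count=20&mkt=en-US&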
|
||||
|
||||
sleep(ms) {
|
||||
return new Promise(resolve => {
|
||||
setTimeout(resolve, ms)
|
||||
@ -194,9 +330,7 @@ module.exports = class Scraper {
|
||||
async random_sleep() {
|
||||
const [min, max] = this.config.sleep_range;
|
||||
let rand = Math.floor(Math.random() * (max - min + 1) + min); //Generate Random number
|
||||
if (this.config.debug === true) {
|
||||
console.log(`Sleeping for ${rand}s`);
|
||||
}
|
||||
this.logger.info(`Sleeping for ${rand}s`);
|
||||
await this.sleep(rand * 1000);
|
||||
}
|
||||
|
||||
@ -210,15 +344,35 @@ module.exports = class Scraper {
|
||||
no_results(needles, html) {
|
||||
for (let needle of needles) {
|
||||
if (html.includes(needle)) {
|
||||
if (this.config.debug) {
|
||||
console.log(`HTML contains needle ${needle}. no_results=true`);
|
||||
}
|
||||
this.logger.warn(`HTML contains needle ${needle}. no_results=true`);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
Throw away all elements that do not have data in the
|
||||
specified attributes. Attribute values must be strings.
|
||||
*/
|
||||
clean_results(results, attributes) {
|
||||
const cleaned = [];
|
||||
for (var res of results) {
|
||||
let goodboy = true;
|
||||
for (var attr of attributes) {
|
||||
if (!res[attr] || !res[attr].trim()) {
|
||||
goodboy = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (goodboy) {
|
||||
res.rank = this.result_rank++;
|
||||
cleaned.push(res);
|
||||
}
|
||||
}
|
||||
return cleaned;
|
||||
}
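// Hedged usage sketch of clean_results() above: entries missing (or blank in)
// any of the required attributes are dropped, the rest get an incrementing
// rank. The sample data is invented for illustration.
const raw = [
    { title: 'Node.js', link: 'https://nodejs.org', snippet: '...' },
    { title: '   ', link: 'https://example.com' },  // dropped: blank title
    { title: 'No link here' },                      // dropped: missing link
];

// inside a Scraper subclass (this.result_rank starts at 1 per keyword):
// const cleaned = this.clean_results(raw, ['title', 'link']);
// -> [ { title: 'Node.js', link: 'https://nodejs.org', snippet: '...', rank: 1 } ]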
|
||||
|
||||
parse(html) {
|
||||
|
||||
}
|
||||
@ -265,127 +419,131 @@ module.exports = class Scraper {
|
||||
|
||||
// This is where we'll put the code to get around the tests.
|
||||
async function evadeChromeHeadlessDetection(page) {
|
||||
// Pass the Webdriver Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
const newProto = navigator.__proto__;
|
||||
delete newProto.webdriver;
|
||||
navigator.__proto__ = newProto;
|
||||
});
|
||||
|
||||
// Pass the Chrome Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
// We can mock this in as much depth as we need for the test.
|
||||
const mockObj = {
|
||||
app: {
|
||||
isInstalled: false,
|
||||
},
|
||||
webstore: {
|
||||
onInstallStageChanged: {},
|
||||
onDownloadProgress: {},
|
||||
},
|
||||
runtime: {
|
||||
PlatformOs: {
|
||||
MAC: 'mac',
|
||||
WIN: 'win',
|
||||
ANDROID: 'android',
|
||||
CROS: 'cros',
|
||||
LINUX: 'linux',
|
||||
OPENBSD: 'openbsd',
|
||||
},
|
||||
PlatformArch: {
|
||||
ARM: 'arm',
|
||||
X86_32: 'x86-32',
|
||||
X86_64: 'x86-64',
|
||||
},
|
||||
PlatformNaclArch: {
|
||||
ARM: 'arm',
|
||||
X86_32: 'x86-32',
|
||||
X86_64: 'x86-64',
|
||||
},
|
||||
RequestUpdateCheckStatus: {
|
||||
THROTTLED: 'throttled',
|
||||
NO_UPDATE: 'no_update',
|
||||
UPDATE_AVAILABLE: 'update_available',
|
||||
},
|
||||
OnInstalledReason: {
|
||||
INSTALL: 'install',
|
||||
UPDATE: 'update',
|
||||
CHROME_UPDATE: 'chrome_update',
|
||||
SHARED_MODULE_UPDATE: 'shared_module_update',
|
||||
},
|
||||
OnRestartRequiredReason: {
|
||||
APP_UPDATE: 'app_update',
|
||||
OS_UPDATE: 'os_update',
|
||||
PERIODIC: 'periodic',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
window.navigator.chrome = mockObj;
|
||||
window.chrome = mockObj;
|
||||
});
|
||||
|
||||
// Pass the Permissions Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.__proto__.query = parameters =>
|
||||
parameters.name === 'notifications'
|
||||
? Promise.resolve({state: Notification.permission})
|
||||
: originalQuery(parameters);
|
||||
|
||||
// Inspired by: https://github.com/ikarienator/phantomjs_hide_and_seek/blob/master/5.spoofFunctionBind.js
|
||||
const oldCall = Function.prototype.call;
|
||||
function call() {
|
||||
return oldCall.apply(this, arguments);
|
||||
}
|
||||
Function.prototype.call = call;
|
||||
|
||||
const nativeToStringFunctionString = Error.toString().replace(/Error/g, "toString");
|
||||
const oldToString = Function.prototype.toString;
|
||||
|
||||
function functionToString() {
|
||||
if (this === window.navigator.permissions.query) {
|
||||
return "function query() { [native code] }";
|
||||
}
|
||||
if (this === functionToString) {
|
||||
return nativeToStringFunctionString;
|
||||
}
|
||||
return oldCall.call(oldToString, this);
|
||||
}
|
||||
Function.prototype.toString = functionToString;
|
||||
});
|
||||
|
||||
// Pass the Plugins Length Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
// Overwrite the `plugins` property to use a custom getter.
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
// This just needs to have `length > 0` for the current test,
|
||||
// but we could mock the plugins too if necessary.
|
||||
get: () => [1, 2, 3, 4, 5]
|
||||
// Pass the Webdriver Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
const newProto = navigator.__proto__;
|
||||
delete newProto.webdriver;
|
||||
navigator.__proto__ = newProto;
|
||||
});
|
||||
});
|
||||
|
||||
// Pass the Languages Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
// Overwrite the `plugins` property to use a custom getter.
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US', 'en']
|
||||
// Pass the Chrome Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
// We can mock this in as much depth as we need for the test.
|
||||
const mockObj = {
|
||||
app: {
|
||||
isInstalled: false,
|
||||
},
|
||||
webstore: {
|
||||
onInstallStageChanged: {},
|
||||
onDownloadProgress: {},
|
||||
},
|
||||
runtime: {
|
||||
PlatformOs: {
|
||||
MAC: 'mac',
|
||||
WIN: 'win',
|
||||
ANDROID: 'android',
|
||||
CROS: 'cros',
|
||||
LINUX: 'linux',
|
||||
OPENBSD: 'openbsd',
|
||||
},
|
||||
PlatformArch: {
|
||||
ARM: 'arm',
|
||||
X86_32: 'x86-32',
|
||||
X86_64: 'x86-64',
|
||||
},
|
||||
PlatformNaclArch: {
|
||||
ARM: 'arm',
|
||||
X86_32: 'x86-32',
|
||||
X86_64: 'x86-64',
|
||||
},
|
||||
RequestUpdateCheckStatus: {
|
||||
THROTTLED: 'throttled',
|
||||
NO_UPDATE: 'no_update',
|
||||
UPDATE_AVAILABLE: 'update_available',
|
||||
},
|
||||
OnInstalledReason: {
|
||||
INSTALL: 'install',
|
||||
UPDATE: 'update',
|
||||
CHROME_UPDATE: 'chrome_update',
|
||||
SHARED_MODULE_UPDATE: 'shared_module_update',
|
||||
},
|
||||
OnRestartRequiredReason: {
|
||||
APP_UPDATE: 'app_update',
|
||||
OS_UPDATE: 'os_update',
|
||||
PERIODIC: 'periodic',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
window.navigator.chrome = mockObj;
|
||||
window.chrome = mockObj;
|
||||
});
|
||||
});
|
||||
|
||||
// Pass the iframe Test
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
Object.defineProperty(HTMLIFrameElement.prototype, 'contentWindow', {
|
||||
get: function() {
|
||||
return window;
|
||||
// Pass the Permissions Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.__proto__.query = parameters =>
|
||||
parameters.name === 'notifications'
|
||||
? Promise.resolve({state: Notification.permission})
|
||||
: originalQuery(parameters);
|
||||
|
||||
// Inspired by: https://github.com/ikarienator/phantomjs_hide_and_seek/blob/master/5.spoofFunctionBind.js
|
||||
const oldCall = Function.prototype.call;
|
||||
|
||||
function call() {
|
||||
return oldCall.apply(this, arguments);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Pass toString test, though it breaks console.debug() from working
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
window.console.debug = () => {
|
||||
return null;
|
||||
};
|
||||
});
|
||||
}
|
||||
Function.prototype.call = call;
|
||||
|
||||
const nativeToStringFunctionString = Error.toString().replace(/Error/g, "toString");
|
||||
const oldToString = Function.prototype.toString;
|
||||
|
||||
function functionToString() {
|
||||
if (this === window.navigator.permissions.query) {
|
||||
return "function query() { [native code] }";
|
||||
}
|
||||
if (this === functionToString) {
|
||||
return nativeToStringFunctionString;
|
||||
}
|
||||
return oldCall.call(oldToString, this);
|
||||
}
|
||||
|
||||
Function.prototype.toString = functionToString;
|
||||
});
|
||||
|
||||
// Pass the Plugins Length Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
// Overwrite the `plugins` property to use a custom getter.
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
// This just needs to have `length > 0` for the current test,
|
||||
// but we could mock the plugins too if necessary.
|
||||
get: () => [1, 2, 3, 4, 5]
|
||||
});
|
||||
});
|
||||
|
||||
// Pass the Languages Test.
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
// Overwrite the `plugins` property to use a custom getter.
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US', 'en']
|
||||
});
|
||||
});
|
||||
|
||||
// Pass the iframe Test
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
Object.defineProperty(HTMLIFrameElement.prototype, 'contentWindow', {
|
||||
get: function () {
|
||||
return window;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Pass toString test, though it breaks console.debug() from working
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
window.console.debug = () => {
|
||||
return null;
|
||||
};
|
||||
});
|
||||
}
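// Hedged sketch of wiring the evasion helper above into a standalone puppeteer
// script. evadeChromeHeadlessDetection() is not exported from this module, so
// this sketch assumes it lives in the same file; the test URL mirrors the one
// used when config.test_evasion is true.
const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();

    // must run before the first navigation, so the evaluateOnNewDocument()
    // overrides are installed on every document the page loads
    await evadeChromeHeadlessDetection(page);

    await page.goto('https://bot.sannysoft.com');
    await page.screenshot({ path: 'headless-evasion-result.png' });
    await browser.close();
})();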
|
||||
|
@ -1,215 +0,0 @@
|
||||
const cheerio = require('cheerio');
|
||||
const Scraper = require('./se_scraper');
|
||||
|
||||
class YahooFinanceScraper extends Scraper {
|
||||
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
const results = [];
|
||||
$('.js-stream-content .Cf').each((i, link) => {
|
||||
results.push({
|
||||
link: $(link).find('h3 a').attr('href'),
|
||||
title: $(link).find('h3').text(),
|
||||
snippet: $(link).find('p').text(),
|
||||
})
|
||||
});
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
results: results,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
try {
|
||||
await this.page.goto('https://finance.yahoo.com/');
|
||||
for (var i = 0; i < 3; i++) {
|
||||
let consent = await this.page.waitForSelector('[type="submit"]');
|
||||
await consent.click();
|
||||
}
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
await this.page.goto(`https://finance.yahoo.com/quote/${keyword}/news?p=${keyword}`);
|
||||
await this.page.waitForSelector('#quote-header-info', { timeout: 8000 });
|
||||
await this.sleep(1000);
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
return false;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('#b_content', { timeout: 5000 });
|
||||
await this.sleep(500);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
}
|
||||
}
|
||||
|
||||
class MarketwatchFinanceScraper extends Scraper {
|
||||
|
||||
async parse_async(html) {
|
||||
let res = await this.page.evaluate(() => {
|
||||
let results = [];
|
||||
// get the hotel elements
|
||||
let items = document.querySelectorAll('.article__content');
|
||||
// get the hotel data
|
||||
items.forEach((newsitem) => {
|
||||
let data = {};
|
||||
try {
|
||||
data.link = newsitem.querySelector('.article__headline a').getAttribute('href');
|
||||
data.title = newsitem.querySelector('.article__headline a').innerText;
|
||||
data.date = newsitem.querySelector('.article__timestamp').innerText;
|
||||
data.author = newsitem.querySelector('.article__author').innerText;
|
||||
}
|
||||
catch (exception) {
|
||||
console.error('Error parsing marketwatch data: ', exception);
|
||||
}
|
||||
results.push(data);
|
||||
});
|
||||
return results;
|
||||
});
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
results: res,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
await this.page.goto(`https://www.marketwatch.com/investing/stock/${keyword}`);
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
return false;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('.intraday__data', { timeout: 8000 });
|
||||
await this.sleep(500);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
}
|
||||
}
|
||||
|
||||
class ReutersFinanceScraper extends Scraper {
|
||||
|
||||
async parse_async(html) {
|
||||
let newsData = await this.page.evaluate(() => {
|
||||
let results = [];
|
||||
// get the hotel elements
|
||||
let items = document.querySelectorAll('div.feature');
|
||||
// get the hotel data
|
||||
items.forEach((newsitem) => {
|
||||
let data = {};
|
||||
try {
|
||||
data.link = newsitem.querySelector('h2 a').getAttribute('href');
|
||||
data.link = 'https://www.reuters.com' + data.link;
|
||||
data.title = newsitem.querySelector('h2 a').innerText;
|
||||
data.snippet = newsitem.querySelector('p').innerText;
|
||||
data.date = newsitem.querySelector('.timestamp').innerText;
|
||||
}
|
||||
catch (exception) {
|
||||
console.error('Error parsing reuters data: ', exception);
|
||||
}
|
||||
results.push(data);
|
||||
});
|
||||
return results;
|
||||
});
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
results: newsData,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
await this.page.goto(`https://www.reuters.com/finance/stocks/overview/${keyword}`);
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
return false;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('#sectionHeader', { timeout: 8000 });
|
||||
await this.sleep(500);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
}
|
||||
}
|
||||
|
||||
class CnbcFinanceScraper extends Scraper {
|
||||
|
||||
async parse_async(html) {
|
||||
let newsData = await this.page.evaluate(() => {
|
||||
let results = [];
|
||||
// get the hotel elements
|
||||
let items = document.querySelectorAll('div.headline');
|
||||
// get the hotel data
|
||||
items.forEach((newsitem) => {
|
||||
let data = {};
|
||||
try {
|
||||
data.link = newsitem.querySelector('a').getAttribute('href');
|
||||
data.title = newsitem.querySelector('[ng-bind="asset.headline"]').innerText;
|
||||
data.date = newsitem.querySelector('span.note').innerText;
|
||||
}
|
||||
catch (exception) {
|
||||
console.error('Error parsing cnbc data: ', exception);
|
||||
}
|
||||
results.push(data);
|
||||
});
|
||||
return results;
|
||||
});
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
results: newsData,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
await this.page.goto(`https://www.cnbc.com/quotes/?symbol=${keyword}&tab=news`);
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
return false;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('#quote_title_and_chart', { timeout: 8000 });
|
||||
await this.sleep(500);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
YahooFinanceScraper: YahooFinanceScraper,
|
||||
ReutersFinanceScraper: ReutersFinanceScraper,
|
||||
CnbcFinanceScraper: CnbcFinanceScraper,
|
||||
MarketwatchFinanceScraper: MarketwatchFinanceScraper,
|
||||
};
|
@ -1,81 +0,0 @@
|
||||
module.exports = {
|
||||
random_user_agent: random_user_agent,
|
||||
};
|
||||
|
||||
function random_user_agent() {
|
||||
let rand = user_agents[Math.floor(Math.random()*user_agents.length)];
|
||||
}
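// Note: as written above, the removed helper picks a random entry but never
// returns it. A corrected sketch of the same idea (user_agents is the array
// defined right below in this file):
function random_user_agent_fixed() {
    return user_agents[Math.floor(Math.random() * user_agents.length)];
}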
|
||||
|
||||
// updated: 29 Jan 2019
|
||||
const user_agents = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
|
||||
'Mozilla/5.0 (iPad; CPU OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.106',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 6.1; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
|
||||
'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.80 Chrome/71.0.3578.80 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
|
||||
'Mozilla/5.0 (Windows NT 10.0; rv:64.0) Gecko/20100101 Firefox/64.0',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
|
||||
'Mozilla/5.0 (X11; CrOS x86_64 11151.59.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.94 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
|
||||
];
|
src/modules/yandex.js (new file, 114 lines)
@ -0,0 +1,114 @@
|
||||
'use strict';
|
||||
|
||||
const Scraper = require('./se_scraper');
|
||||
|
||||
class YandexScraper extends Scraper {
|
||||
|
||||
constructor(...args) {
|
||||
super(...args);
|
||||
}
|
||||
|
||||
async parse_async(html) {
|
||||
|
||||
let results = await this.page.evaluate(() => {
|
||||
let serp_items = document.querySelectorAll('.serp-item');
|
||||
const data = [];
|
||||
serp_items.forEach((item) => {
|
||||
let obj = {
|
||||
is_ad: false,
|
||||
};
|
||||
try {
|
||||
if (item) {
|
||||
|
||||
let linkElement = item.querySelector('h2 a.link');
|
||||
|
||||
if (linkElement) {
|
||||
obj.link = linkElement.getAttribute('href');
|
||||
obj.title = linkElement.innerText;
|
||||
}
|
||||
|
||||
|
||||
let label = item.querySelector('.organic__subtitle .label');
|
||||
|
||||
if (label) {
|
||||
let labelText = label.innerText;
|
||||
|
||||
if (labelText) {
|
||||
labelText = labelText.trim().toLowerCase();
|
||||
console.log(labelText);
|
||||
let ad_labels = ['ad', 'werbung', 'реклама', 'anuncio'];
|
||||
obj.is_ad = ad_labels.includes(labelText);
|
||||
}
|
||||
}
|
||||
|
||||
obj.snippet = item.querySelector('.text-container.typo').innerText;
|
||||
obj.visible_link = item.querySelector('.typo_type_greenurl').innerText;
|
||||
|
||||
if (obj.title) {
|
||||
data.push(obj);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
}
|
||||
});
|
||||
return data;
|
||||
});
|
||||
|
||||
let num_results = await this.page.evaluate(() => {
|
||||
let num_results = document.querySelector('.serp-adv__found');
|
||||
if (num_results) {
|
||||
return num_results.innerText;
|
||||
}
|
||||
});
|
||||
|
||||
const cleaned = this.clean_results(results, ['title', 'link' , 'snippet']);
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
num_results: num_results,
|
||||
results: cleaned,
|
||||
};
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
let startUrl = 'https://yandex.com';
|
||||
|
||||
this.logger.info('Using startUrl: ' + startUrl);
|
||||
|
||||
this.last_response = await this.page.goto(startUrl);
|
||||
|
||||
await this.page.waitForSelector('input[name="text"]', { timeout: this.STANDARD_TIMEOUT });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[name="text"]');
|
||||
await this.set_input_value(`input[name="text"]`, keyword);
|
||||
await this.sleep(50);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
let next_page_link = await this.page.$('.pager .pager__item_kind_next', {timeout: 1000});
|
||||
if (!next_page_link) {
|
||||
return false;
|
||||
}
|
||||
await next_page_link.click();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForSelector('.main__content', { timeout: this.STANDARD_TIMEOUT });
|
||||
}
|
||||
|
||||
async detected() {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
YandexScraper: YandexScraper,
|
||||
};
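// Hedged sketch of the ad-label check that YandexScraper.parse_async() above
// runs inside page.evaluate(): a result is flagged as an ad when the label text
// under .organic__subtitle matches one of the known "ad" words. The sample
// input is invented for illustration.
function is_ad_label(labelText) {
    if (!labelText) {
        return false;
    }
    const ad_labels = ['ad', 'werbung', 'реклама', 'anuncio'];
    return ad_labels.includes(labelText.trim().toLowerCase());
}

console.log(is_ad_label(' Реклама ')); // true: Russian "ad" label, case and whitespace insensitive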
|
@ -1,105 +0,0 @@
|
||||
const cheerio = require('cheerio');
|
||||
const Scraper = require('./se_scraper');
|
||||
|
||||
class YoutubeScraper extends Scraper {
|
||||
|
||||
parse(html) {
|
||||
// load the page source into cheerio
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// perform queries
|
||||
const results = [];
|
||||
$('#contents ytd-video-renderer,#contents ytd-grid-video-renderer').each((i, link) => {
|
||||
results.push({
|
||||
link: $(link).find('#video-title').attr('href'),
|
||||
title: $(link).find('#video-title').text(),
|
||||
snippet: $(link).find('#description-text').text(),
|
||||
channel: $(link).find('#byline a').text(),
|
||||
channel_link: $(link).find('#byline a').attr('href'),
|
||||
num_views: $(link).find('#metadata-line span:nth-child(1)').text(),
|
||||
release_date: $(link).find('#metadata-line span:nth-child(2)').text(),
|
||||
})
|
||||
});
|
||||
|
||||
let no_results = this.no_results(
|
||||
['No results found', 'Keine Ergebnisse', 'Es werden Ergebnisse angezeigt', 'Showing results for' ],
|
||||
$('yt-showing-results-for-renderer').text()
|
||||
);
|
||||
|
||||
let effective_query = $('#corrected-link').text() || '';
|
||||
|
||||
const cleaned = [];
|
||||
for (var i=0; i < results.length; i++) {
|
||||
let res = results[i];
|
||||
if (res.link && res.link.trim() && res.title && res.title.trim()) {
|
||||
res.title = res.title.trim();
|
||||
res.snippet = res.snippet.trim();
|
||||
res.rank = this.result_rank++;
|
||||
|
||||
// check if this result has been used before
|
||||
if (this.all_videos.has(res.title) === false) {
|
||||
cleaned.push(res);
|
||||
}
|
||||
this.all_videos.add(res.title);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
time: (new Date()).toUTCString(),
|
||||
no_results: no_results,
|
||||
effective_query: effective_query,
|
||||
num_results: '',
|
||||
results: cleaned,
|
||||
}
|
||||
}
|
||||
|
||||
async load_start_page() {
|
||||
try {
|
||||
this.all_videos = new Set();
|
||||
await this.page.goto('https://www.youtube.com', {
|
||||
referer: 'https://google.com'
|
||||
});
|
||||
await this.page.waitForSelector('input[id="search"]', { timeout: 5000 });
|
||||
// before we do anything, parse the results of the front page of youtube
|
||||
await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 10000 });
|
||||
await this.sleep(500);
|
||||
let html = await this.page.content();
|
||||
this.results['frontpage'] = this.parse(html);
|
||||
this.result_rank = 1;
|
||||
} catch(e) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(keyword) {
|
||||
const input = await this.page.$('input[id="search"]');
|
||||
// overwrites last text in input
|
||||
await input.click({ clickCount: 3 });
|
||||
await input.type(keyword);
|
||||
await input.focus();
|
||||
await this.page.keyboard.press("Enter");
|
||||
}
|
||||
|
||||
async next_page() {
|
||||
// youtube needs scrolling
|
||||
// TODO: implement scrolling, no priority right now
|
||||
return false;
|
||||
}
|
||||
|
||||
async wait_for_results() {
|
||||
await this.page.waitForFunction(`document.title.indexOf('${this.keyword}') !== -1`, { timeout: 5000 });
|
||||
await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 5000 });
|
||||
await this.sleep(500);
|
||||
}
|
||||
|
||||
async detected() {
|
||||
const title = await this.page.title();
|
||||
let html = await this.page.content();
|
||||
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
YoutubeScraper: YoutubeScraper,
|
||||
};
|
@ -1,291 +1,411 @@
|
||||
const puppeteer = require('puppeteer');
|
||||
const zlib = require('zlib');
|
||||
var fs = require('fs');
|
||||
'use strict';
|
||||
|
||||
// local module imports
|
||||
const fs = require('fs');
|
||||
const os = require('os');
|
||||
const _ = require('lodash');
|
||||
const { createLogger, format, transports } = require('winston');
|
||||
const { combine, timestamp, printf } = format;
|
||||
const debug = require('debug')('se-scraper:ScrapeManager');
|
||||
const { Cluster } = require('puppeteer-cluster');
|
||||
|
||||
const UserAgent = require('user-agents');
|
||||
const google = require('./modules/google.js');
|
||||
const bing = require('./modules/bing.js');
|
||||
const baidu = require('./modules/baidu.js');
|
||||
const yandex = require('./modules/yandex.js');
|
||||
const infospace = require('./modules/infospace.js');
|
||||
const youtube = require('./modules/youtube.js');
|
||||
const ua = require('./modules/user_agents.js');
|
||||
const meta = require('./modules/metadata.js');
|
||||
const duckduckgo = require('./modules/duckduckgo.js');
|
||||
const tickersearch = require('./modules/ticker_search.js');
|
||||
const CustomConcurrencyImpl = require('./concurrency-implementation');
|
||||
|
||||
const MAX_ALLOWED_BROWSERS = 6;
|
||||
|
||||
function write_results(fname, data) {
|
||||
fs.writeFileSync(fname, data, (err) => {
|
||||
if (err) throw err;
|
||||
console.log(`Results written to file ${fname}`);
|
||||
});
|
||||
fs.writeFileSync(fname, data, (err) => {
|
||||
if (err) throw err;
|
||||
console.log(`Results written to file ${fname}`);
|
||||
});
|
||||
}
|
||||
|
||||
module.exports.handler = async function handler (event, context, callback) {
|
||||
config = event;
|
||||
pluggable = {};
|
||||
if (config.custom_func) {
|
||||
if (fs.existsSync(config.custom_func)) {
|
||||
try {
|
||||
Pluggable = require(config.custom_func);
|
||||
pluggable = new Pluggable({config: config});
|
||||
} catch (exception) {
|
||||
console.error(exception);
|
||||
}
|
||||
} else {
|
||||
console.error(`File "${config.custom_func}" does not exist...`);
|
||||
}
|
||||
}
|
||||
function read_keywords_from_file(fname) {
|
||||
let kws = fs.readFileSync(fname).toString().split(os.EOL);
|
||||
// clean keywords
|
||||
kws = kws.filter((kw) => {
|
||||
return kw.trim().length > 0;
|
||||
});
|
||||
return kws;
|
||||
}
|
||||
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
config = parseEventData(config);
|
||||
if (config.debug === true) {
|
||||
console.log(config);
|
||||
}
|
||||
|
||||
var ADDITIONAL_CHROME_FLAGS = [
|
||||
'--disable-infobars',
|
||||
'--window-position=0,0',
|
||||
'--ignore-certifcate-errors',
|
||||
'--ignore-certifcate-errors-spki-list',
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-accelerated-2d-canvas',
|
||||
'--disable-gpu',
|
||||
'--window-size=1920x1080',
|
||||
'--hide-scrollbars',
|
||||
];
|
||||
function getScraper(search_engine, args) {
|
||||
if (typeof search_engine === 'string') {
|
||||
return new {
|
||||
google: google.GoogleScraper,
|
||||
google_news_old: google.GoogleNewsOldScraper,
|
||||
google_news: google.GoogleNewsScraper,
|
||||
google_image: google.GoogleImageScraper,
|
||||
bing: bing.BingScraper,
|
||||
yandex: yandex.YandexScraper,
|
||||
bing_news: bing.BingNewsScraper,
|
||||
duckduckgo: duckduckgo.DuckduckgoScraper,
|
||||
infospace: infospace.InfospaceScraper,
|
||||
webcrawler: infospace.WebcrawlerNewsScraper,
|
||||
}[search_engine](args);
|
||||
} else if (typeof search_engine === 'function') {
|
||||
return new search_engine(args);
|
||||
} else {
|
||||
throw new Error(`search_engine must either be a string or a class (function)`);
|
||||
}
|
||||
}
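// Hedged usage sketch of getScraper() above: search_engine is either one of the
// string keys in the lookup table or a Scraper subclass, which is how callers
// can plug in their own engines. MyScraper and the commented calls below are
// placeholders, not part of the library.
const Scraper = require('./modules/se_scraper');

class MyScraper extends Scraper {
    async load_start_page() { return true; }
    async search_keyword(keyword) { /* navigate to the keyword's URL */ }
    async next_page() { return false; }
    async wait_for_results() {}
    async detected() {}
    parse(html) { return { results: [] }; }
}

// built-in engine selected by name:
// const scraper = getScraper('bing', { config, context, pluggable });
// custom engine passed as a class:
// const custom = getScraper(MyScraper, { config, context, pluggable });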
|
||||
|
||||
let USER_AGENT = '';
|
||||
|
||||
if (config.user_agent) {
|
||||
USER_AGENT = config.user_agent;
|
||||
}
|
||||
class ScrapeManager {
|
||||
|
||||
if (config.random_user_agent === true) {
|
||||
USER_AGENT = ua.random_user_agent();
|
||||
}
|
||||
constructor(config, context={}) {
|
||||
|
||||
if (USER_AGENT) {
|
||||
ADDITIONAL_CHROME_FLAGS.push(
|
||||
`--user-agent="${USER_AGENT}"`
|
||||
)
|
||||
}
|
||||
this.cluster = null;
|
||||
this.pluggable = null;
|
||||
this.scraper = null;
|
||||
this.context = context;
|
||||
|
||||
if (config.proxy) {
|
||||
// check this out bubbles
|
||||
// https://www.systutorials.com/241062/how-to-set-google-chromes-proxy-settings-in-command-line-on-linux/
|
||||
// [<proxy-scheme>://]<proxy-host>[:<proxy-port>]
|
||||
// "http", "socks", "socks4", "socks5".
|
||||
ADDITIONAL_CHROME_FLAGS.push(
|
||||
'--proxy-server=' + config.proxy,
|
||||
)
|
||||
}
|
||||
        this.config = _.defaults(config, {
            // the user agent to scrape with
            user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3835.0 Safari/537.36',
            // if random_user_agent is set to True, a random user agent is chosen
            random_user_agent: false,
            // whether to select manual settings in visible mode
            set_manual_settings: false,
            // log ip address data
            log_ip_address: false,
            // log http headers
            log_http_headers: false,
            // how long to sleep between requests. a random sleep interval within the range [a,b]
            // is drawn before every request. empty string for no sleeping.
            sleep_range: null,
            // which search engine to scrape
            search_engine: 'google',
            search_engine_name: 'google',
            logger: createLogger({
                level: 'info',
                format: combine(
                    timestamp(),
                    printf(({ level, message, timestamp }) => {
                        return `${timestamp} [${level}] ${message}`;
                    })
                ),
                transports: [
                    new transports.Console()
                ]
            }),
            keywords: ['nodejs rocks',],
            // whether to start the browser in headless mode
            headless: true,
            // specify flags passed to chrome here
            // About our default values https://peter.sh/experiments/chromium-command-line-switches/
            chrome_flags: [
                '--disable-infobars',
                '--window-position=0,0',
                '--ignore-certificate-errors',
                '--ignore-certificate-errors-spki-list',
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-accelerated-2d-canvas',
                '--disable-gpu',
                '--window-size=1920,1040',
                '--start-fullscreen',
                '--hide-scrollbars',
                '--disable-notifications',
            ],
            // the number of pages to scrape for each keyword
            num_pages: 1,
            // path to output file, data will be stored in JSON
            output_file: '',
            // whether to also passthru all the html output of the serp pages
            html_output: false,
            // whether to strip JS and CSS from the html_output
            // has only an effect if `html_output` is true
            clean_html_output: true,
            // remove all data images from the html
            clean_data_images: true,
            // whether to return a screenshot of serp pages as b64 data
            screen_output: false,
            // Scrape url from local file. Mainly used for testing.
            scrape_from_file: '',
            // whether to prevent images, css, fonts and media from being loaded
            // will speed up scraping a great deal
            block_assets: true,
            // path to js module that extends functionality
            // this module should export the functions:
            // get_browser, handle_metadata, close_browser
            //custom_func: resolve('examples/pluggable.js'),
            custom_func: null,
            throw_on_detection: false,
            // List of proxies to use ['socks5://78.94.172.42:1080', 'http://localhost:1080']
            proxies: null,
            // a file with one proxy per line. Example:
            // socks5://78.94.172.42:1080
            // http://118.174.233.10:48400
            proxy_file: '',
            // whether to use proxies only
            // when this is set to true, se-scraper will not use
            // your default IP address
            use_proxies_only: false,
            // check if headless chrome escapes common detection techniques
            // this is a quick test and should be used for debugging
            test_evasion: false,
            apply_evasion_techniques: true,
            // settings for puppeteer-cluster
            puppeteer_cluster_config: {
                timeout: 30 * 60 * 1000, // max timeout set to 30 minutes
                monitor: false,
                concurrency: Cluster.CONCURRENCY_BROWSER,
                maxConcurrency: 1,
            }
        });
|
||||
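        // Example (illustrative values): anything passed to the constructor overrides the
        // corresponding default above via _.defaults(); all other keys keep their defaults.
        // const manager = new ScrapeManager({
        //     search_engine: 'bing',
        //     keywords: ['example query'],
        //     num_pages: 2,
        //     random_user_agent: true,
        // });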
|
||||
let launch_args = {
|
||||
args: ADDITIONAL_CHROME_FLAGS,
|
||||
headless: config.headless,
|
||||
ignoreHTTPSErrors: true,
|
||||
};
|
||||
this.logger = this.config.logger;
|
||||
|
||||
if (config.debug === true) {
|
||||
console.log("Chrome Args: ", launch_args);
|
||||
}
|
||||
if (config.sleep_range) {
|
||||
// parse an array
|
||||
config.sleep_range = eval(config.sleep_range);
|
||||
|
||||
if (pluggable.start_browser) {
|
||||
launch_args.config = config;
|
||||
browser = await pluggable.start_browser(launch_args);
|
||||
} else {
|
||||
browser = await puppeteer.launch(launch_args);
|
||||
}
|
||||
            if (config.sleep_range.length !== 2 || typeof config.sleep_range[0] !== 'number' || typeof config.sleep_range[1] !== 'number') {
                throw new Error("sleep_range is not a valid array of two integers.");
            }
|
||||
}
|
||||
|
||||
let metadata = {};
|
||||
if (fs.existsSync(this.config.keyword_file)) {
|
||||
this.config.keywords = read_keywords_from_file(this.config.keyword_file);
|
||||
}
|
||||
|
||||
if (config.log_http_headers === true) {
|
||||
metadata.http_headers = await meta.get_http_headers(browser);
|
||||
}
|
||||
if (this.config.proxies && this.config.proxy_file) {
|
||||
throw new Error('Either use a proxy_file or specify a proxy for all connections. Do not use both options.');
|
||||
}
|
||||
|
||||
if (config.log_ip_address === true) {
|
||||
metadata.ipinfo = await meta.get_ip_data(browser);
|
||||
}
|
||||
if (this.config.proxy_file) {
|
||||
this.config.proxies = read_keywords_from_file(this.config.proxy_file);
|
||||
this.logger.info(`${this.config.proxies.length} proxies read from file.`);
|
||||
}
|
||||
|
||||
// check that our proxy is working by confirming
|
||||
// that ipinfo.io sees the proxy IP address
|
||||
if (config.proxy && config.log_ip_address === true) {
|
||||
console.log(`${metadata.ipinfo} vs ${config.proxy}`);
|
||||
if (!this.config.proxies && this.config.use_proxies_only) {
|
||||
throw new Error('Must provide at least one proxy in proxies if you enable use_proxies_only');
|
||||
}
|
||||
|
||||
try {
|
||||
// if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
|
||||
if (!config.proxy.includes(metadata.ipinfo.ip)) {
|
||||
console.error('Proxy not working properly.');
|
||||
await browser.close();
|
||||
return;
|
||||
}
|
||||
} catch (exception) {
|
||||
debug('this.config=%O', this.config);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Launches the puppeteer cluster or browser.
|
||||
*
|
||||
* Returns true if the browser was successfully launched. Otherwise will return false.
|
||||
*/
|
||||
async start() {
|
||||
|
||||
var results = {};
|
||||
if (this.config.custom_func) {
|
||||
if (fs.existsSync(this.config.custom_func)) {
|
||||
try {
|
||||
const PluggableClass = require(this.config.custom_func);
|
||||
this.pluggable = new PluggableClass({
|
||||
config: this.config,
|
||||
context: this.context
|
||||
});
|
||||
} catch (exception) {
|
||||
console.error(exception);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
console.error(`File "${this.config.custom_func}" does not exist!`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Scraper = {
|
||||
google: google.GoogleScraper,
|
||||
google_news_old: google.GoogleNewsOldScraper,
|
||||
google_news: google.GoogleNewsScraper,
|
||||
google_image: google.GoogleImageScraper,
|
||||
bing: bing.BingScraper,
|
||||
bing_news: bing.BingNewsScraper,
|
||||
duckduckgo: duckduckgo.DuckduckgoScraper,
|
||||
duckduckgo_news: duckduckgo.DuckduckgoNewsScraper,
|
||||
infospace: infospace.InfospaceScraper,
|
||||
webcrawler: infospace.WebcrawlerNewsScraper,
|
||||
baidu: baidu.BaiduScraper,
|
||||
youtube: youtube.YoutubeScraper,
|
||||
yahoo_news: tickersearch.YahooFinanceScraper,
|
||||
reuters: tickersearch.ReutersFinanceScraper,
|
||||
cnbc: tickersearch.CnbcFinanceScraper,
|
||||
marketwatch: tickersearch.MarketwatchFinanceScraper,
|
||||
}[config.search_engine];
|
||||
const chrome_flags = _.clone(this.config.chrome_flags);
|
||||
|
||||
if (Scraper === undefined) {
|
||||
console.info('This search_engine is not implemented:', config.search_engine);
|
||||
} else {
|
||||
scraperObj = new Scraper({
|
||||
browser: browser,
|
||||
config: config,
|
||||
context: context,
|
||||
pluggable: pluggable,
|
||||
});
|
||||
results = await scraperObj.run();
|
||||
}
|
||||
if (this.pluggable && this.pluggable.start_browser) {
|
||||
launch_args.config = this.config;
|
||||
this.browser = await this.pluggable.start_browser({
|
||||
config: this.config,
|
||||
});
|
||||
this.page = await this.browser.newPage();
|
||||
} else {
|
||||
// if no custom start_browser functionality was given
|
||||
// use puppeteer-cluster for scraping
|
||||
|
||||
if (pluggable.close_browser) {
|
||||
await pluggable.close_browser();
|
||||
} else {
|
||||
await browser.close();
|
||||
}
|
||||
let proxies;
|
||||
// if we have at least one proxy, always use CONCURRENCY_BROWSER
|
||||
// and set maxConcurrency to this.config.proxies.length + 1
|
||||
// otherwise use whatever configuration was passed
|
||||
if (this.config.proxies && this.config.proxies.length > 0) {
|
||||
|
||||
let num_requests = scraperObj.num_requests;
|
||||
let timeDelta = Date.now() - startTime;
|
||||
let ms_per_request = timeDelta/num_requests;
|
||||
// because we use real browsers, normal laptops run out of memory
// when using more than about 5 or 6 of them,
// therefore hardcode a limit here
// TODO not sure this is what we want
|
||||
this.numClusters = Math.min(
|
||||
this.config.proxies.length + (this.config.use_proxies_only ? 0 : 1),
|
||||
MAX_ALLOWED_BROWSERS
|
||||
);
|
||||
proxies = _.clone(this.config.proxies);
|
||||
|
||||
if (config.verbose === true) {
|
||||
console.log(`Scraper took ${timeDelta}ms to perform ${num_requests} requests.`);
|
||||
console.log(`On average ms/request: ${ms_per_request}ms/request`);
|
||||
console.dir(results, {depth: null, colors: true});
|
||||
}
|
||||
// Insert a first config without proxy if use_proxy_only is false
|
||||
if (this.config.use_proxies_only === false) {
|
||||
proxies.unshift(null);
|
||||
}
|
||||
|
||||
if (config.compress === true) {
|
||||
results = JSON.stringify(results);
|
||||
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
|
||||
results = zlib.deflateSync(results).toString('base64');
|
||||
}
|
||||
} else {
|
||||
this.numClusters = this.config.puppeteer_cluster_config.maxConcurrency;
|
||||
proxies = _.times(this.numClusters, null);
|
||||
}
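// Worked example (illustrative): with 3 proxies and use_proxies_only = false the
// branch above gives numClusters = min(3 + 1, MAX_ALLOWED_BROWSERS) = 4, i.e. three
// proxied browsers plus one on the local IP; with use_proxies_only = true it is 3.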
|
||||
|
||||
if (pluggable.handle_results) {
|
||||
await pluggable.handle_results({
|
||||
config: config,
|
||||
results: results,
|
||||
});
|
||||
}
|
||||
this.logger.info(`Using ${this.numClusters} clusters.`);
|
||||
|
||||
metadata.id = `${config.job_name} ${config.chunk_lines}`;
|
||||
metadata.chunk_lines = config.chunk_lines;
|
||||
metadata.elapsed_time = timeDelta.toString();
|
||||
metadata.ms_per_keyword = ms_per_request.toString();
|
||||
metadata.num_requests = num_requests;
|
||||
// Give the per browser options
|
||||
const perBrowserOptions = _.map(proxies, (proxy) => {
|
||||
const userAgent = (this.config.random_user_agent) ? (new UserAgent({deviceCategory: 'desktop'})).toString() : this.config.user_agent;
|
||||
let args = chrome_flags.concat([`--user-agent=${userAgent}`]);
|
||||
|
||||
if (config.verbose === true) {
|
||||
console.log(metadata);
|
||||
}
|
||||
if (proxy) {
|
||||
args = args.concat([`--proxy-server=${proxy}`]);
|
||||
}
|
||||
|
||||
if (pluggable.handle_metadata) {
|
||||
await pluggable.handle_metadata({metadata: metadata, config: config});
|
||||
}
|
||||
return {
|
||||
headless: this.config.headless,
|
||||
ignoreHTTPSErrors: true,
|
||||
args
|
||||
};
|
||||
});
|
||||
|
||||
if (config.output_file) {
|
||||
write_results(config.output_file, JSON.stringify(results));
|
||||
}
|
||||
debug('perBrowserOptions=%O', perBrowserOptions)
|
||||
|
||||
let response = {
|
||||
headers: {
|
||||
'Content-Type': 'text/json',
|
||||
},
|
||||
results: results,
|
||||
metadata: metadata || {},
|
||||
statusCode: 200
|
||||
};
|
||||
this.cluster = await Cluster.launch({
|
||||
monitor: this.config.puppeteer_cluster_config.monitor,
|
||||
timeout: this.config.puppeteer_cluster_config.timeout, // max timeout set to 30 minutes
|
||||
concurrency: CustomConcurrencyImpl,
|
||||
maxConcurrency: this.numClusters,
|
||||
puppeteerOptions: {
|
||||
perBrowserOptions: perBrowserOptions
|
||||
}
|
||||
});
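// Each entry of perBrowserOptions corresponds to one browser worker, which is why the
// custom concurrency implementation (CustomConcurrencyImpl) is used: stock
// puppeteer-cluster applies the same puppeteerOptions to every browser, while here
// every worker can get its own proxy and user agent.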
|
||||
}
|
||||
}
|
||||
|
||||
callback(null, response);
|
||||
/*
|
||||
* Scrapes the keywords specified by the config.
|
||||
*/
|
||||
async scrape(scrape_config = {}) {
|
||||
|
||||
} catch (e) {
|
||||
callback(e, null);
|
||||
}
|
||||
if (!scrape_config.keywords && !scrape_config.keyword_file) {
|
||||
throw new Error('Either keywords or keyword_file must be supplied to scrape()');
|
||||
}
|
||||
|
||||
Object.assign(this.config, scrape_config);
|
||||
|
||||
var results = {};
|
||||
var num_requests = 0;
|
||||
var metadata = {};
|
||||
var startTime = Date.now();
|
||||
|
||||
this.config.search_engine_name = typeof this.config.search_engine === 'function' ? this.config.search_engine.name : this.config.search_engine;
|
||||
|
||||
this.logger.info(`scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`);
|
||||
|
||||
if (this.pluggable && this.pluggable.start_browser) {
|
||||
|
||||
this.scraper = getScraper(this.config.search_engine, {
|
||||
config: this.config,
|
||||
context: this.context,
|
||||
pluggable: this.pluggable,
|
||||
page: this.page,
|
||||
});
|
||||
|
||||
var {results, metadata, num_requests} = await this.scraper.run(this.page);
|
||||
|
||||
} else {
|
||||
// Each browser will get N/(K+1) keywords and will issue N/(K+1) * M total requests to the search engine.
|
||||
// https://github.com/GoogleChrome/puppeteer/issues/678
|
||||
// The question is: Is it possible to set proxies per Page? Per Browser?
|
||||
// as far as I can see, puppeteer cluster uses the same puppeteerOptions
|
||||
// for every browser instance. We will use our custom puppeteer-cluster version.
|
||||
// https://www.npmjs.com/package/proxy-chain
|
||||
// this answer looks nice: https://github.com/GoogleChrome/puppeteer/issues/678#issuecomment-389096077
|
||||
let chunks = [];
|
||||
for (var n = 0; n < this.numClusters; n++) {
|
||||
chunks.push([]);
|
||||
}
|
||||
for (var k = 0; k < this.config.keywords.length; k++) {
|
||||
chunks[k % this.numClusters].push(this.config.keywords[k]);
|
||||
}
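// Worked example (illustrative): with numClusters = 2 and
// keywords = ['a', 'b', 'c', 'd', 'e'], the round-robin above yields
// chunks = [['a', 'c', 'e'], ['b', 'd']], so each browser scrapes its own share.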
|
||||
|
||||
debug('chunks=%o', chunks);
|
||||
|
||||
let execPromises = [];
|
||||
for (var c = 0; c < chunks.length; c++) {
|
||||
const config = _.clone(this.config);
|
||||
config.keywords = chunks[c];
|
||||
|
||||
var obj = getScraper(this.config.search_engine, {
|
||||
config: config,
|
||||
context: {},
|
||||
pluggable: this.pluggable,
|
||||
});
|
||||
|
||||
var boundMethod = obj.run.bind(obj);
|
||||
execPromises.push(this.cluster.execute({}, boundMethod));
|
||||
}
|
||||
|
||||
let promiseReturns = await Promise.all(execPromises);
|
||||
|
||||
// Merge results and metadata per keyword
|
||||
for (let promiseReturn of promiseReturns) {
|
||||
Object.assign(results, promiseReturn.results);
|
||||
Object.assign(metadata, promiseReturn.metadata);
|
||||
num_requests += promiseReturn.num_requests;
|
||||
}
|
||||
}
|
||||
|
||||
let timeDelta = Date.now() - startTime;
|
||||
let ms_per_request = timeDelta/num_requests;
|
||||
|
||||
this.logger.info(`Scraper took ${timeDelta}ms to perform ${num_requests} requests.`);
|
||||
this.logger.info(`On average ms/request: ${ms_per_request}ms/request`);
|
||||
|
||||
if (this.pluggable && this.pluggable.handle_results) {
|
||||
await this.pluggable.handle_results(results);
|
||||
}
|
||||
|
||||
metadata.elapsed_time = timeDelta.toString();
|
||||
metadata.ms_per_keyword = ms_per_request.toString();
|
||||
metadata.num_requests = num_requests;
|
||||
|
||||
debug('metadata=%O', metadata);
|
||||
|
||||
if (this.pluggable && this.pluggable.handle_metadata) {
|
||||
await this.pluggable.handle_metadata(metadata);
|
||||
}
|
||||
|
||||
if (this.config.output_file) {
|
||||
this.logger.info(`Writing results to ${this.config.output_file}`);
|
||||
write_results(this.config.output_file, JSON.stringify(results, null, 4));
|
||||
}
|
||||
|
||||
return {
|
||||
results: results,
|
||||
metadata: metadata || {},
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Quit the puppeteer cluster/browser.
|
||||
*/
|
||||
async quit() {
|
||||
if (this.pluggable && this.pluggable.close_browser) {
|
||||
await this.pluggable.close_browser();
|
||||
} else {
|
||||
await this.cluster.idle();
|
||||
await this.cluster.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
ScrapeManager: ScrapeManager,
|
||||
};
|
||||
|
||||
function parseEventData(config) {
|
||||
|
||||
function _bool(e) {
|
||||
e = String(e);
|
||||
if (typeof e.trim === "function") {
|
||||
return e.trim().toLowerCase() == 'true';
|
||||
} else {
|
||||
return e.toLowerCase() == 'true';
|
||||
}
|
||||
}
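// Examples: _bool('true') === true, _bool(' True ') === true,
// _bool('false') === false, _bool(undefined) === false
// (String(undefined) is 'undefined', which is not 'true').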
|
||||
|
||||
if (config.debug) {
|
||||
config.debug = _bool(config.debug);
|
||||
}
|
||||
|
||||
if (config.verbose) {
|
||||
config.verbose = _bool(config.verbose);
|
||||
}
|
||||
|
||||
if (config.upload_to_s3) {
|
||||
config.upload_to_s3 = _bool(config.upload_to_s3);
|
||||
}
|
||||
|
||||
if (config.log_ip_address) {
|
||||
config.log_ip_address = _bool(config.log_ip_address);
|
||||
}
|
||||
|
||||
if (config.log_http_headers) {
|
||||
config.log_http_headers = _bool(config.log_http_headers);
|
||||
}
|
||||
|
||||
if (config.random_user_agent) {
|
||||
config.random_user_agent = _bool(config.random_user_agent);
|
||||
}
|
||||
|
||||
if (config.compress) {
|
||||
config.compress = _bool(config.compress);
|
||||
}
|
||||
|
||||
if (config.is_local) {
|
||||
config.is_local = _bool(config.is_local);
|
||||
}
|
||||
|
||||
if (config.max_results) {
|
||||
config.max_results = parseInt(config.max_results);
|
||||
}
|
||||
|
||||
if (config.set_manual_settings) {
|
||||
config.set_manual_settings = _bool(config.set_manual_settings);
|
||||
}
|
||||
|
||||
if (config.block_assets) {
|
||||
config.block_assets = _bool(config.block_assets);
|
||||
}
|
||||
|
||||
if (config.sleep_range) {
|
||||
// parse an array
|
||||
config.sleep_range = eval(config.sleep_range);
|
||||
|
||||
        if (config.sleep_range.length !== 2 || typeof config.sleep_range[0] !== 'number' || typeof config.sleep_range[1] !== 'number') {
            throw new Error("sleep_range is not a valid array of two integers.");
        }
|
||||
}
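    // Hedged alternative (not in the original): sleep_range arrives as a string such as
    // "[2, 5]", so JSON.parse can validate it without eval's code-execution risk.
    function parse_sleep_range(raw) {
        const range = JSON.parse(raw);
        if (!Array.isArray(range) || range.length !== 2 ||
            typeof range[0] !== 'number' || typeof range[1] !== 'number') {
            throw new Error('sleep_range is not a valid array of two integers.');
        }
        return range;
    }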
|
||||
|
||||
return config;
|
||||
}
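// End-to-end usage as exercised by the tests that follow: start() boots the
// cluster/browser, scrape() runs the keywords, quit() tears everything down.
// This is a standalone sketch with illustrative values, not part of this file.
const se_scraper = require('se-scraper');
(async () => {
    const scraper = new se_scraper.ScrapeManager({
        search_engine: 'google',
        keywords: ['test keyword'],
    });
    await scraper.start();
    const { results, metadata } = await scraper.scrape({ keywords: ['test keyword'] });
    console.log(results, metadata);
    await scraper.quit();
})();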
|
101
test/html_output.js
Normal file
@ -0,0 +1,101 @@
|
||||
'use strict';
|
||||
const express = require('express');
|
||||
const { createLogger, transports } = require('winston');
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const assert = require('assert');
|
||||
const path = require('path');
|
||||
const keyCert = require('key-cert');
|
||||
const Promise = require('bluebird');
|
||||
const Proxy = require('http-mitm-proxy');
|
||||
|
||||
const debug = require('debug')('se-scraper:test');
|
||||
const se_scraper = require('../');
|
||||
|
||||
const httpPort = 3012;
|
||||
const httpsPort = httpPort + 1;
|
||||
const proxyPort = httpPort + 2;
|
||||
|
||||
const fakeSearchEngine = express();
|
||||
fakeSearchEngine.get('/search', (req, res) => {
|
||||
debug('q=%s', req.query.q);
|
||||
const pageNumber = ((req.query.start/10) || 0) + 1;
|
||||
res.sendFile(path.join(__dirname, 'mocks/google/' + req.query.q + '_page' + pageNumber + '.html'));
|
||||
});
|
||||
fakeSearchEngine.use(express.static('test/mocks/google', {extensions: ['html']}));
|
||||
|
||||
describe('Config', function(){
|
||||
|
||||
let httpServer, httpsServer, proxy;
|
||||
before(async function(){
|
||||
// Here mount our fake engine in both http and https listen server
|
||||
httpServer = http.createServer(fakeSearchEngine);
|
||||
httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
|
||||
|
||||
proxy = Proxy();
|
||||
proxy.onRequest((ctx, callback) => {
|
||||
ctx.proxyToServerRequestOptions.host = 'localhost';
|
||||
ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
|
||||
ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
|
||||
debug('Proxy request to %s', ctx.clientToProxyRequest.headers.host);
|
||||
return callback();
|
||||
});
|
||||
|
||||
await Promise.promisify(proxy.listen, {context: proxy})({port: proxyPort});
|
||||
await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
|
||||
await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
|
||||
debug('Fake http search engine servers started');
|
||||
});
|
||||
|
||||
after(function(){
|
||||
httpsServer.close();
|
||||
httpServer.close();
|
||||
proxy.close();
|
||||
});
|
||||
|
||||
describe('html_output', function(){
|
||||
|
||||
const testLogger = createLogger({
|
||||
transports: [
|
||||
new transports.Console({
|
||||
level: 'error'
|
||||
})
|
||||
]
|
||||
});
|
||||
|
||||
/**
|
||||
* Test html_output option
|
||||
*/
|
||||
it('html_output single page single keyword', async function () {
|
||||
|
||||
const scrape_job = {
|
||||
search_engine: 'google',
|
||||
/* TODO refactor start_url
|
||||
google_settings: {
|
||||
start_url: 'http://localhost:' + httpPort
|
||||
},
|
||||
*/
|
||||
keywords: ['test keyword'],
|
||||
};
|
||||
|
||||
var scraper = new se_scraper.ScrapeManager({
|
||||
throw_on_detection: true,
|
||||
logger: testLogger,
|
||||
html_output: true,
|
||||
//clean_html_output: false,
|
||||
//clean_data_images: false,
|
||||
// TODO refactor start_url so we can use-it instead of depending of the proxy for this test
|
||||
proxies: ['http://localhost:' + proxyPort],
|
||||
use_proxies_only: true,
|
||||
});
|
||||
await scraper.start();
|
||||
const { results } = await scraper.scrape(scrape_job);
|
||||
await scraper.quit();
|
||||
|
||||
assert(results['test keyword']['1'].html.length > 1000, 'Html of google page 1 should be provided');
|
||||
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
});
|
23
test/mocks/bing/index.html
Normal file
File diff suppressed because one or more lines are too long
42
test/mocks/bing/test keyword_page1.html
Normal file
File diff suppressed because one or more lines are too long
42
test/mocks/bing/test keyword_page2.html
Normal file
File diff suppressed because one or more lines are too long
40
test/mocks/bing/test keyword_page3.html
Normal file
File diff suppressed because one or more lines are too long
148
test/mocks/duckduckgo/index.html
Normal file
@ -0,0 +1,148 @@
|
||||
<!DOCTYPE html>
|
||||
<!--[if IEMobile 7 ]> <html lang="en_US" class="no-js iem7"> <![endif]-->
|
||||
<!--[if lt IE 7]> <html class="ie6 lt-ie10 lt-ie9 lt-ie8 lt-ie7 no-js" lang="en_US"> <![endif]-->
|
||||
<!--[if IE 7]> <html class="ie7 lt-ie10 lt-ie9 lt-ie8 no-js" lang="en_US"> <![endif]-->
|
||||
<!--[if IE 8]> <html class="ie8 lt-ie10 lt-ie9 no-js" lang="en_US"> <![endif]-->
|
||||
<!--[if IE 9]> <html class="ie9 lt-ie10 no-js" lang="en_US"> <![endif]-->
|
||||
<!--[if (gte IE 9)|(gt IEMobile 7)|!(IEMobile)|!(IE)]><!--><html class="no-js" lang="en_US"><!--<![endif]-->
|
||||
|
||||
<head>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8;charset=utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=1" />
|
||||
<meta name="HandheldFriendly" content="true"/>
|
||||
|
||||
<link rel="canonical" href="https://duckduckgo.com/">
|
||||
|
||||
<link rel="stylesheet" href="/s1847.css" type="text/css">
|
||||
|
||||
<link rel="stylesheet" href="/o1847.css" type="text/css">
|
||||
|
||||
|
||||
|
||||
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"/>
|
||||
<link rel="apple-touch-icon" href="/assets/icons/meta/DDG-iOS-icon_60x60.png"/>
|
||||
<link rel="apple-touch-icon" sizes="76x76" href="/assets/icons/meta/DDG-iOS-icon_76x76.png"/>
|
||||
<link rel="apple-touch-icon" sizes="120x120" href="/assets/icons/meta/DDG-iOS-icon_120x120.png"/>
|
||||
<link rel="apple-touch-icon" sizes="152x152" href="/assets/icons/meta/DDG-iOS-icon_152x152.png"/>
|
||||
<link rel="image_src" href="/assets/icons/meta/DDG-icon_256x256.png"/>
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
|
||||
<meta name="twitter:card" content="summary">
|
||||
<meta name="twitter:site" value="@duckduckgo">
|
||||
|
||||
<meta property="og:url" content="https://duckduckgo.com/" />
|
||||
<meta property="og:site_name" content="DuckDuckGo" />
|
||||
<meta property="og:image" content="https://duckduckgo.com/assets/logo_social-media.png">
|
||||
|
||||
|
||||
<title>DuckDuckGo — Privacy, simplified.</title>
|
||||
<meta property="og:title" content="DuckDuckGo — Privacy, simplified." />
|
||||
|
||||
|
||||
<meta property="og:description" content="The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.">
|
||||
<meta name="description" content="The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.">
|
||||
|
||||
|
||||
</head>
|
||||
<body id="pg-index" class="page-index body--home">
|
||||
<script type="text/javascript">
|
||||
var settings_js_version = "/s2475.js",
|
||||
locale = "en_US";
|
||||
</script>
|
||||
<script type="text/javascript" src="/lib/l113.js"></script>
|
||||
<script type="text/javascript" src="/locale/en_US/duckduckgo14.js"></script>
|
||||
<script type="text/javascript" src="/util/u418.js"></script>
|
||||
<script type="text/javascript" src="/d2727.js"></script>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
DDG.page = new DDG.Pages.Home();
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
<div class="site-wrapper site-wrapper--home js-site-wrapper">
|
||||
|
||||
|
||||
<div class="header-wrap--home js-header-wrap">
|
||||
<div class="header--aside js-header-aside"></div>
|
||||
<div class="js-header-home-search header-wrap--home__search">
|
||||
<div class="logo-wrap--home">
|
||||
<a id="logo_homepage_link" class="logo_homepage" href="/about">
|
||||
About DuckDuckGo
|
||||
<span class="logo_homepage__tt">Duck it!</span>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<form id="search_form_homepage_top" class="search search--home js-search-form-top" name="x" method="POST" action="/html">
|
||||
<input class="search__input js-search-input" type="text" autocomplete="off" name="q" tabindex="1" value="">
|
||||
<input class="search__button js-search-button" type="submit" tabindex="2" value="S" />
|
||||
<input class="search__clear empty js-search-clear" type="button" tabindex="3" value="X" />
|
||||
<div class="search__hidden js-search-hidden"></div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<div id="" class="content-wrap--home">
|
||||
<div id="content_homepage" class="content--home">
|
||||
<div class="cw--c">
|
||||
<div class="logo-wrap--home">
|
||||
<a id="logo_homepage_link" class="logo_homepage" href="/about">
|
||||
About DuckDuckGo
|
||||
<span class="logo_homepage__tt">Duck it!</span>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class="search-wrap--home">
|
||||
<form id="search_form_homepage" class="search search--home js-search-form" name="x" method="POST" action="/html">
|
||||
<input id="search_form_input_homepage" class="search__input js-search-input" type="text" autocomplete="off" name="q" tabindex="1" value="">
|
||||
<input id="search_button_homepage" class="search__button js-search-button" type="submit" tabindex="2" value="S" />
|
||||
<input id="search_form_input_clear" class="search__clear empty js-search-clear" type="button" tabindex="3" value="X" />
|
||||
<div id="search_elements_hidden" class="search__hidden js-search-hidden"></div>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<!-- en_US All Settings -->
|
||||
<noscript>
|
||||
<div class="tag-home">
|
||||
<div class="tag-home__wrapper">
|
||||
<div class="tag-home__item">
|
||||
The search engine that doesn't track you.
|
||||
<span class="hide--screen-xs"><a href="/about" class="tag-home__link">Learn More</a>.</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</noscript>
|
||||
<div class="tag-home tag-home--slide no-js__hide js-tag-home"></div>
|
||||
<div id="error_homepage"></div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div> <!-- cw -->
|
||||
</div> <!-- content_homepage //-->
|
||||
</div> <!-- content_wrapper_homepage //-->
|
||||
<div id="footer_homepage" class="foot-home js-foot-home"></div>
|
||||
|
||||
<script type="text/javascript">
|
||||
{function seterr(str) {
|
||||
var error=document.getElementById('error_homepage');
|
||||
error.innerHTML=str;
|
||||
$(error).css('display','block');
|
||||
}
|
||||
var err=new RegExp('[\?\&]e=([^\&]+)');var errm=new Array();errm['2']='no search';errm['3']='search too long';errm['4']='not UTF\u002d8 encoding';errm['6']='too many search terms';if (err.test(window.location.href)) seterr('Oops, '+(errm[RegExp.$1]?errm[RegExp.$1]:'there was an error.')+' Please try again');};
|
||||
|
||||
if (kurl) {
|
||||
document.getElementById("logo_homepage_link").href += (document.getElementById("logo_homepage_link").href.indexOf('?')==-1 ? '?t=i' : '') + kurl;
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
</div> <!-- site-wrapper -->
|
||||
</body>
|
||||
</html>
|
3
test/mocks/duckduckgo/test keyword_page1.html
Normal file
File diff suppressed because one or more lines are too long
3
test/mocks/duckduckgo/test keyword_page2.html
Normal file
File diff suppressed because one or more lines are too long
3
test/mocks/duckduckgo/test keyword_page3.html
Normal file
File diff suppressed because one or more lines are too long
358
test/mocks/google/index.html
Normal file
File diff suppressed because one or more lines are too long
209
test/mocks/google/test keyword_page1.html
Normal file
File diff suppressed because one or more lines are too long
206
test/mocks/google/test keyword_page2.html
Normal file
File diff suppressed because one or more lines are too long
191
test/mocks/google/test keyword_page3.html
Normal file
File diff suppressed because one or more lines are too long
123
test/modules/bing.js
Normal file
@ -0,0 +1,123 @@
|
||||
'use strict';
|
||||
const express = require('express');
|
||||
const puppeteer = require('puppeteer');
|
||||
const { createLogger, transports } = require('winston');
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const assert = require('assert');
|
||||
const path = require('path');
|
||||
const keyCert = require('key-cert');
|
||||
const Promise = require('bluebird');
|
||||
const Proxy = require('http-mitm-proxy');
|
||||
|
||||
const debug = require('debug')('se-scraper:test');
|
||||
const { BingScraper } = require('../../src/modules/bing');
|
||||
|
||||
const httpPort = 3012;
|
||||
const httpsPort = httpPort + 1;
|
||||
const proxyPort = httpPort + 2;
|
||||
|
||||
const fakeSearchEngine = express();
|
||||
fakeSearchEngine.get('/search', (req, res, next) => {
|
||||
debug('q=%s', req.query.q);
|
||||
const pageNumber = Math.round((req.query.first || 0) /10) + 1;
|
||||
res.sendFile(path.join(__dirname, '../mocks/bing/' + req.query.q + '_page' + pageNumber + '.html'));
|
||||
});
|
||||
fakeSearchEngine.use(express.static('test/mocks/bing', {extensions: ['html']}));
|
||||
|
||||
describe('Module Bing', function(){
|
||||
|
||||
let httpServer, httpsServer, proxy;
|
||||
before(async function(){
|
||||
// Here mount our fake engine in both http and https listen server
|
||||
httpServer = http.createServer(fakeSearchEngine);
|
||||
httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
|
||||
|
||||
proxy = Proxy();
|
||||
proxy.onRequest((ctx, callback) => {
|
||||
ctx.proxyToServerRequestOptions.host = 'localhost';
|
||||
ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
|
||||
ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
|
||||
debug('connection proxied askedHost=%s toPort=%s', ctx.clientToProxyRequest.headers.host, ctx.proxyToServerRequestOptions.port);
|
||||
return callback();
|
||||
});
|
||||
|
||||
await Promise.promisify(proxy.listen, { context: proxy })({ port: proxyPort });
|
||||
await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
|
||||
await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
|
||||
debug('Fake http search engine servers started');
|
||||
});
|
||||
|
||||
after(function(){
|
||||
proxy.close();
|
||||
httpsServer.close();
|
||||
httpServer.close();
|
||||
});
|
||||
|
||||
let browser;
|
||||
let page;
|
||||
beforeEach(async function(){
|
||||
debug('Start a new browser');
|
||||
browser = await puppeteer.launch({
|
||||
//dumpio: true,
|
||||
//headless: false,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: [ '--proxy-server=http://localhost:' + proxyPort ]
|
||||
});
|
||||
debug('Open a fresh page');
|
||||
page = await browser.newPage();
|
||||
});
|
||||
|
||||
afterEach(async function(){
|
||||
await browser.close();
|
||||
});
|
||||
|
||||
const testLogger = createLogger({
|
||||
transports: [
|
||||
new transports.Console({
|
||||
level: 'error'
|
||||
})
|
||||
]
|
||||
});
|
||||
|
||||
it('one keyword one page', function(){
|
||||
const bingScraper = new BingScraper({
|
||||
config: {
|
||||
search_engine_name: 'bing',
|
||||
throw_on_detection: true,
|
||||
keywords: ['test keyword'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
}
|
||||
});
|
||||
bingScraper.STANDARD_TIMEOUT = 500;
|
||||
return bingScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 1, 'Must do one request');
|
||||
assert.strictEqual(results['test keyword']['1'].results.length, 6, 'Must have 6 organic results parsed');
|
||||
});
|
||||
});
|
||||
|
||||
it('one keyword 3 pages', function () {
|
||||
const bingScraper = new BingScraper({
|
||||
config: {
|
||||
search_engine_name: 'bing',
|
||||
throw_on_detection: true,
|
||||
keywords: ['test keyword'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
num_pages: 3,
|
||||
}
|
||||
});
|
||||
bingScraper.STANDARD_TIMEOUT = 500;
|
||||
return bingScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 3, 'Must do three requests');
|
||||
assert.strictEqual(results['test keyword']['1'].results.length, 6, 'Must have 6 organic results parsed on page 1');
|
||||
assert.strictEqual(results['test keyword']['1'].results[0].title, 'Keyword Tests | TestComplete Documentation', 'Title not matching on first organic result page 1');
|
||||
assert.strictEqual(results['test keyword']['2'].results.length, 10, 'Must have 10 organic results parsed on page 2');
|
||||
assert.strictEqual(results['test keyword']['2'].results[0].title, 'Keywords - TestLink', 'Title not matching on first organic result page 2');
|
||||
assert.strictEqual(results['test keyword']['3'].results.length, 10, 'Must have 10 organic results parsed on page 3');
|
||||
assert.strictEqual(results['test keyword']['3'].results[0].title, 'Keyword Driven Testing | TestComplete', 'Title not matching on first organic result page 3');
|
||||
});
|
||||
});
|
||||
|
||||
});
|
140
test/modules/duckduckgo.js
Normal file
@ -0,0 +1,140 @@
|
||||
'use strict';
|
||||
const express = require('express');
|
||||
const puppeteer = require('puppeteer');
|
||||
const { createLogger, transports } = require('winston');
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const assert = require('assert');
|
||||
const path = require('path');
|
||||
const keyCert = require('key-cert');
|
||||
const Promise = require('bluebird');
|
||||
const Proxy = require('http-mitm-proxy');
|
||||
|
||||
const debug = require('debug')('se-scraper:test');
|
||||
const { DuckduckgoScraper } = require('../../src/modules/duckduckgo');
|
||||
|
||||
const httpPort = 3012;
|
||||
const httpsPort = httpPort + 1;
|
||||
const proxyPort = httpPort + 2;
|
||||
|
||||
const fakeSearchEngine = express();
|
||||
fakeSearchEngine.use(express.urlencoded({ extended: true }))
|
||||
fakeSearchEngine.get('/', (req, res, next) => {
|
||||
if(!req.query.q){
|
||||
return next();
|
||||
}
|
||||
debug('q=%s page=%d', req.query.q, req.query.page);
|
||||
const pageNumber = req.query.page;
|
||||
res.sendFile(path.join(__dirname, '../mocks/duckduckgo/' + req.query.q + '_page' + pageNumber + '.html'));
|
||||
});
|
||||
fakeSearchEngine.post('/html', (req, res) => {
|
||||
debug('body=%o', req.body);
|
||||
const pageNumber = 1;
|
||||
res.sendFile(path.join(__dirname, '../mocks/duckduckgo/' + req.body.q + '_page' + pageNumber + '.html'));
|
||||
});
|
||||
fakeSearchEngine.use(express.static('test/mocks/duckduckgo', {extensions: ['html']}));
|
||||
|
||||
describe('Module DuckDuckGo', function(){
|
||||
|
||||
let httpServer, httpsServer, proxy;
|
||||
before(async function(){
|
||||
// Here mount our fake engine in both http and https listen server
|
||||
httpServer = http.createServer(fakeSearchEngine);
|
||||
httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
|
||||
|
||||
proxy = Proxy();
|
||||
proxy.onRequest((ctx, callback) => {
|
||||
ctx.proxyToServerRequestOptions.host = 'localhost';
|
||||
ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
|
||||
ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
|
||||
debug('proxy askedHost=%s method=%s url=%s toPort=%s',
|
||||
ctx.clientToProxyRequest.headers.host,
|
||||
ctx.clientToProxyRequest.method,
|
||||
ctx.clientToProxyRequest.url,
|
||||
ctx.proxyToServerRequestOptions.port
|
||||
);
|
||||
return callback();
|
||||
});
|
||||
|
||||
await Promise.promisify(proxy.listen, { context: proxy })({ port: proxyPort });
|
||||
await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
|
||||
await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
|
||||
debug('Fake http search engine servers started');
|
||||
});
|
||||
|
||||
after(function(){
|
||||
proxy.close();
|
||||
httpsServer.close();
|
||||
httpServer.close();
|
||||
});
|
||||
|
||||
let browser;
|
||||
let page;
|
||||
beforeEach(async function(){
|
||||
debug('Start a new browser');
|
||||
browser = await puppeteer.launch({
|
||||
//dumpio: true,
|
||||
//headless: false,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: [ '--proxy-server=http://localhost:' + proxyPort ]
|
||||
});
|
||||
debug('Open a fresh page');
|
||||
page = await browser.newPage();
|
||||
});
|
||||
|
||||
afterEach(async function(){
|
||||
await browser.close();
|
||||
});
|
||||
|
||||
const testLogger = createLogger({
|
||||
transports: [
|
||||
new transports.Console({
|
||||
level: 'error'
|
||||
})
|
||||
]
|
||||
});
|
||||
|
||||
it('one keyword one page', function(){
|
||||
const duckduckgoScraper = new DuckduckgoScraper({
|
||||
config: {
|
||||
search_engine_name: 'duckduckgo',
|
||||
throw_on_detection: true,
|
||||
keywords: ['test keyword'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
}
|
||||
});
|
||||
duckduckgoScraper.STANDARD_TIMEOUT = 1000;
|
||||
return duckduckgoScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 1, 'Must do one request');
|
||||
assert.strictEqual(results['test keyword']['1'].results.length, 10, 'Must have 10 organic results parsed');
|
||||
});
|
||||
});
|
||||
|
||||
it('one keyword 3 pages', function () {
|
||||
this.timeout(4000);
|
||||
const duckduckgoScraper = new DuckduckgoScraper({
|
||||
config: {
|
||||
search_engine_name: 'duckduckgo',
|
||||
throw_on_detection: true,
|
||||
keywords: ['test keyword'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
num_pages: 3,
|
||||
}
|
||||
});
|
||||
duckduckgoScraper.STANDARD_TIMEOUT = 1000;
|
||||
return duckduckgoScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 3, 'Must do three requests');
|
||||
assert.strictEqual(results['test keyword']['1'].results.length, 10, 'Must have 10 organic results parsed on page 1');
|
||||
assert.strictEqual(results['test keyword']['1'].results[0].title, 'Keyword Tests | TestComplete Documentation', 'Title not matching on first organic result page 1');
|
||||
debug('results page 1 %O',results['test keyword']['1'].results);
|
||||
debug('results page 2 %O', results['test keyword']['2'].results);
|
||||
assert.strictEqual(results['test keyword']['2'].results.length, 19, 'Must have 19 organic results parsed on page 2');
|
||||
assert.strictEqual(results['test keyword']['2'].results[0].title, 'Quest Diagnostics: Test Directory', 'Title not matching on first organic result page 2');
|
||||
assert.strictEqual(results['test keyword']['3'].results.length, 48, 'Must have 48 organic results parsed on page 3');
|
||||
assert.strictEqual(results['test keyword']['3'].results[0].title, 'Java Keywords Quiz - Sporcle', 'Title not matching on first organic result page 3');
|
||||
});
|
||||
});
|
||||
|
||||
});
|
123
test/modules/google.js
Normal file
@ -0,0 +1,123 @@
|
||||
'use strict';
|
||||
const express = require('express');
|
||||
const puppeteer = require('puppeteer');
|
||||
const { createLogger, transports } = require('winston');
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const assert = require('assert');
|
||||
const path = require('path');
|
||||
const keyCert = require('key-cert');
|
||||
const Promise = require('bluebird');
|
||||
const Proxy = require('http-mitm-proxy');
|
||||
|
||||
const debug = require('debug')('se-scraper:test');
|
||||
const { GoogleScraper } = require('../../src/modules/google');
|
||||
|
||||
const httpPort = 3012;
|
||||
const httpsPort = httpPort + 1;
|
||||
const proxyPort = httpPort + 2;
|
||||
|
||||
const fakeSearchEngine = express();
|
||||
fakeSearchEngine.get('/search', (req, res) => {
|
||||
debug('q=%s', req.query.q);
|
||||
const pageNumber = ((req.query.start/10) || 0) + 1;
|
||||
res.sendFile(path.join(__dirname, '../mocks/google/' + req.query.q + '_page' + pageNumber + '.html'));
|
||||
});
|
||||
fakeSearchEngine.use(express.static('test/mocks/google', {extensions: ['html']}));
|
||||
|
||||
describe('Module Google', function(){
|
||||
|
||||
let httpServer, httpsServer, proxy;
|
||||
before(async function(){
|
||||
// Here mount our fake engine in both http and https listen server
|
||||
httpServer = http.createServer(fakeSearchEngine);
|
||||
httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
|
||||
|
||||
proxy = Proxy();
|
||||
proxy.onRequest((ctx, callback) => {
|
||||
ctx.proxyToServerRequestOptions.host = 'localhost';
|
||||
ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
|
||||
ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
|
||||
debug('connection proxied askedHost=%s toPort=%s', ctx.clientToProxyRequest.headers.host, ctx.proxyToServerRequestOptions.port);
|
||||
return callback();
|
||||
});
|
||||
|
||||
await Promise.promisify(proxy.listen, { context: proxy })({ port: proxyPort });
|
||||
await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
|
||||
await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
|
||||
debug('Fake http search engine servers started');
|
||||
});
|
||||
|
||||
after(function(){
|
||||
proxy.close();
|
||||
httpsServer.close();
|
||||
httpServer.close();
|
||||
});
|
||||
|
||||
let browser;
|
||||
let page;
|
||||
beforeEach(async function(){
|
||||
debug('Start a new browser');
|
||||
browser = await puppeteer.launch({
|
||||
//dumpio: true,
|
||||
//headless: false,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: [ '--proxy-server=http://localhost:' + proxyPort ]
|
||||
});
|
||||
debug('Open a fresh page');
|
||||
page = await browser.newPage();
|
||||
});
|
||||
|
||||
afterEach(async function(){
|
||||
await browser.close();
|
||||
});
|
||||
|
||||
const testLogger = createLogger({
|
||||
transports: [
|
||||
new transports.Console({
|
||||
level: 'error'
|
||||
})
|
||||
]
|
||||
});
|
||||
|
||||
it('one keyword one page', function(){
|
||||
const googleScraper = new GoogleScraper({
|
||||
config: {
|
||||
search_engine_name: 'google',
|
||||
throw_on_detection: true,
|
||||
keywords: ['test keyword'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
}
|
||||
});
|
||||
googleScraper.STANDARD_TIMEOUT = 500;
|
||||
return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 1, 'Must do one request');
|
||||
assert.strictEqual(results['test keyword']['1'].results.length, 10, 'Must have 10 organic results parsed');
|
||||
});
|
||||
});
|
||||
|
||||
it('one keyword 3 pages', function () {
|
||||
const googleScraper = new GoogleScraper({
|
||||
config: {
|
||||
search_engine_name: 'google',
|
||||
throw_on_detection: true,
|
||||
keywords: ['test keyword'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
num_pages: 3,
|
||||
}
|
||||
});
|
||||
googleScraper.STANDARD_TIMEOUT = 500;
|
||||
return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 3, 'Must do three requests');
|
||||
assert.strictEqual(results['test keyword']['1'].results.length, 10, 'Must have 10 organic results parsed on page 1');
|
||||
assert.strictEqual(results['test keyword']['1'].results[0].title, 'Keyword Tool (FREE) ᐈ #1 Google Keyword Planner Alternative', 'Title not matching on first organic result page 1');
|
||||
assert.strictEqual(results['test keyword']['2'].results.length, 10, 'Must have 10 organic results parsed on page 2');
|
||||
assert.strictEqual(results['test keyword']['2'].results[0].title, 'Keyword Research | The Beginner\'s Guide to SEO - Moz', 'Title not matching on first organic result page 2');
|
||||
assert.strictEqual(results['test keyword']['3'].results.length, 10, 'Must have 10 organic results parsed on page 3');
|
||||
assert.strictEqual(results['test keyword']['3'].results[0].title, 'The ACT Keyword Study Plan — NerdCoach', 'Title not matching on first organic result page 3');
|
||||
});
|
||||
});
|
||||
|
||||
});
|
161
test/proxy.js
Normal file
@ -0,0 +1,161 @@
|
||||
'use strict';
|
||||
const express = require('express');
|
||||
const { createLogger, transports } = require('winston');
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const assert = require('assert');
|
||||
const keyCert = require('key-cert');
|
||||
const Promise = require('bluebird');
|
||||
const Proxy = require('http-mitm-proxy');
|
||||
|
||||
const debug = require('debug')('se-scraper:test');
|
||||
const se_scraper = require('../');
|
||||
const Scraper = require('../src/modules/se_scraper');
|
||||
|
||||
const httpPort = 3012;
|
||||
const httpsPort = httpPort + 1;
|
||||
const proxyPort = httpPort + 2;
|
||||
|
||||
const fakeSearchEngine = express();
|
||||
fakeSearchEngine.set('trust proxy', 'loopback');
|
||||
fakeSearchEngine.get('/test-proxy', (req, res) => {
|
||||
debug('fake-search-engine req.hostname=%s', req.hostname);
|
||||
//debug('req to', req.socket.localAddress, req.socket.localPort);
|
||||
res.send(req.hostname);
|
||||
});
|
||||
|
||||
describe('Config', function(){
|
||||
|
||||
let httpServer, httpsServer, proxy;
|
||||
before(async function(){
|
||||
// Here mount our fake engine in both http and https listen server
|
||||
httpServer = http.createServer(fakeSearchEngine);
|
||||
httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
|
||||
|
||||
proxy = Proxy();
|
||||
proxy.onRequest((ctx, callback) => {
|
||||
ctx.proxyToServerRequestOptions.host = 'localhost';
|
||||
ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
|
||||
ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
|
||||
debug('Proxy request to %s', ctx.clientToProxyRequest.headers.host);
|
||||
return callback();
|
||||
});
|
||||
|
||||
await Promise.promisify(proxy.listen, {context: proxy})({port: proxyPort});
|
||||
await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
|
||||
await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
|
||||
debug('Fake http search engine servers started');
|
||||
});
|
||||
|
||||
after(function(){
|
||||
httpsServer.close();
|
||||
httpServer.close();
|
||||
proxy.close();
|
||||
});
|
||||
|
||||
describe('proxies', function(){
|
||||
|
||||
class MockScraperTestProxy extends Scraper {
|
||||
|
||||
async load_start_page(){
|
||||
return true;
|
||||
}
|
||||
|
||||
async search_keyword(){
|
||||
await this.page.goto('http://test.local:' + httpPort + '/test-proxy');
|
||||
}
|
||||
|
||||
async parse_async(){
|
||||
const bodyHandle = await this.page.$('body');
|
||||
return await this.page.evaluate(body => body.innerHTML, bodyHandle);
|
||||
}
|
||||
}
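// The three overrides above are all a Scraper subclass needs; a hedged sketch of a
// real-world variant (the URL and the keyword parameter are assumptions, mirroring
// how the shipped engine modules implement search_keyword):
class ExampleTitleScraper extends Scraper {
    async load_start_page() {
        return true;
    }
    async search_keyword(keyword) {
        await this.page.goto('https://example.com/search?q=' + encodeURIComponent(keyword));
    }
    async parse_async() {
        return await this.page.evaluate(() => document.title);
    }
}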
|
||||
|
||||
const testLogger = createLogger({
|
||||
transports: [
|
||||
new transports.Console({
|
||||
level: 'error'
|
||||
})
|
||||
]
|
||||
});
|
||||
|
||||
/**
|
||||
* Jobs will be executed 2 by 2 through the proxy and direct connection
|
||||
* THIS TEST NEED TO HAVE test.local 127.0.0.1 in /etc/hosts because chrome bypass localhost even with proxy set
|
||||
*/
|
||||
it('one proxy given, use_proxies_only=false', async function () {
|
||||
|
||||
const scrape_job = {
|
||||
search_engine: MockScraperTestProxy,
|
||||
keywords: ['news', 'some stuff', 'i work too much', 'what to do?', 'javascript is hard'],
|
||||
};
|
||||
|
||||
var scraper = new se_scraper.ScrapeManager({
|
||||
throw_on_detection: true,
|
||||
proxies: ['http://localhost:' + proxyPort],
|
||||
// default is use_proxies_only: false,
|
||||
logger: testLogger,
|
||||
});
|
||||
await scraper.start();
|
||||
|
||||
const { results } = await scraper.scrape(scrape_job);
|
||||
assert.strictEqual(results['news']['1'], 'test.local');
|
||||
assert.strictEqual(results['some stuff']['1'], 'ProxiedThroughFakeEngine');
|
||||
assert.strictEqual(results['i work too much']['1'], 'test.local');
|
||||
assert.strictEqual(results['what to do?']['1'], 'ProxiedThroughFakeEngine');
|
||||
assert.strictEqual(results['javascript is hard']['1'], 'test.local');
|
||||
|
||||
await scraper.quit();
|
||||
});
|
||||
|
||||
/**
|
||||
* Jobs will be executed 1 by 1 through the proxy
|
||||
*/
|
||||
it('one proxy given, use_proxies_only=true', async function () {
|
||||
|
||||
const scrape_job = {
|
||||
search_engine: MockScraperTestProxy,
|
||||
keywords: ['news', 'some stuff', 'i work too much', 'what to do?', 'javascript is hard'],
|
||||
};
|
||||
|
||||
var scraper = new se_scraper.ScrapeManager({
|
||||
throw_on_detection: true,
|
||||
proxies: ['http://localhost:' + proxyPort],
|
||||
use_proxies_only: true,
|
||||
logger: testLogger,
|
||||
});
|
||||
await scraper.start();
|
||||
|
||||
const { results } = await scraper.scrape(scrape_job);
|
||||
assert.strictEqual(results['news']['1'], 'ProxiedThroughFakeEngine');
|
||||
assert.strictEqual(results['some stuff']['1'], 'ProxiedThroughFakeEngine');
|
||||
assert.strictEqual(results['i work too much']['1'], 'ProxiedThroughFakeEngine');
|
||||
assert.strictEqual(results['what to do?']['1'], 'ProxiedThroughFakeEngine');
|
||||
assert.strictEqual(results['javascript is hard']['1'], 'ProxiedThroughFakeEngine');
|
||||
|
||||
await scraper.quit();
|
||||
});
|
||||
|
||||
it('zero proxy given, use_proxies_only=true', async function () {
|
||||
|
||||
const scrape_job = {
|
||||
search_engine: MockScraperTestProxy,
|
||||
keywords: ['news', 'some stuff', 'i work too much', 'what to do?', 'javascript is hard'],
|
||||
};
|
||||
|
||||
await assert.rejects(async () => {
|
||||
var scraper = new se_scraper.ScrapeManager({
|
||||
throw_on_detection: true,
|
||||
use_proxies_only: true,
|
||||
logger: testLogger,
|
||||
});
|
||||
await scraper.start();
|
||||
const { results } = await scraper.scrape(scrape_job);
|
||||
await scraper.quit();
|
||||
}, /Must provide at least one proxy in proxies if you enable use_proxies_only/);
|
||||
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
});
|
@ -1,203 +0,0 @@
|
||||
const se_scraper = require('./../index.js');
var assert = require('chai').assert;

/*
 * Use chai and mocha for tests.
 * https://mochajs.org/#installation
 */

const normal_search_keywords = ['apple tree', 'weather tomorrow'];

async function normal_search_test() {
    let config = {
        search_engine: 'bing',
        compress: false,
        debug: false,
        verbose: false,
        keywords: normal_search_keywords,
        keyword_file: '',
        num_pages: 3,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('normal_search_test()');
    await se_scraper.scrape(config, normal_search_test_case);
}

// we test with a callback function passed to our handler
function normal_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 6);

        for (let query in response.results) {
            let total_rank = 1;
            assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.equal(obj.no_results, false, 'no results should be false');
                assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
                assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.visible_link, 'visible_link must be ok');
                    assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                    assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');

                    assert.isOk(res.title, 'title must be ok');
                    assert.typeOf(res.title, 'string', 'title must be string');
                    assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                    assert.isOk(res.snippet, 'snippet must be ok');
                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');

                    assert.isNumber(res.rank, 'rank must be integer');
                    assert.equal(res.rank, total_rank++, 'rank is wrong');
                }
            }
        }
    }
}

const keywords_no_results = ['2342kljp;fj9834u40abJAkasdlfkjsladfkjasfdas;lk3453-934023safkl34a44dsflkjaQQuBBdfk',];

async function no_results_test() {
    let config = {
        search_engine: 'bing',
        compress: false,
        debug: false,
        verbose: false,
        keywords: keywords_no_results,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    console.log('no_results_test()');
    await se_scraper.scrape(config, test_case_no_results);
}

// we test with a callback function passed to our handler
function test_case_no_results(err, response) {
    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 1);

        let results = response.results;
        for (let query in response.results) {

            assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

                assert(obj.results.length === 0, 'results must have 0 SERP objects');
                assert.equal(obj.no_results, true, 'no results should be true');
                assert.isEmpty(obj.num_results, 'num_results should be an empty string');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
            }
        }
    }
}

const effective_query_keywords = ['mount everrest'];

async function effective_query_test() {
    let config = {
        search_engine: 'bing',
        compress: false,
        debug: false,
        verbose: false,
        keywords: effective_query_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    console.log('effective_query_test()');
    await se_scraper.scrape(config, test_case_effective_query);
}

// we test with a callback function passed to our handler
function test_case_effective_query(err, response) {

    if (err) {
        console.error(err);
    } else {

        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 1);

        let results = response.results;
        for (let query in response.results) {

            assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

                // the effective query must be different from the original keyword
                assert.isOk(obj.effective_query, 'effective query must be ok');
                assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
                assert(obj.effective_query !== query, 'effective query must be different from keyword');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.equal(obj.no_results, false, 'no results should be false');
                assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
                assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
            }
        }
    }
}

(async () => {
    await normal_search_test();
    await no_results_test();
    await effective_query_test();
})();

@@ -1,145 +0,0 @@

const se_scraper = require('./../index.js');
var assert = require('chai').assert;

/*
 * Use chai and mocha for tests.
 * https://mochajs.org/#installation
 */

const normal_search_keywords = ['apple tree', 'weather tomorrow'];

async function normal_search_test() {
    let config = {
        search_engine: 'duckduckgo',
        compress: false,
        debug: false,
        verbose: false,
        keywords: normal_search_keywords,
        keyword_file: '',
        num_pages: 2,
        headless: false,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('normal_search_test()');
    await se_scraper.scrape(config, normal_search_test_case);
}

// we test with a callback function passed to our handler
function normal_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 4);

        for (let query in response.results) {
            let total_rank = 1;

            assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.visible_link, 'visible_link must be ok');
                    assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                    assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');

                    assert.isOk(res.title, 'title must be ok');
                    assert.typeOf(res.title, 'string', 'title must be string');
                    assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                    assert.isOk(res.snippet, 'snippet must be ok');
                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');

                    assert.isNumber(res.rank, 'rank must be integer');
                    assert.equal(res.rank, total_rank++, 'rank is wrong');
                }
            }
        }
    }
}

const effective_query_keywords = ['mount everrest'];

async function effective_query_test() {
    let config = {
        search_engine: 'duckduckgo',
        compress: false,
        debug: false,
        verbose: false,
        keywords: effective_query_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    console.log('effective_query_test()');
    await se_scraper.scrape(config, test_case_effective_query);
}

// we test with a callback function passed to our handler
function test_case_effective_query(err, response) {

    if (err) {
        console.error(err);
    } else {

        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 1);

        let results = response.results;
        for (let query in response.results) {

            assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');

                // the effective query must be different from the original keyword
                assert.isOk(obj.effective_query, 'effective query must be ok');
                assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
                assert(obj.effective_query !== query, 'effective query must be different from keyword');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
            }
        }
    }
}

(async () => {
    await normal_search_test();
    await effective_query_test();
})();

@@ -1,204 +0,0 @@

const se_scraper = require('./../index.js');
var assert = require('chai').assert;

/*
 * Use chai and mocha for tests.
 * https://mochajs.org/#installation
 */

const normal_search_keywords = ['apple tree', 'weather tomorrow'];

async function normal_search_test() {
    let config = {
        search_engine: 'google',
        compress: false,
        debug: false,
        verbose: false,
        keywords: normal_search_keywords,
        keyword_file: '',
        num_pages: 3,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('normal_search_test()');
    await se_scraper.scrape(config, normal_search_test_case);
}

// we test with a callback function passed to our handler
function normal_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 6);

        for (let query in response.results) {
            let total_rank = 1;

            assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects');
                assert.equal(obj.no_results, false, 'no results should be false');
                assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
                assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.visible_link, 'visible_link must be ok');
                    assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                    assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');

                    assert.isOk(res.title, 'title must be ok');
                    assert.typeOf(res.title, 'string', 'title must be string');
                    assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');

                    assert.isOk(res.snippet, 'snippet must be ok');
                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');

                    assert.isNumber(res.rank, 'rank must be integer');
                    assert.equal(res.rank, total_rank++, 'rank is wrong');
                }
            }
        }
    }
}

const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',];

async function no_results_test() {
    let config = {
        search_engine: 'google',
        compress: false,
        debug: false,
        verbose: false,
        keywords: keywords_no_results,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    console.log('no_results_test()');
    await se_scraper.scrape(config, test_case_no_results);
}

// we test with a callback function passed to our handler
function test_case_no_results(err, response) {
    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 1);

        let results = response.results;
        for (let query in response.results) {

            assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

                assert(obj.results.length === 0, 'results must have 0 SERP objects');
                assert.equal(obj.no_results, true, 'no results should be true');
                assert.isEmpty(obj.num_results, 'num_results should be an empty string');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
            }
        }
    }
}

const effective_query_keywords = ['mount evverrest'];

async function effective_query_test() {
    let config = {
        search_engine: 'google',
        compress: false,
        debug: false,
        verbose: false,
        keywords: effective_query_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    console.log('effective_query_test()');
    await se_scraper.scrape(config, test_case_effective_query);
}

// we test with a callback function passed to our handler
function test_case_effective_query(err, response) {

    if (err) {
        console.error(err);
    } else {

        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 1);

        let results = response.results;
        for (let query in response.results) {

            assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

                // the effective query must be different from the original keyword
                assert.isOk(obj.effective_query, 'effective query must be ok');
                assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
                assert(obj.effective_query !== query, 'effective query must be different from keyword');

                assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects');
                assert.equal(obj.no_results, false, 'no results should be false');
                assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
                assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
            }
        }
    }
}

(async () => {
    await normal_search_test();
    await no_results_test();
    await effective_query_test();
})();

@@ -1,85 +0,0 @@

const se_scraper = require('./../index.js');
var assert = require('chai').assert;

/*
 * Use chai and mocha for tests.
 * https://mochajs.org/#installation
 */

const normal_search_keywords = ['apple', 'rain'];

async function normal_image_search_test() {
    let config = {
        search_engine: 'google_image',
        compress: false,
        debug: false,
        verbose: false,
        keywords: normal_search_keywords,
        keyword_file: '',
        num_pages: 2,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('normal_image_search_test()');
    await se_scraper.scrape(config, normal_image_search_test_case);
}

// we test with a callback function passed to our handler
function normal_image_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');
        assert.equal(response.metadata.num_requests, 2);

        for (let query in response.results) {

            let total_rank = 1;

            assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'effective_query'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 15, 'results must have at least 15 SERP objects');
                assert.equal(obj.no_results, false, 'no results should be false');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'snippet', 'rank', 'clean_link'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.clean_link, 'clean_link must be ok');
                    assert.typeOf(res.clean_link, 'string', 'clean_link must be string');
                    assert.isAtLeast(res.clean_link.length, 5, 'clean_link must have at least 5 chars');

                    assert.isOk(res.snippet, 'snippet must be ok');
                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');

                    assert.isNumber(res.rank, 'rank must be integer');
                    assert.equal(res.rank, total_rank++, 'rank is wrong');
                }
            }
        }
    }
}

(async () => {
    await normal_image_search_test();
})();

@@ -1,221 +0,0 @@

const se_scraper = require('./../index.js');
var assert = require('chai').assert;

/*
 * Use chai and mocha for tests.
 * https://mochajs.org/#installation
 */

const quote_search_keywords = ['MSFT', 'AAPL'];

async function reuters_search_test() {
    let config = {
        search_engine: 'reuters',
        compress: false,
        debug: false,
        verbose: false,
        keywords: quote_search_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('reuters_search_test()');
    await se_scraper.scrape(config, reuters_search_test_case);
}

// we test with a callback function passed to our handler
function reuters_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');

        for (let query in response.results) {
            let total_rank = 1;
            assert.containsAllKeys(response.results, quote_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'title', 'date', 'snippet'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.title, 'title must be ok');
                    assert.typeOf(res.title, 'string', 'title must be string');
                    assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                    assert.isOk(res.snippet, 'snippet must be ok');
                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');

                    assert.isOk(res.date, 'date must be ok');
                    assert.typeOf(res.date, 'string', 'date must be string');
                    assert.isAtLeast(res.date.length, 5, 'date must have at least 5 chars');
                }
            }
        }
    }
}

async function cnbc_search_test() {
    let config = {
        search_engine: 'cnbc',
        compress: false,
        debug: false,
        verbose: false,
        keywords: quote_search_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('cnbc_search_test()');
    await se_scraper.scrape(config, cnbc_search_test_case);
}

// we test with a callback function passed to our handler
function cnbc_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');

        for (let query in response.results) {
            let total_rank = 1;
            assert.containsAllKeys(response.results, quote_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'title', 'date'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.title, 'title must be ok');
                    assert.typeOf(res.title, 'string', 'title must be string');
                    assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                    assert.isOk(res.date, 'date must be ok');
                    assert.typeOf(res.date, 'string', 'date must be string');
                    assert.isAtLeast(res.date.length, 5, 'date must have at least 5 chars');
                }
            }
        }
    }
}

const marketwatch_search_keywords = ['MSFT'];

async function marketwatch_search_test() {
    let config = {
        search_engine: 'marketwatch',
        compress: false,
        debug: false,
        verbose: false,
        keywords: marketwatch_search_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('marketwatch_search_test()');
    await se_scraper.scrape(config, marketwatch_search_test_case);
}

// we test with a callback function passed to our handler
function marketwatch_search_test_case(err, response) {

    if (err) {
        console.error(err);
    } else {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');

        for (let query in response.results) {
            let total_rank = 1;
            assert.containsAllKeys(response.results, marketwatch_search_keywords, 'not all keywords were scraped.');

            for (let page_number in response.results[query]) {

                assert.isNumber(parseInt(page_number), 'page_number must be numeric');

                let obj = response.results[query][page_number];

                assert.containsAllKeys(obj, ['results', 'time'], 'not all keys are in the object');

                assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');

                for (let res of obj.results) {

                    assert.containsAllKeys(res, ['link', 'title', 'date', 'author'], 'not all keys are in the SERP object');

                    assert.isOk(res.link, 'link must be ok');
                    assert.typeOf(res.link, 'string', 'link must be string');
                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                    assert.isOk(res.title, 'title must be ok');
                    assert.typeOf(res.title, 'string', 'title must be string');
                    assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                    assert.isOk(res.author, 'author must be ok');
                    assert.typeOf(res.author, 'string', 'author must be string');
                    assert.isAtLeast(res.author.length, 5, 'author must have at least 5 chars');

                    assert.isOk(res.date, 'date must be ok');
                    assert.typeOf(res.date, 'string', 'date must be string');
                    assert.isAtLeast(res.date.length, 5, 'date must have at least 5 chars');
                }
            }
        }
    }
}

(async () => {
    await reuters_search_test();
    await cnbc_search_test();
    await marketwatch_search_test();
})();

144 test/user_agent.js Normal file
@@ -0,0 +1,144 @@

'use strict';
const express = require('express');
const { createLogger, transports } = require('winston');
const http = require('http');
const https = require('https');
const assert = require('assert');
const keyCert = require('key-cert');
const Promise = require('bluebird');
const Proxy = require('http-mitm-proxy');
const UAParser = require('ua-parser-js');
const _ = require('lodash');

const debug = require('debug')('se-scraper:test');
const se_scraper = require('../');
const Scraper = require('../src/modules/se_scraper');

const httpPort = 3012;
const httpsPort = httpPort + 1;
const proxyPort = httpPort + 2;

const fakeSearchEngine = express();
fakeSearchEngine.set('trust proxy', 'loopback');
fakeSearchEngine.get('/test-user_agent', (req, res) => {
    debug('fake-search-engine req.headers.user-agent=%s', req.headers['user-agent']);
    res.send(req.headers['user-agent']);
});

describe('Config', function(){

    let httpServer, httpsServer, proxy;
    before(async function(){
        // Mount our fake search engine on both an HTTP and an HTTPS listen server
        httpServer = http.createServer(fakeSearchEngine);
        httpsServer = https.createServer(await keyCert(), fakeSearchEngine);

        proxy = Proxy();
        proxy.onRequest((ctx, callback) => {
            ctx.proxyToServerRequestOptions.host = 'localhost';
            ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
            ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
            debug('Proxy request to %s', ctx.clientToProxyRequest.headers.host);
            return callback();
        });

        await Promise.promisify(proxy.listen, {context: proxy})({port: proxyPort});
        await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
        await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
        debug('Fake http search engine servers started');
    });

    after(function(){
        httpsServer.close();
        httpServer.close();
        proxy.close();
    });

    describe('user_agent', function(){

        class MockScraperTestUserAgent extends Scraper {

            async load_start_page(){
                return true;
            }

            async search_keyword(){
                await this.page.goto('http://localhost:' + httpPort + '/test-user_agent');
            }

            async parse_async(){
                const bodyHandle = await this.page.$('body');
                return await this.page.evaluate(body => body.innerHTML, bodyHandle);
            }
        }

        const testLogger = createLogger({
            transports: [
                new transports.Console({
                    level: 'error'
                })
            ]
        });

        /**
         * Test the user_agent option
         */
        it('fixed user_agent', async function () {

            const scrape_job = {
                search_engine: MockScraperTestUserAgent,
                keywords: ['javascript is hard'],
            };

            var scraper = new se_scraper.ScrapeManager({
                throw_on_detection: true,
                logger: testLogger,
                user_agent: 'THIS IS A USERAGENT 42.0'
            });
            await scraper.start();

            const { results } = await scraper.scrape(scrape_job);
            assert.strictEqual(results['javascript is hard']['1'], 'THIS IS A USERAGENT 42.0');

            await scraper.quit();
        });

        /**
         * Test the random_user_agent option
         * TODO: the generated user_agent should be different for each keyword
         * TODO: this test will sometimes fail because the user_agent is not very random :-(
         */
        it('random_user_agent', async function () {

            const scrape_job = {
                search_engine: MockScraperTestUserAgent,
                keywords: ['news'],
            };

            const NUMBER_OF_EXEC = 10;

            const uaList = await Promise.map(_.range(NUMBER_OF_EXEC), async (i) => {
                const scraper = new se_scraper.ScrapeManager({
                    throw_on_detection: true,
                    logger: testLogger,
                    random_user_agent: true,
                });
                await scraper.start();
                const { results: { news } } = await scraper.scrape(scrape_job);
                await scraper.quit();
                return news['1'];
            });

            uaList.forEach((userAgent) => {
                const uaParsed = UAParser(userAgent);
                assert(uaParsed.browser.name, 'UserAgent should have a browser name detected');
                assert(uaParsed.os.name, 'UserAgent should have an os name detected');
            });

            assert( _.chain(uaList).countBy().toPairs().sortBy(e => e[1]).last().value()[1] < (NUMBER_OF_EXEC * 0.4), 'Each user agent should appear less than 40% of the time' );

        });

    });

});
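
The final assertion in `random_user_agent` is dense: the lodash chain computes the highest occurrence count among the collected user agents and requires that no single user agent accounts for 40% or more of the runs. A standalone sketch of the same chain on made-up sample values (the `uaList` entries below are illustrative only, not real scraper output):

    const _ = require('lodash');

    // Hypothetical sample: five runs producing three distinct user agents.
    const uaList = ['UA-A', 'UA-B', 'UA-A', 'UA-C', 'UA-B'];

    // countBy()         -> { 'UA-A': 2, 'UA-B': 2, 'UA-C': 1 }
    // toPairs()         -> [['UA-A', 2], ['UA-B', 2], ['UA-C', 1]]
    // sortBy(e => e[1]) sorts ascending by count; last() takes the most frequent pair.
    const mostFrequentCount = _.chain(uaList)
        .countBy()
        .toPairs()
        .sortBy(e => e[1])
        .last()
        .value()[1];

    console.log(mostFrequentCount); // 2, i.e. 40% of these 5 sample runs
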