resolved some issues. proxy possible now. scraping for more than one page possible now

2019-01-29 22:48:08 +01:00 · 2019-01-29 22:48:08 +01:00 · 9e62f23451
commit 9e62f23451
parent 89441070cd
14 changed files with 764 additions and 340 deletions
--- a/README.md
+++ b/README.md
@ -34,10 +34,50 @@ Scraping is done with a headless chromium browser using the automation library p

 If you need to deploy scraping to the cloud (AWS or Azure), you can contact me on hire@incolumitas.com

+The chromium browser is started with the following flags to prevent
+scraping detection.
+
+```js
+var ADDITIONAL_CHROME_FLAGS = [
+    '--disable-infobars',
+    '--window-position=0,0',
+    '--ignore-certificate-errors',
+    '--ignore-certificate-errors-spki-list',
+    '--no-sandbox',
+    '--disable-setuid-sandbox',
+    '--disable-dev-shm-usage',
+    '--disable-accelerated-2d-canvas',
+    '--disable-gpu',
+    '--window-size=1920x1080',
+    '--hide-scrollbars',
+];
+```
+
+Furthermore, to avoid loading unnecessary resources and to speed up
+scraping a great deal, we instruct Chrome not to load stylesheets, fonts, images, and media:
+
+```js
+await page.setRequestInterception(true);
+page.on('request', (req) => {
+    let type = req.resourceType();
+    const block = ['stylesheet', 'font', 'image', 'media'];
+    if (block.includes(type)) {
+        req.abort();
+    } else {
+        req.continue();
+    }
+});
+```
+
+#### Making puppeteer and headless chrome undetectable
+
+Consider the following resources:
+
+* https://intoli.com/blog/making-chrome-headless-undetectable/

 ### Installation and Usage

-Install with 
+Install with

 ```bash
 npm install se-scraper
@ -53,12 +93,12 @@ let config = {
    // the user agent to scrape with
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    // if random_user_agent is set to True, a random user agent is chosen
-    random_user_agent: false,
+    random_user_agent: true,
    // get meta data of scraping in return object
    write_meta_data: false,
    // how long to sleep between requests. a random sleep interval within the range [a,b]
    // is drawn before every request. empty string for no sleeping.
-    sleep_range: '',
+    sleep_range: '[1,2]',
    // which search engine to scrape
    search_engine: 'google',
    // whether debug information should be printed
@ -68,9 +108,11 @@ let config = {
    // this output is informational
    verbose: false,
    // an array of keywords to scrape
-    keywords: ['scrapeulous.com', ],
+    keywords: ['scraping scrapeulous.com'],
    // alternatively you can specify a keyword_file. this overwrites the keywords array
    keyword_file: '',
+    // the number of pages to scrape for each keyword
+    num_pages: 2,
    // whether to start the browser in headless mode
    headless: true,
    // path to output file, data will be stored in JSON
@ -84,9 +126,13 @@ let config = {
    // must be an absolute path to the module
    //custom_func: resolve('examples/pluggable.js'),
    custom_func: '',
+    // use a proxy for all connections
+    // example: 'socks5://78.94.172.42:1080'
+    // example: 'http://118.174.233.10:48400'
+    //proxy: 'socks5://78.94.172.42:1080',
 };

-se_scraper.scrape(config, (err, response) => {
+function callback(err, response) {
    if (err) { console.error(err) }

    /* response object has the following properties:
@ -97,7 +143,9 @@ se_scraper.scrape(config, (err, response) => {
     */

    console.dir(response.results, {depth: null, colors: true});
-});
+}
+
+se_scraper.scrape(config, callback);
 ```

 Supported options for the `search_engine` config key:
@ -123,199 +171,179 @@ Supported options for the `search_engine` config key:
 'marketwatch'
 ```

-Output for the above script on my laptop:
+Output for the above script on my machine:

 ```text
-Scraper took 4295ms to scrape 2 keywords.
-On average ms/keyword: 2147.5ms/keyword
-{ 'incolumitas.com scraping':
-   { time: 'Mon, 24 Dec 2018 13:07:43 GMT',
-     num_results: 'Ungefähr 2’020 Ergebnisse (0.18 Sekunden) ',
-     no_results: false,
-     effective_query: '',
-     results:
-      [ { link:
-           'https://incolumitas.com/2018/10/29/youtube-puppeteer-scraping/',
-          title:
-           'Coding, Learning and Business Ideas – Tutorial: Youtube scraping ...',
-          snippet:
-           '29.10.2018 - In this blog post I am going to show you how to scrape YouTube video data using the handy puppeteer library. Puppeteer is a Node library ...',
-          visible_link:
-           'https://incolumitas.com/2018/10/29/youtube-puppeteer-scraping/',
-          date: '29.10.2018 - ',
-          rank: 1 },
-        { link: 'https://incolumitas.com/2018/09/05/googlescraper-tutorial/',
-          title:
-           'GoogleScraper Tutorial - How to scrape 1000 keywords with Google',
-          snippet:
-           '05.09.2018 - Tutorial that teaches how to use GoogleScraper to scrape 1000 keywords with 10 selenium browsers.',
-          visible_link: 'https://incolumitas.com/2018/09/05/googlescraper-tutorial/',
-          date: '05.09.2018 - ',
-          rank: 2 },
-        { link: 'https://incolumitas.com/tag/scraping.html',
-          title: 'Coding, Learning and Business Ideas – Tag Scraping',
-          snippet:
-           'Scraping Amazon Reviews using Headless Chrome Browser and Python3. Posted on Mi ... GoogleScraper Tutorial - How to scrape 1000 keywords with Google.',
-          visible_link: 'https://incolumitas.com/tag/scraping.html',
-          date: '',
-          rank: 3 },
-        { link: 'https://incolumitas.com/category/scraping.html',
-          title: 'Coding, Learning and Business Ideas – Category Scraping',
-          snippet:
-           'Nikolai Tschacher\'s ideas and projects around IT security and computer science.',
-          visible_link: 'https://incolumitas.com/category/scraping.html',
-          date: '',
-          rank: 4 },
-        { link:
-           'https://github.com/NikolaiT/incolumitas/blob/master/content/Meta/scraping-and-extracting-links-from-any-major-search-engine-like-google-yandex-baidu-bing-and-duckduckgo.md',
-          title:
-           'incolumitas/scraping-and-extracting-links-from-any-major-search ...',
-          snippet:
-           'Title: Scraping and Extracting Links from any major Search Engine like Google, Yandex, Baidu, Bing and Duckduckgo Date: 2014-11-12 00:47 Author: Nikolai ...',
-          visible_link:
-           'https://github.com/.../incolumitas/.../scraping-and-extracting-links...',
-          date: '',
-          rank: 5 },
-        { link:
-           'https://stackoverflow.com/questions/16955325/scraping-google-results-with-python',
-          title: 'Scraping Google Results with Python - Stack Overflow',
-          snippet:
-           'I found this. incolumitas.com/2013/01/06/… But the author claims it is not ported to 2.7 yet. – user2351394 Jun 6 \'13 at 6:59 ...',
-          visible_link:
-           'https://stackoverflow.com/.../scraping-google-results-with-python',
-          date: '',
-          rank: 6 },
-        { link: 'https://pypi.org/project/GoogleScraper/0.1.18/',
-          title: 'GoogleScraper · PyPI',
-          snippet:
-           '[5]: http://incolumitas.com/2014/11/12/scraping-and-extracting-links-from-any-major-search-engine-like-google-yandex-baidu-bing-and-duckduckgo/ ...',
-          visible_link: 'https://pypi.org/project/GoogleScraper/0.1.18/',
-          date: '',
-          rank: 7 },
-        { link:
-           'https://www.reddit.com/r/Python/comments/2m0vyu/scraping_links_on_google_yandex_bing_duckduckgo/',
-          title:
-           'Scraping links on Google, Yandex, Bing, Duckduckgo, Baidu and ...',
-          snippet:
-           '12.11.2014 - Scraping links on Google, Yandex, Bing, Duckduckgo, Baidu and other search engines with Python ... submitted 4 years ago by incolumitas.',
-          visible_link:
-           'https://www.reddit.com/.../scraping_links_on_google_yandex_bi...',
-          date: '12.11.2014 - ',
-          rank: 9 },
-        { link: 'https://twitter.com/incolumitas_?lang=de',
-          title: 'Nikolai Tschacher (@incolumitas_) | Twitter',
-          snippet:
-           'Embed Tweet. How to use GoogleScraper to scrape images and download them ... Learn how to scrape millions of url from yandex and google or bing with: ...',
-          visible_link: 'https://twitter.com/incolumitas_?lang=de',
-          date: '',
-          rank: 10 } ] },
-  'best scraping framework':
-   { time: 'Mon, 24 Dec 2018 13:07:44 GMT',
-     num_results: 'Ungefähr 2’820’000 Ergebnisse (0.36 Sekunden) ',
-     no_results: false,
-     effective_query: '',
-     results:
-      [ { link:
-           'http://www.aioptify.com/top-web-scraping-frameworks-and-librares.php',
-          title: 'Top Web Scraping Frameworks and Libraries - AI Optify',
-          snippet: '',
-          visible_link:
-           'www.aioptify.com/top-web-scraping-frameworks-and-librares.php',
-          date: '',
-          rank: 1 },
-        { link:
-           'http://www.aioptify.com/top-web-scraping-frameworks-and-librares.php',
-          title: 'Top Web Scraping Frameworks and Libraries - AI Optify',
-          snippet: '',
-          visible_link:
-           'www.aioptify.com/top-web-scraping-frameworks-and-librares.php',
-          date: '',
-          rank: 2 },
-        { link:
-           'https://www.scrapehero.com/open-source-web-scraping-frameworks-and-tools/',
-          title:
-           'Best Open Source Web Scraping Frameworks and Tools - ScrapeHero',
-          snippet:
-           '05.06.2018 - List of Open Source Web Scraping Frameworks. Scrapy. MechanicalSoup. PySpider. Portia. Apify SDK. Nodecrawler. Selenium WebDriver. Puppeteer.',
-          visible_link:
-           'https://www.scrapehero.com/open-source-web-scraping-framewo...',
-          date: '05.06.2018 - ',
-          rank: 3 },
-        { link:
-           'https://medium.com/datadriveninvestor/best-data-scraping-tools-for-2018-top-10-reviews-558cc5a4992f',
-          title:
-           'Best Data Scraping Tools for 2018 (Top 10 Reviews) – Data Driven ...',
-          snippet:
-           '05.03.2018 - Pros: Octoparse is the best free data scraping tool I\'ve met. ... your Scrapy (a open-source data extraction framework) web spider\'s activities.',
-          visible_link:
-           'https://medium.com/.../best-data-scraping-tools-for-2018-top-10-...',
-          date: '05.03.2018 - ',
-          rank: 4 },
-        { link:
-           'https://www.quora.com/What-is-the-best-web-scraping-open-source-tool',
-          title: 'What is the best web scraping open source tool? - Quora',
-          snippet:
-           '15.06.2015 - My personal favourite is Python Scrapy and it is an excellent framework for building a web data scraper. Why Scrapy? 1) It is an open source framework and cost ...',
-          visible_link:
-           'https://www.quora.com/What-is-the-best-web-scraping-open-sour...',
-          date: '15.06.2015 - ',
-          rank: 5 },
-        { link:
-           'http://www.aioptify.com/top-web-scraping-frameworks-and-librares.php',
-          title: 'Top Web Scraping Frameworks and Libraries - AI Optify',
-          snippet:
-           '21.05.2018 - Top Web Scraping Frameworks and Libraries. Requests. Scrapy. Beautiful Soup. Selenium with Python. lxml. Webscraping with Selenium - part 1. Extracting data from websites with Scrapy. Scrapinghub.',
-          visible_link:
-           'www.aioptify.com/top-web-scraping-frameworks-and-librares.php',
-          date: '21.05.2018 - ',
-          rank: 6 },
-        { link: 'https://scrapy.org/',
-          title:
-           'Scrapy | A Fast and Powerful Scraping and Web Crawling Framework',
-          snippet:
-           'An open source and collaborative framework for extracting the data you need from ... Spider): name = \'blogspider\' start_urls = [\'https://blog.scrapinghub.com\'] def ...',
-          visible_link: 'https://scrapy.org/',
-          date: '',
-          rank: 7 },
-        { link:
-           'https://www.scraperapi.com/blog/the-10-best-web-scraping-tools',
-          title: 'The 10 Best Web Scraping Tools of 2018 - Scraper API',
-          snippet:
-           '19.07.2018 - The 10 Best Web Scraping Tools of 2018. ParseHub. Scrapy. Diffbot. Cheerio. Website: https://cheerio.js.org. Beautiful Soup. Website: https://www.crummy.com/software/BeautifulSoup/ Puppeteer. Website: https://github.com/GoogleChrome/puppeteer. Content Grabber. Website: http://www.contentgrabber.com/ Mozenda. Website: ...',
-          visible_link:
-           'https://www.scraperapi.com/blog/the-10-best-web-scraping-tools',
-          date: '19.07.2018 - ',
-          rank: 8 },
-        { link: 'https://elitedatascience.com/python-web-scraping-libraries',
-          title: '5 Tasty Python Web Scraping Libraries - EliteDataScience',
-          snippet:
-           '03.02.2017 - We\'ve decided to feature the 5 Python libraries for web scraping that ... The good news is that you can swap out its parser with a faster one if ... Scrapy is technically not even a library… it\'s a complete web scraping framework.',
-          visible_link: 'https://elitedatascience.com/python-web-scraping-libraries',
-          date: '03.02.2017 - ',
-          rank: 9 },
-        { link:
-           'https://blog.michaelyin.info/web-scraping-framework-review-scrapy-vs-selenium/',
-          title:
-           'Web Scraping Framework Review: Scrapy VS Selenium | MichaelYin ...',
-          snippet:
-           '01.10.2018 - In this Scrapy tutorial, I will cover the features of Scrapy and Selenium, and help you decide which one is better for your projects.',
-          visible_link:
-           'https://blog.michaelyin.info/web-scraping-framework-review-scr...',
-          date: '01.10.2018 - ',
-          rank: 10 },
-        { link: 'https://github.com/lorien/awesome-web-scraping',
-          title:
-           'GitHub - lorien/awesome-web-scraping: List of libraries, tools and APIs ...',
-          snippet:
-           'List of libraries, tools and APIs for web scraping and data processing. ... golang.md · add dataflow kit framework, 2 months ago ... Make this list better!',
-          visible_link: 'https://github.com/lorien/awesome-web-scraping',
-          date: '',
-          rank: 11 },
-        { link: 'https://www.import.io/post/best-web-scraping-tools-2018/',
-          title: 'Best Web Scraping Software Tools 2018 | Import.io',
-          snippet:
-           '07.08.2018 - List of Best Web Scraping SoftwareThere are hundreds of Web ... it is a fast high-level screen scraping and web crawling framework, used to ...',
-          visible_link: 'https://www.import.io/post/best-web-scraping-tools-2018/',
-          date: '07.08.2018 - ',
-          rank: 12 } ] } }
+{ 'scraping scrapeulous.com':
+   { '1':
+      { time: 'Tue, 29 Jan 2019 21:39:22 GMT',
+        num_results: 'Ungefähr 145 Ergebnisse (0,18 Sekunden) ',
+        no_results: false,
+        effective_query: '',
+        results:
+         [ { link: 'https://scrapeulous.com/',
+             title:
+              'Scrapeuloushttps://scrapeulous.com/Im CacheDiese Seite übersetzen',
+             snippet:
+              'Scrapeulous.com allows you to scrape various search engines automatically ... or to find hidden links, Scrapeulous.com enables you to scrape a ever increasing ...',
+             visible_link: 'https://scrapeulous.com/',
+             date: '',
+             rank: 1 },
+           { link: 'https://scrapeulous.com/about/',
+             title:
+              'About - Scrapeuloushttps://scrapeulous.com/about/Im CacheDiese Seite übersetzen',
+             snippet:
+              'Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. The business requirement to scrape information from ...',
+             visible_link: 'https://scrapeulous.com/about/',
+             date: '',
+             rank: 2 },
+           { link: 'https://scrapeulous.com/howto/',
+             title:
+              'Howto - Scrapeuloushttps://scrapeulous.com/howto/Im CacheDiese Seite übersetzen',
+             snippet:
+              'We offer scraping large amounts of keywords for the Google Search Engine. Large means any number of keywords between 40 and 50000. Additionally, we ...',
+             visible_link: 'https://scrapeulous.com/howto/',
+             date: '',
+             rank: 3 },
+           { link: 'https://github.com/NikolaiT/se-scraper',
+             title:
+              'GitHub - NikolaiT/se-scraper: Javascript scraping module based on ...https://github.com/NikolaiT/se-scraperIm CacheDiese Seite übersetzen',
+             snippet:
+              '24.12.2018 - Javascript scraping module based on puppeteer for many different search ... for many different search engines... https://scrapeulous.com/.',
+             visible_link: 'https://github.com/NikolaiT/se-scraper',
+             date: '24.12.2018 - ',
+             rank: 4 },
+           { link:
+              'https://github.com/NikolaiT/GoogleScraper/blob/master/README.md',
+             title:
+              'GoogleScraper/README.md at master · NikolaiT/GoogleScraper ...https://github.com/NikolaiT/GoogleScraper/blob/.../README.mdIm CacheÄhnliche SeitenDiese Seite übersetzen',
+             snippet:
+              'GoogleScraper - Scraping search engines professionally. Scrapeulous.com - Scraping Service. GoogleScraper is a open source tool and will remain a open ...',
+             visible_link:
+              'https://github.com/NikolaiT/GoogleScraper/blob/.../README.md',
+             date: '',
+             rank: 5 },
+           { link: 'https://googlescraper.readthedocs.io/',
+             title:
+              'Welcome to GoogleScraper\'s documentation! — GoogleScraper ...https://googlescraper.readthedocs.io/Im CacheDiese Seite übersetzen',
+             snippet:
+              'Welcome to GoogleScraper\'s documentation!¶. Contents: GoogleScraper - Scraping search engines professionally · Scrapeulous.com - Scraping Service ...',
+             visible_link: 'https://googlescraper.readthedocs.io/',
+             date: '',
+             rank: 6 },
+           { link: 'https://incolumitas.com/pages/scrapeulous/',
+             title:
+              'Coding, Learning and Business Ideas – Scrapeulous.com - Incolumitashttps://incolumitas.com/pages/scrapeulous/Im CacheDiese Seite übersetzen',
+             snippet:
+              'A scraping service for scientists, marketing professionals, analysts or SEO folk. In autumn 2018, I created a scraping service called scrapeulous.com. There you ...',
+             visible_link: 'https://incolumitas.com/pages/scrapeulous/',
+             date: '',
+             rank: 7 },
+           { link: 'https://incolumitas.com/',
+             title:
+              'Coding, Learning and Business Ideashttps://incolumitas.com/Im CacheDiese Seite übersetzen',
+             snippet:
+              'Scraping Amazon Reviews using Headless Chrome Browser and Python3. Posted on Mi ... GoogleScraper Tutorial - How to scrape 1000 keywords with Google.',
+             visible_link: 'https://incolumitas.com/',
+             date: '',
+             rank: 8 },
+           { link: 'https://en.wikipedia.org/wiki/Search_engine_scraping',
+             title:
+              'Search engine scraping - Wikipediahttps://en.wikipedia.org/wiki/Search_engine_scrapingIm CacheDiese Seite übersetzen',
+             snippet:
+              'Search engine scraping is the process of harvesting URLs, descriptions, or other information from search engines such as Google, Bing or Yahoo. This is a ...',
+             visible_link: 'https://en.wikipedia.org/wiki/Search_engine_scraping',
+             date: '',
+             rank: 9 },
+           { link:
+              'https://readthedocs.org/projects/googlescraper/downloads/pdf/latest/',
+             title:
+              'GoogleScraper Documentation - Read the Docshttps://readthedocs.org/projects/googlescraper/downloads/.../latest...Im CacheDiese Seite übersetzen',
+             snippet:
+              '23.12.2018 - Contents: 1 GoogleScraper - Scraping search engines professionally. 1. 1.1 ... For this reason, I created the web service scrapeulous.com.',
+             visible_link:
+              'https://readthedocs.org/projects/googlescraper/downloads/.../latest...',
+             date: '23.12.2018 - ',
+             rank: 10 } ] },
+     '2':
+      { time: 'Tue, 29 Jan 2019 21:39:24 GMT',
+        num_results: 'Seite 2 von ungefähr 145 Ergebnissen (0,20 Sekunden) ',
+        no_results: false,
+        effective_query: '',
+        results:
+         [ { link: 'https://pypi.org/project/CountryGoogleScraper/',
+             title:
+              'CountryGoogleScraper · PyPIhttps://pypi.org/project/CountryGoogleScraper/Im CacheDiese Seite übersetzen',
+             snippet:
+              'A module to scrape and extract links, titles and descriptions from various search ... Look [here to get an idea how to use asynchronous mode](http://scrapeulous.',
+             visible_link: 'https://pypi.org/project/CountryGoogleScraper/',
+             date: '',
+             rank: 1 },
+           { link: 'https://www.youtube.com/watch?v=a6xn6rc9GbI',
+             title:
+              'scrapeulous intro - YouTubehttps://www.youtube.com/watch?v=a6xn6rc9GbIDiese Seite übersetzen',
+             snippet:
+              'scrapeulous intro. Scrapeulous Scrapeulous. Loading... Unsubscribe from ... on Dec 16, 2018. Introduction ...',
+             visible_link: 'https://www.youtube.com/watch?v=a6xn6rc9GbI',
+             date: '',
+             rank: 3 },
+           { link:
+              'https://www.reddit.com/r/Python/comments/2tii3r/scraping_260_search_queries_in_bing_in_a_matter/',
+             title:
+              'Scraping 260 search queries in Bing in a matter of seconds using ...https://www.reddit.com/.../scraping_260_search_queries_in_bing...Im CacheDiese Seite übersetzen',
+             snippet:
+              '24.01.2015 - Scraping 260 search queries in Bing in a matter of seconds using asyncio and aiohttp. (scrapeulous.com). submitted 3 years ago by ...',
+             visible_link:
+              'https://www.reddit.com/.../scraping_260_search_queries_in_bing...',
+             date: '24.01.2015 - ',
+             rank: 4 },
+           { link: 'https://twitter.com/incolumitas_?lang=de',
+             title:
+              'Nikolai Tschacher (@incolumitas_) | Twitterhttps://twitter.com/incolumitas_?lang=deIm CacheÄhnliche SeitenDiese Seite übersetzen',
+             snippet:
+              'Learn how to scrape millions of url from yandex and google or bing with: http://scrapeulous.com/googlescraper-market-analysis.html … 0 replies 0 retweets 0 ...',
+             visible_link: 'https://twitter.com/incolumitas_?lang=de',
+             date: '',
+             rank: 5 },
+           { link:
+              'http://blog.shodan.io/hostility-in-the-python-package-index/',
+             title:
+              'Hostility in the Cheese Shop - Shodan Blogblog.shodan.io/hostility-in-the-python-package-index/Im CacheDiese Seite übersetzen',
+             snippet:
+              '22.02.2015 - https://zzz.scrapeulous.com/r? According to the author of the website, these hostile packages are used as honeypots. Honeypots are usually ...',
+             visible_link: 'blog.shodan.io/hostility-in-the-python-package-index/',
+             date: '22.02.2015 - ',
+             rank: 6 },
+           { link: 'https://libraries.io/github/NikolaiT/GoogleScraper',
+             title:
+              'NikolaiT/GoogleScraper - Libraries.iohttps://libraries.io/github/NikolaiT/GoogleScraperIm CacheDiese Seite übersetzen',
+             snippet:
+              'A Python module to scrape several search engines (like Google, Yandex, Bing, ... https://scrapeulous.com/ ... You can install GoogleScraper comfortably with pip:',
+             visible_link: 'https://libraries.io/github/NikolaiT/GoogleScraper',
+             date: '',
+             rank: 7 },
+           { link: 'https://pydigger.com/pypi/CountryGoogleScraper',
+             title:
+              'CountryGoogleScraper - PyDiggerhttps://pydigger.com/pypi/CountryGoogleScraperDiese Seite übersetzen',
+             snippet:
+              '19.10.2016 - Look [here to get an idea how to use asynchronous mode](http://scrapeulous.com/googlescraper-260-keywords-in-a-second.html). ### Table ...',
+             visible_link: 'https://pydigger.com/pypi/CountryGoogleScraper',
+             date: '19.10.2016 - ',
+             rank: 8 },
+           { link: 'https://hub.docker.com/r/cimenx/data-mining-penandtest/',
+             title:
+              'cimenx/data-mining-penandtest - Docker Hubhttps://hub.docker.com/r/cimenx/data-mining-penandtest/Im CacheDiese Seite übersetzen',
+             snippet:
+              'Container. OverviewTagsDockerfileBuilds · http://scrapeulous.com/googlescraper-260-keywords-in-a-second.html. Docker Pull Command. Owner. profile ...',
+             visible_link: 'https://hub.docker.com/r/cimenx/data-mining-penandtest/',
+             date: '',
+             rank: 9 },
+           { link: 'https://www.revolvy.com/page/Search-engine-scraping',
+             title:
+              'Search engine scraping | Revolvyhttps://www.revolvy.com/page/Search-engine-scrapingIm CacheDiese Seite übersetzen',
+             snippet:
+              'Search engine scraping is the process of harvesting URLs, descriptions, or other information from search engines such as Google, Bing or Yahoo. This is a ...',
+             visible_link: 'https://www.revolvy.com/page/Search-engine-scraping',
+             date: '',
+             rank: 10 } ] } } }
 ```
--- a/TODO.txt
+++ b/TODO.txt
@ -14,6 +14,17 @@
        https://www.scrapehero.com/how-to-increase-web-scraping-speed-using-puppeteer/
        https://www.scrapehero.com/how-to-build-a-web-scraper-using-puppeteer-and-node-js/

+29.1.2019
+
+    - implement proxy support functionality
+        - implement proxy check
+
+    - implement scraping more than 1 page
+        - do it for google
+        - and bing
+
+    - implement duckduckgo scraping
+
 TODO:
    - think about implementing ticker search for: https://quotes.wsj.com/MSFT?mod=searchresults_companyquotes
    - add proxy support
@ -23,4 +34,28 @@ TODO:
 TODO:
    - think whether it makes sense to introduce a generic scraping class?
    - is scraping abstractable or is every scraper too unique?
-    - dont make the same mistakes as with GoogleScraper
+    - don't make the same mistakes as with GoogleScraper
+
+
+TODO:
+    okay its fucking time to make a generic scraping class like in GoogleScraper
+    i feel like history repeats
+
+    class Scraper
+
+        constructor(options = {}) {
+
+        }
+
+        async load_search_engine() {}
+
+        async search_keyword() {}
+
+        async new_page() {}
+
+        async detected() {}
+
+
+    then each search engine derives from this generic class
+
+    some search engines do not need such an abstract class, because they are too complex
--- a/data.json
+++ b/data.json
--- a/index.js
+++ b/index.js
@ -35,6 +35,10 @@ exports.scrape = async function(config, callback) {
 		// get_browser, handle_metadata, close_browser
 		//custom_func: resolve('examples/pluggable.js'),
 		custom_func: '',
+		// use a proxy for all connections
+		// example: 'socks5://78.94.172.42:1080'
+		// example: 'http://118.174.233.10:48400'
+		proxy: '',
 	};

 	// overwrite default config
--- a/package-lock.json
+++ b/package-lock.json
@ -1,9 +1,22 @@
 {
  "name": "se-scraper",
-  "version": "1.1.4",
+  "version": "1.1.7",
  "lockfileVersion": 1,
  "requires": true,
  "dependencies": {
+    "@sindresorhus/is": {
+      "version": "0.14.0",
+      "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-0.14.0.tgz",
+      "integrity": "sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ=="
+    },
+    "@szmarczak/http-timer": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-1.1.2.tgz",
+      "integrity": "sha512-XIB2XbzHTN6ieIjfIMV9hlVcfPU26s2vafYWQcZHWXHOxiaRZYEDKEwdl129Zyg50+foYV2jCgtrqSA6qNuNSA==",
+      "requires": {
+        "defer-to-connect": "^1.0.1"
+      }
+    },
    "@types/node": {
      "version": "10.12.18",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz",
@ -51,6 +64,20 @@
      "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz",
      "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A=="
    },
+    "cacheable-request": {
+      "version": "6.0.0",
+      "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-6.0.0.tgz",
+      "integrity": "sha512-2N7AmszH/WPPpl5Z3XMw1HAP+8d+xugnKQAeKvxFZ/04dbT/CAznqwbl+7eSr3HkwdepNwtb2yx3CAMQWvG01Q==",
+      "requires": {
+        "clone-response": "^1.0.2",
+        "get-stream": "^4.0.0",
+        "http-cache-semantics": "^4.0.0",
+        "keyv": "^3.0.0",
+        "lowercase-keys": "^1.0.1",
+        "normalize-url": "^3.1.0",
+        "responselike": "^1.0.2"
+      }
+    },
    "chai": {
      "version": "4.2.0",
      "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz",
@ -82,6 +109,14 @@
        "parse5": "^3.0.1"
      }
    },
+    "clone-response": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/clone-response/-/clone-response-1.0.2.tgz",
+      "integrity": "sha1-0dyXOSAxTfZ/vrlCI7TuNQI56Ws=",
+      "requires": {
+        "mimic-response": "^1.0.0"
+      }
+    },
    "concat-map": {
      "version": "0.0.1",
      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@ -151,6 +186,14 @@
        "ms": "^2.1.1"
      }
    },
+    "decompress-response": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-3.3.0.tgz",
+      "integrity": "sha1-gKTdMjdIOEv6JICDYirt7Jgq3/M=",
+      "requires": {
+        "mimic-response": "^1.0.0"
+      }
+    },
    "deep-eql": {
      "version": "3.0.1",
      "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-3.0.1.tgz",
@ -159,6 +202,11 @@
        "type-detect": "^4.0.0"
      }
    },
+    "defer-to-connect": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-1.0.2.tgz",
+      "integrity": "sha512-k09hcQcTDY+cwgiwa6PYKLm3jlagNzQ+RSvhjzESOGOx+MNOuXkxTfEvPrO1IOQ81tArCFYQgi631clB70RpQw=="
+    },
    "dom-serializer": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz",
@ -197,6 +245,19 @@
        "domelementtype": "1"
      }
    },
+    "duplexer3": {
+      "version": "0.1.4",
+      "resolved": "https://registry.npmjs.org/duplexer3/-/duplexer3-0.1.4.tgz",
+      "integrity": "sha1-7gHdHKwO08vH/b6jfcCo8c4ALOI="
+    },
+    "end-of-stream": {
+      "version": "1.4.1",
+      "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.1.tgz",
+      "integrity": "sha512-1MkrZNvWTKCaigbn+W15elq2BB/L22nqrSY5DKlo3X6+vclJm8Bb5djXJBmEX6fS3+zCh/F4VBK5Z2KxJt4s2Q==",
+      "requires": {
+        "once": "^1.4.0"
+      }
+    },
    "entities": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
@ -259,6 +320,14 @@
      "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.0.tgz",
      "integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE="
    },
+    "get-stream": {
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz",
+      "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==",
+      "requires": {
+        "pump": "^3.0.0"
+      }
+    },
    "glob": {
      "version": "7.1.3",
      "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz",
@ -272,6 +341,24 @@
        "path-is-absolute": "^1.0.0"
      }
    },
+    "got": {
+      "version": "9.6.0",
+      "resolved": "https://registry.npmjs.org/got/-/got-9.6.0.tgz",
+      "integrity": "sha512-R7eWptXuGYxwijs0eV+v3o6+XH1IqVK8dJOEecQfTmkncw9AV4dcw/Dhxi8MdlqPthxxpZyizMzyg8RTmEsG+Q==",
+      "requires": {
+        "@sindresorhus/is": "^0.14.0",
+        "@szmarczak/http-timer": "^1.1.2",
+        "cacheable-request": "^6.0.0",
+        "decompress-response": "^3.3.0",
+        "duplexer3": "^0.1.4",
+        "get-stream": "^4.1.0",
+        "lowercase-keys": "^1.0.1",
+        "mimic-response": "^1.0.1",
+        "p-cancelable": "^1.0.0",
+        "to-readable-stream": "^1.0.0",
+        "url-parse-lax": "^3.0.0"
+      }
+    },
    "htmlparser2": {
      "version": "3.10.0",
      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.0.tgz",
@ -285,6 +372,11 @@
        "readable-stream": "^3.0.6"
      }
    },
+    "http-cache-semantics": {
+      "version": "4.0.2",
+      "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.0.2.tgz",
+      "integrity": "sha512-laeSTWIkuFa6lUgZAt+ic9RwOSEwbi9VDQNcCvMFO4sZiDc2Ha8DaZVCJnfpLLQCcS8rvCnIWYmz0POLxt7Dew=="
+    },
    "https-proxy-agent": {
      "version": "2.2.1",
      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.1.tgz",
@ -323,16 +415,39 @@
      "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
      "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
    },
+    "json-buffer": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.0.tgz",
+      "integrity": "sha1-Wx85evx11ne96Lz8Dkfh+aPZqJg="
+    },
+    "keyv": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/keyv/-/keyv-3.1.0.tgz",
+      "integrity": "sha512-9ykJ/46SN/9KPM/sichzQ7OvXyGDYKGTaDlKMGCAlg2UK8KRy4jb0d8sFc+0Tt0YYnThq8X2RZgCg74RPxgcVA==",
+      "requires": {
+        "json-buffer": "3.0.0"
+      }
+    },
    "lodash": {
      "version": "4.17.11",
      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz",
      "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg=="
    },
+    "lowercase-keys": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz",
+      "integrity": "sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA=="
+    },
    "mime": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.0.tgz",
      "integrity": "sha512-ikBcWwyqXQSHKtciCcctu9YfPbFYZ4+gbHEmE0Q8jzcTYQg5dHCr3g2wwAZjPoJfQVXZq6KXAjpXOTf5/cjT7w=="
    },
+    "mimic-response": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz",
+      "integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ=="
+    },
    "minimatch": {
      "version": "3.0.4",
      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
@ -359,6 +474,11 @@
      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
      "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
    },
+    "normalize-url": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-3.3.0.tgz",
+      "integrity": "sha512-U+JJi7duF1o+u2pynbp2zXDW2/PADgC30f0GsHZtRh+HOcXHnw137TrNlyxxRvWW5fjKd3bcLHPxofWuCjaeZg=="
+    },
    "nth-check": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz",
@ -375,6 +495,11 @@
        "wrappy": "1"
      }
    },
+    "p-cancelable": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-1.0.0.tgz",
+      "integrity": "sha512-USgPoaC6tkTGlS831CxsVdmZmyb8tR1D+hStI84MyckLOzfJlYQUweomrwE3D8T7u5u5GVuW064LT501wHTYYA=="
+    },
    "parse5": {
      "version": "3.0.3",
      "resolved": "https://registry.npmjs.org/parse5/-/parse5-3.0.3.tgz",
@ -398,6 +523,11 @@
      "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
      "integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA="
    },
+    "prepend-http": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/prepend-http/-/prepend-http-2.0.0.tgz",
+      "integrity": "sha1-6SQ0v6XqjBn0HN/UAddBo8gZ2Jc="
+    },
    "process-nextick-args": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz",
@ -413,6 +543,15 @@
      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.0.0.tgz",
      "integrity": "sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4="
    },
+    "pump": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
+      "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
+      "requires": {
+        "end-of-stream": "^1.1.0",
+        "once": "^1.3.1"
+      }
+    },
    "puppeteer": {
      "version": "1.11.0",
      "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-1.11.0.tgz",
@ -438,6 +577,14 @@
        "util-deprecate": "^1.0.1"
      }
    },
+    "responselike": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/responselike/-/responselike-1.0.2.tgz",
+      "integrity": "sha1-kYcg7ztjHFZCvgaPFa3lpG9Loec=",
+      "requires": {
+        "lowercase-keys": "^1.0.0"
+      }
+    },
    "rimraf": {
      "version": "2.6.2",
      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz",
@ -459,6 +606,11 @@
        "safe-buffer": "~5.1.0"
      }
    },
+    "to-readable-stream": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/to-readable-stream/-/to-readable-stream-1.0.0.tgz",
+      "integrity": "sha512-Iq25XBt6zD5npPhlLVXGFN3/gyR2/qODcKNNyTMd4vbm39HUaOiAM4PMq0eMVC/Tkxz+Zjdsc55g9yyz+Yq00Q=="
+    },
    "type-detect": {
      "version": "4.0.8",
      "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz",
@ -469,6 +621,14 @@
      "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
      "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
    },
+    "url-parse-lax": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz",
+      "integrity": "sha1-FrXK/Afb42dsGxmZF3gj1lA6yww=",
+      "requires": {
+        "prepend-http": "^2.0.0"
+      }
+    },
    "util-deprecate": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "se-scraper",
-  "version": "1.1.7",
+  "version": "1.1.8",
  "description": "A simple module which uses puppeteer to scrape several search engines.",
  "homepage": "https://scrapeulous.com/",
  "main": "index.js",
@ -22,6 +22,7 @@
  "dependencies": {
    "chai": "^4.2.0",
    "cheerio": "^1.0.0-rc.2",
+    "got": "^9.6.0",
    "puppeteer": "^1.9.0"
  }
 }
--- a/run.js
+++ b/run.js
@ -10,7 +10,7 @@ let config = {
    write_meta_data: false,
    // how long to sleep between requests. a random sleep interval within the range [a,b]
    // is drawn before every request. empty string for no sleeping.
-    sleep_range: '[1,1]',
+    sleep_range: '[1,2]',
    // which search engine to scrape
    search_engine: 'google',
    // whether debug information should be printed
@ -20,9 +20,11 @@ let config = {
    // this output is informational
    verbose: false,
    // an array of keywords to scrape
-    keywords: ['trump', 'chief'],
+    keywords: ['scraping scrapeulous.com'],
    // alternatively you can specify a keyword_file. this overwrites the keywords array
    keyword_file: '',
+    // the number of pages to scrape for each keyword
+    num_pages: 1,
    // whether to start the browser in headless mode
    headless: true,
    // path to output file, data will be stored in JSON
@ -35,7 +37,11 @@ let config = {
    // get_browser, handle_metadata, close_browser
    // must be an absolute path to the module
    //custom_func: resolve('examples/pluggable.js'),
-    custom_func: resolve('examples/pluggable.js'),
+    custom_func: '',
+    // use a proxy for all connections
+    // example: 'socks5://78.94.172.42:1080'
+    // example: 'http://118.174.233.10:48400'
+    //proxy: 'socks5://78.94.172.42:1080',
 };

 function callback(err, response) {
--- a/src/captcha_solver.js
+++ b/src/captcha_solver.js
@ -0,0 +1,96 @@
+/*
+    There are essentially two strategies to handle a search engine showing you a captcha:
+
+    1. Solve the captcha
+        https://github.com/ecthros/uncaptcha2
+        or use a captcha solving service such as https://anti-captcha.com/mainpage
+
+    2. Switch your IP address with rotating proxies
+
+ */
+
+/**
+ * @name download recaptcha2 audio captcha
+ *
+ * There are several issues:
+ *
+ * Google sees that we are using an automated browser.
+ *
+ * In the worst case we have to completely control the browser ourselves without puppeteer.
+ *
+ * https://github.com/ecthros/uncaptcha2
+ *
+ * See here:
+ *
+ * https://gist.github.com/tegansnyder/c3aeae4d57768c58247ae6c4e5acd3d1
+ *
+ * https://github.com/GoogleChrome/puppeteer/issues/3039
+ *
+ * https://intoli.com/blog/making-chrome-headless-undetectable/
+ *
+ * @desc  Go to the https://www.google.com/recaptcha/api2/demo demo page and download the captcha
+ */
+
+const puppeteer = require('puppeteer');
+const fs = require('fs');
+const got = require('got');
+
+// Runs at module load: saves the reCAPTCHA demo page's audio challenge to audio.mp3.
+(async () => {
+    let browser;
+    try {
+        browser = await puppeteer.launch({
+            args:  [
+                '--proxy-server=socks5://78.94.172.42:1080',
+                '--no-sandbox',
+                '--disable-setuid-sandbox',
+                '--disable-dev-shm-usage',
+                '--disable-accelerated-2d-canvas',
+                '--disable-gpu',
+                '--window-size=1920x1080',
+                '--hide-scrollbars',
+                '--user-agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0"',
+            ],
+            headless: false,
+        });
+        const page = await browser.newPage()
+        await page.goto('https://www.google.com/recaptcha/api2/demo')
+        await page.waitFor(1000);
+        const frames = page.frames();
+        console.info('Available frames', frames.map(frame => frame.name()));
+        console.info('Available frame urls', frames.map(frame => frame.url()));
+
+        // frame holds the #recaptcha-anchor checkbox, content_frame the challenge controls
+        const frame = frames.find(frame => frame.url().includes('/recaptcha/api2/anchor?'));
+        const content_frame = frames.find(frame => frame.url().includes('/recaptcha/api2/bframe?'));
+
+        await frame.waitForSelector('#recaptcha-anchor', { timeout: 10000 });
+        await page.waitFor(1000);
+        const button = await frame.$('#recaptcha-anchor');
+        await button.click();
+
+        await content_frame.waitForSelector('#recaptcha-audio-button');
+        const audio_button = await content_frame.$('#recaptcha-audio-button');
+        await audio_button.click();
+        await page.waitFor(1000);
+
+        await content_frame.waitForSelector('.rc-audiochallenge-tdownload-link');
+        let download_link = await content_frame.evaluate(() => {
+            // querySelector yields one element; the NodeList from querySelectorAll has no getAttribute()
+            return document.querySelector('.rc-audiochallenge-tdownload-link').getAttribute('href');
+        });
+        console.log('Got audio download link: ', download_link);
+        got.stream(download_link).pipe(fs.createWriteStream('audio.mp3'));
+    } catch (err) {
+        // must sit inside the async function: an outer try/catch never sees awaited rejections
+        console.error(err)
+    } finally {
+        if (browser) await browser.close();
+    }
+})()
+
+/*
+    TODO: port https://github.com/ecthros/uncaptcha2/blob/master/queryAPI.py to JavaScript.
+ */
+async function translate_audio_file() {
+}
--- a/src/modules/bing.js
+++ b/src/modules/bing.js
@ -21,6 +21,7 @@ async function scrape_bing_pup(page, event, context, pluggable) {
 	for (var i = 0; i < keywords.length; i++) {

 		keyword = keywords[i];
+		results[keyword] = {};

 		if (pluggable.before_keyword_scraped) {
 			await pluggable.before_keyword_scraped({
@ -33,23 +34,35 @@ async function scrape_bing_pup(page, event, context, pluggable) {

 		try {
 			const input = await page.$('input[name="q"]');
-			// overwrites last text in input
-			await input.click({ clickCount: 3 });
-			await input.type(keyword);
+			await sfunctions.set_input_value(page, `input[name="q"]`, keyword);
+			await sfunctions.sleep(50);
 			await input.focus();
 			await page.keyboard.press("Enter");

-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
+			let page_num = 1;

-			await page.waitForSelector('#b_content', { timeout: 5000 });
-			if (event.debug === true && event.is_local === true) {
-				await page.screenshot({path: `debug/${keyword}.png`});
-			}
+			do {
+				if (event.verbose === true) {
+					console.log(`${event.search_engine} is scraping keyword: ${keyword} on page ${page_num}`);
+				}
+				if (event.sleep_range) {
+					await sfunctions.random_sleep(event);
+				}
+				await page.waitForSelector('#b_content', { timeout: 5000 });
+				await sfunctions.sleep(500);
+				let html = await page.content();
+				results[keyword][page_num] = parse(html);

-			let html = await page.content();
-			results[keyword] = parse(html);
+				page_num += 1;
+
+				let next_page_link = await page.$('.sb_pagN', {timeout: 1000});
+				if (!next_page_link) {
+					break;
+				}
+				await next_page_link.click();
+				await page.waitForNavigation();
+
+			} while (page_num <= event.num_pages)

 		} catch (e) {
 			console.error(`Problem with scraping ${keyword}: ${e}`);
--- a/src/modules/google.js
+++ b/src/modules/google.js
@ -25,8 +25,8 @@ async function scrape_google_pup(page, event, context, pluggable) {
 	var results = {};

 	for (var i = 0; i < keywords.length; i++) {
-
 		keyword = keywords[i];
+		results[keyword] = {};

 		if (pluggable.before_keyword_scraped) {
 			await pluggable.before_keyword_scraped({
@ -37,54 +37,63 @@ async function scrape_google_pup(page, event, context, pluggable) {
 			});
 		}

-		if (event.verbose === true) {
-			console.log(`${event.search_engine} is scraping keyword: ${keyword}`);
-		}
-
 		try {
+
 			const input = await page.$('input[name="q"]');
-			// await input.click({ clickCount: 3 });
-            // await sfunctions.sleep(50);
-            //await input.type(keyword);
-            await sfunctions.set_input_value(page, `input[name="q"]`, keyword);
-            await sfunctions.sleep(50);
+			await sfunctions.set_input_value(page, `input[name="q"]`, keyword);
+			await sfunctions.sleep(50);
 			await input.focus();
 			await page.keyboard.press("Enter");

-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
+			let page_num = 1;

-			await page.waitForSelector('#center_col', { timeout: STANDARD_TIMEOUT });
-            await sfunctions.sleep(500);
+			do {
+				if (event.verbose === true) {
+					console.log(`${event.search_engine} is scraping keyword: ${keyword} on page ${page_num}`);
+				}
+				if (event.sleep_range) {
+					await sfunctions.random_sleep(event);
+				}
+				await page.waitForSelector('#center_col', {timeout: STANDARD_TIMEOUT});
+				await sfunctions.sleep(500);
+				let html = await page.content();
+				results[keyword][page_num] = parse_google_results(html);
+
+				page_num += 1;
+
+				let next_page_link = await page.$('#pnnext', {timeout: 1000});
+				if (!next_page_link) {
+					break;
+				}
+				await next_page_link.click();
+				await page.waitForNavigation();
+
+			} while (page_num <= event.num_pages)

 		} catch (e) {
 			console.error(`Problem with scraping ${keyword}.`);
 			console.error(e);

-            if (await scraping_detected(page) === true) {
-                console.error('Google detected the scraping. Aborting.');
+			if (await scraping_detected(page) === true) {
+				console.error('Google detected the scraping. Aborting.');

-                if (event.is_local === true) {
-                    await sfunctions.sleep(SOLVE_CAPTCHA_TIME);
-                    console.error('You have 45 seconds to enter the captcha.');
-                    // expect that user filled out necessary captcha
-                } else {
-                    return results;
-                }
-            } else {
-                // some other error, quit scraping process if stuff is broken
-                if (event.is_local === true) {
-                    console.error('You have 30 seconds to fix this.');
-                    await sfunctions.sleep(30000);
-                } else {
-                    return results;
-                }
-            }
+				if (event.is_local === true) {
+					await sfunctions.sleep(SOLVE_CAPTCHA_TIME);
+					console.error('You have 45 seconds to enter the captcha.');
+					// expect that user filled out necessary captcha
+				} else {
+					return results;
+				}
+			} else {
+				// some other error, quit scraping process if stuff is broken
+				if (event.is_local === true) {
+					console.error('You have 30 seconds to fix this.');
+					await sfunctions.sleep(30000);
+				} else {
+					return results;
+				}
+			}
 		}
-
-        let html = await page.content();
-        results[keyword] = parse_google_results(html);
 	}

 	return results;
--- a/src/modules/metadata.js
+++ b/src/modules/metadata.js
@ -12,13 +12,14 @@ async function get_metadata(browser) {
 	  waitLoad: true, 
 	  waitNetworkIdle: true // defaults to false
 	});
-	let json = await page.content();
+	let json = await page.content({
+		timeout: 20000
+	});
 	const $ = cheerio.load(json);
 	metadata.ipinfo = $('pre').text();
 	return metadata;
 }

-
 async function get_http_headers(browser) {
 	let metadata = {};
 	const page = await browser.newPage();
--- a/src/modules/se_scraper.js
+++ b/src/modules/se_scraper.js
@ -0,0 +1,39 @@
+const start_url = {
+    'google': ''
+};
+
+/*
+    Background reading on class inheritance: https://javascript.info/class-inheritance
+ */
+
+module.exports = class Scraper {
+    constructor(options = {}) {
+        const {
+            searchEngine = 'google',
+            numPages = 1,
+            pluggable = null,
+        } = options;
+        // store the resolved options; results starts as an empty object
+        this.pluggable = pluggable;
+        this.searchEngine = searchEngine;
+        this.numPages = numPages;
+        this.results = {}
+    }
+    // every method below is still an empty stub
+    async load_search_engine() {
+    }
+
+    async search_keyword() {
+    }
+
+    parse() {
+        // not implemented yet
+    }
+
+    async next_page() {
+    }
+
+    async detected() {
+        // not implemented yet
+    }
+};
--- a/src/modules/user_agents.js
+++ b/src/modules/user_agents.js
@ -3,80 +3,79 @@ module.exports = {
 };

 function random_user_agent() {
-	return user_agents[Math.floor(Math.random()*user_agents.length)];
+	return user_agents[Math.floor(Math.random()*user_agents.length)]; // random entry of the flat list; must be returned to the caller
 }

 // updated: 29 Jan 2019
 const user_agents = [
-	['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
-		'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
-		'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
-		'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
-		'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
-		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
-		'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
-		'Mozilla/5.0 (iPad; CPU OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
-		'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
-		'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.106',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
-		'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
-		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko',
-		'Mozilla/5.0 (Windows NT 6.1; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
-		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
-		'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.80 Chrome/71.0.3578.80 Safari/537.36',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
-		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
-		'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
-		'Mozilla/5.0 (Windows NT 10.0; rv:64.0) Gecko/20100101 Firefox/64.0',
-		'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
-		'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
-		'Mozilla/5.0 (X11; CrOS x86_64 11151.59.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.94 Safari/537.36',
-		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
-	]
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
+	'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
+	'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
+	'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
+	'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
+	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
+	'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
+	'Mozilla/5.0 (iPad; CPU OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
+	'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
+	'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.106',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
+	'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
+	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko',
+	'Mozilla/5.0 (Windows NT 6.1; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
+	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
+	'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.80 Chrome/71.0.3578.80 Safari/537.36',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
+	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
+	'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
+	'Mozilla/5.0 (Windows NT 10.0; rv:64.0) Gecko/20100101 Firefox/64.0',
+	'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
+	'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
+	'Mozilla/5.0 (X11; CrOS x86_64 11151.59.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.94 Safari/537.36',
+	'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
 ];
--- a/src/node_scraper.js
+++ b/src/node_scraper.js
@ -22,7 +22,7 @@ function write_results(fname, data) {

 module.exports.handler = async function handler (event, context, callback) {
 	config = event;
-	pluggable = null;
+	pluggable = {};
 	if (config.custom_func) {
 		if (fs.existsSync(config.custom_func)) {
 			try {
@ -43,8 +43,11 @@ module.exports.handler = async function handler (event, context, callback) {
 			console.log(config);
 		}

-        const ADDITIONAL_CHROME_FLAGS = [
-			//'--proxy-server=' + proxy,
+        var ADDITIONAL_CHROME_FLAGS = [
+			'--disable-infobars',
+			'--window-position=0,0',
+			'--ignore-certificate-errors',
+			'--ignore-certificate-errors-spki-list',
 			'--no-sandbox',
 			'--disable-setuid-sandbox',
 			'--disable-dev-shm-usage',
@ -70,16 +73,27 @@ module.exports.handler = async function handler (event, context, callback) {
 			)
 		}

+        if (config.proxy) {
+        	// reference for setting Chrome's proxy on the command line:
+			// https://www.systutorials.com/241062/how-to-set-google-chromes-proxy-settings-in-command-line-on-linux/
+			// expected format: [<proxy-scheme>://]<proxy-host>[:<proxy-port>]
+			// proxy schemes: "http", "socks", "socks4", "socks5".
+        	ADDITIONAL_CHROME_FLAGS.push(
+				'--proxy-server=' + config.proxy,
+			)
+		}
+
        let launch_args = {
 			args: ADDITIONAL_CHROME_FLAGS,
 			headless: config.headless,
+			ignoreHTTPSErrors: true,
 		};

 		if (config.debug === true) {
 			console.log("Chrome Args: ", launch_args);
 		}

-        if (pluggable) {
+        if (pluggable.start_browser) {
 			launch_args.config = config;
 			browser = await pluggable.start_browser(launch_args);
 		} else {
@ -91,6 +105,30 @@ module.exports.handler = async function handler (event, context, callback) {
 			console.dir(headers);
 		}

+		let metadata = {};
+
+		if (config.write_meta_data === true) {
+			metadata = await meta.get_metadata(browser);
+		}
+
+		// check that our proxy is working by confirming
+		// that ipinfo.io sees the proxy IP address
+		if (config.proxy && config.write_meta_data === true) {
+			console.log(`${metadata.ipinfo} vs ${config.proxy}`);
+
+			try {
+				let ipdata = JSON.parse(metadata.ipinfo);
+				// if the IP returned by ipinfo is not a substring of our proxy string, close the browser and abort
+				if (!config.proxy.includes(ipdata.ip)) {
+					console.error('Proxy not working properly.');
+					await browser.close();
+					return;
+				}
+			} catch (exception) {
+
+			}
+		}
+
 		const page = await browser.newPage();

 		// block some assets to speed up scraping
@ -127,13 +165,8 @@ module.exports.handler = async function handler (event, context, callback) {
 			marketwatch: tickersearch.scrape_marketwatch_finance_pup,
 		}[config.search_engine](page, config, context, pluggable);

-        let metadata = {};

-        if (config.write_meta_data === true) {
-            metadata = await meta.get_metadata(browser);
-        }
-
-		if (pluggable) {
+		if (pluggable.close_browser) {
 			await pluggable.close_browser();
 		} else {
 			await browser.close();
@ -155,7 +188,7 @@ module.exports.handler = async function handler (event, context, callback) {
 			results = zlib.deflateSync(results).toString('base64');
 		}

-		if (pluggable && pluggable.handle_results) {
+		if (pluggable.handle_results) {
 			await pluggable.handle_results({
 				config: config,
 				results: results,
@ -172,7 +205,7 @@ module.exports.handler = async function handler (event, context, callback) {
 				console.log(metadata);
 			}

-			if (pluggable) {
+			if (pluggable.handle_metadata) {
 				await pluggable.handle_metadata({metadata: metadata, config: config});
 			}
 		}