Resolved some issues: proxy support is now possible, and scraping more than one page per keyword now works.

This commit is contained in:
Nikolai Tschacher 2019-01-29 22:48:08 +01:00
parent 89441070cd
commit 9e62f23451
14 changed files with 764 additions and 340 deletions

README.md

@@ -34,6 +34,46 @@ Scraping is done with a headless chromium browser using the automation library puppeteer.
If you need to deploy scraping to the cloud (AWS or Azure), you can contact me on hire@incolumitas.com
The chromium browser is started with the following flags to prevent
scraping detection.
```js
var ADDITIONAL_CHROME_FLAGS = [
    '--disable-infobars',
    '--window-position=0,0',
    '--ignore-certificate-errors',
    '--ignore-certificate-errors-spki-list',
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--disable-gpu',
    '--window-size=1920x1080',
    '--hide-scrollbars',
];
```
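These flags end up in the `args` option of `puppeteer.launch()`. A minimal sketch (not taken verbatim from the se-scraper sources):

```js
const puppeteer = require('puppeteer');

(async () => {
    // start chromium with the evasion flags defined above
    const browser = await puppeteer.launch({
        args: ADDITIONAL_CHROME_FLAGS,
        headless: true,
    });
    const page = await browser.newPage();
    // ... scrape ...
    await browser.close();
})();
```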
Furthermore, to avoid loading unnecessary resources and to speed up
scraping considerably, we instruct Chrome not to load images and CSS:
```js
await page.setRequestInterception(true);
page.on('request', (req) => {
let type = req.resourceType();
const block = ['stylesheet', 'font', 'image', 'media'];
if (block.includes(type)) {
req.abort();
} else {
req.continue();
}
});
```
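Note that request interception is configured per page and must be enabled before the first navigation. A rough usage sketch, assuming a `browser` obtained from `puppeteer.launch()` and running inside an async function:

```js
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (req) => {
    // abort stylesheets, fonts, images and media as shown above
    const block = ['stylesheet', 'font', 'image', 'media'];
    block.includes(req.resourceType()) ? req.abort() : req.continue();
});
await page.goto('https://www.google.com/');
```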
#### Making puppeteer and headless chrome undetectable
Consider the following resources:
* https://intoli.com/blog/making-chrome-headless-undetectable/
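
One evasion from that article (a sketch; se-scraper itself may handle this differently) is hiding the `navigator.webdriver` property that headless Chrome exposes, before any page script runs:

```js
await page.evaluateOnNewDocument(() => {
    // headless chrome sets navigator.webdriver = true, a common detection signal
    Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined,
    });
});
```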
### Installation and Usage

@@ -53,12 +93,12 @@ let config = {
// the user agent to scrape with
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
// if random_user_agent is set to True, a random user agent is chosen
random_user_agent: true,
// get meta data of scraping in return object
write_meta_data: false,
// how long to sleep between requests. a random sleep interval within the range [a,b]
// is drawn before every request. empty string for no sleeping.
sleep_range: '[1,2]',
// which search engine to scrape
search_engine: 'google',
// whether debug information should be printed
@@ -68,9 +108,11 @@ let config = {
// this output is informational
verbose: false,
// an array of keywords to scrape
keywords: ['scraping scrapeulous.com'],
// alternatively you can specify a keyword_file. this overwrites the keywords array
keyword_file: '',
// the number of pages to scrape for each keyword
num_pages: 2,
// whether to start the browser in headless mode
headless: true,
// path to output file, data will be stored in JSON
@@ -84,9 +126,13 @@ let config = {
// must be an absolute path to the module
//custom_func: resolve('examples/pluggable.js'),
custom_func: '',
// use a proxy for all connections
// example: 'socks5://78.94.172.42:1080'
// example: 'http://118.174.233.10:48400'
//proxy: 'socks5://78.94.172.42:1080',
};

function callback(err, response) {
if (err) { console.error(err) }

/* response object has the following properties:
@@ -97,7 +143,9 @@ se_scraper.scrape(config, (err, response) => {
*/

console.dir(response.results, {depth: null, colors: true});
}

se_scraper.scrape(config, callback);
```

Supported options for the `search_engine` config key:

@@ -123,199 +171,179 @@ Supported options for the `search_engine` config key:
'marketwatch'
```
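Because the engine is just a config key, the same keywords can be run against several engines in a row. A sketch reusing the `config` and `callback` objects from the example above (and assuming `se_scraper` was required as shown there):

```js
for (let engine of ['google', 'bing', 'duckduckgo']) {
    // copy the config and only swap the search engine
    se_scraper.scrape(Object.assign({}, config, { search_engine: engine }), callback);
}
```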
Output for the above script on my machine:

```text
{ 'scraping scrapeulous.com':
   { '1':
      { time: 'Tue, 29 Jan 2019 21:39:22 GMT',
        num_results: 'Ungefähr 145 Ergebnisse (0,18 Sekunden) ',
        no_results: false,
        effective_query: '',
        results:
         [ { link: 'https://scrapeulous.com/',
             title: 'Scrapeuloushttps://scrapeulous.com/Im CacheDiese Seite übersetzen',
             snippet: 'Scrapeulous.com allows you to scrape various search engines automatically ... or to find hidden links, Scrapeulous.com enables you to scrape a ever increasing ...',
             visible_link: 'https://scrapeulous.com/',
             date: '',
             rank: 1 },
           { link: 'https://scrapeulous.com/about/',
             title: 'About - Scrapeuloushttps://scrapeulous.com/about/Im CacheDiese Seite übersetzen',
             snippet: 'Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. The business requirement to scrape information from ...',
             visible_link: 'https://scrapeulous.com/about/',
             date: '',
             rank: 2 },
           { link: 'https://scrapeulous.com/howto/',
             title: 'Howto - Scrapeuloushttps://scrapeulous.com/howto/Im CacheDiese Seite übersetzen',
             snippet: 'We offer scraping large amounts of keywords for the Google Search Engine. Large means any number of keywords between 40 and 50000. Additionally, we ...',
             visible_link: 'https://scrapeulous.com/howto/',
             date: '',
             rank: 3 },
           { link: 'https://github.com/NikolaiT/se-scraper',
             title: 'GitHub - NikolaiT/se-scraper: Javascript scraping module based on ...https://github.com/NikolaiT/se-scraperIm CacheDiese Seite übersetzen',
             snippet: '24.12.2018 - Javascript scraping module based on puppeteer for many different search ... for many different search engines... https://scrapeulous.com/.',
             visible_link: 'https://github.com/NikolaiT/se-scraper',
             date: '24.12.2018 - ',
             rank: 4 },
           { link: 'https://github.com/NikolaiT/GoogleScraper/blob/master/README.md',
             title: 'GoogleScraper/README.md at master · NikolaiT/GoogleScraper ...https://github.com/NikolaiT/GoogleScraper/blob/.../README.mdIm CacheÄhnliche SeitenDiese Seite übersetzen',
             snippet: 'GoogleScraper - Scraping search engines professionally. Scrapeulous.com - Scraping Service. GoogleScraper is a open source tool and will remain a open ...',
             visible_link: 'https://github.com/NikolaiT/GoogleScraper/blob/.../README.md',
             date: '',
             rank: 5 },
           { link: 'https://googlescraper.readthedocs.io/',
             title: 'Welcome to GoogleScraper\'s documentation! — GoogleScraper ...https://googlescraper.readthedocs.io/Im CacheDiese Seite übersetzen',
             snippet: 'Welcome to GoogleScraper\'s documentation!¶. Contents: GoogleScraper - Scraping search engines professionally · Scrapeulous.com - Scraping Service ...',
             visible_link: 'https://googlescraper.readthedocs.io/',
             date: '',
             rank: 6 },
           { link: 'https://incolumitas.com/pages/scrapeulous/',
             title: 'Coding, Learning and Business Ideas Scrapeulous.com - Incolumitashttps://incolumitas.com/pages/scrapeulous/Im CacheDiese Seite übersetzen',
             snippet: 'A scraping service for scientists, marketing professionals, analysts or SEO folk. In autumn 2018, I created a scraping service called scrapeulous.com. There you ...',
             visible_link: 'https://incolumitas.com/pages/scrapeulous/',
             date: '',
             rank: 7 },
           { link: 'https://incolumitas.com/',
             title: 'Coding, Learning and Business Ideashttps://incolumitas.com/Im CacheDiese Seite übersetzen',
             snippet: 'Scraping Amazon Reviews using Headless Chrome Browser and Python3. Posted on Mi ... GoogleScraper Tutorial - How to scrape 1000 keywords with Google.',
             visible_link: 'https://incolumitas.com/',
             date: '',
             rank: 8 },
           { link: 'https://en.wikipedia.org/wiki/Search_engine_scraping',
             title: 'Search engine scraping - Wikipediahttps://en.wikipedia.org/wiki/Search_engine_scrapingIm CacheDiese Seite übersetzen',
             snippet: 'Search engine scraping is the process of harvesting URLs, descriptions, or other information from search engines such as Google, Bing or Yahoo. This is a ...',
             visible_link: 'https://en.wikipedia.org/wiki/Search_engine_scraping',
             date: '',
             rank: 9 },
           { link: 'https://readthedocs.org/projects/googlescraper/downloads/pdf/latest/',
             title: 'GoogleScraper Documentation - Read the Docshttps://readthedocs.org/projects/googlescraper/downloads/.../latest...Im CacheDiese Seite übersetzen',
             snippet: '23.12.2018 - Contents: 1 GoogleScraper - Scraping search engines professionally. 1. 1.1 ... For this reason, I created the web service scrapeulous.com.',
             visible_link: 'https://readthedocs.org/projects/googlescraper/downloads/.../latest...',
             date: '23.12.2018 - ',
             rank: 10 } ] },
     '2':
      { time: 'Tue, 29 Jan 2019 21:39:24 GMT',
        num_results: 'Seite 2 von ungefähr 145 Ergebnissen (0,20 Sekunden) ',
        no_results: false,
        effective_query: '',
        results:
         [ { link: 'https://pypi.org/project/CountryGoogleScraper/',
             title: 'CountryGoogleScraper · PyPIhttps://pypi.org/project/CountryGoogleScraper/Im CacheDiese Seite übersetzen',
             snippet: 'A module to scrape and extract links, titles and descriptions from various search ... Look [here to get an idea how to use asynchronous mode](http://scrapeulous.',
             visible_link: 'https://pypi.org/project/CountryGoogleScraper/',
             date: '',
             rank: 1 },
           { link: 'https://www.youtube.com/watch?v=a6xn6rc9GbI',
             title: 'scrapeulous intro - YouTubehttps://www.youtube.com/watch?v=a6xn6rc9GbIDiese Seite übersetzen',
             snippet: 'scrapeulous intro. Scrapeulous Scrapeulous. Loading... Unsubscribe from ... on Dec 16, 2018. Introduction ...',
             visible_link: 'https://www.youtube.com/watch?v=a6xn6rc9GbI',
             date: '',
             rank: 3 },
           { link: 'https://www.reddit.com/r/Python/comments/2tii3r/scraping_260_search_queries_in_bing_in_a_matter/',
             title: 'Scraping 260 search queries in Bing in a matter of seconds using ...https://www.reddit.com/.../scraping_260_search_queries_in_bing...Im CacheDiese Seite übersetzen',
             snippet: '24.01.2015 - Scraping 260 search queries in Bing in a matter of seconds using asyncio and aiohttp. (scrapeulous.com). submitted 3 years ago by ...',
             visible_link: 'https://www.reddit.com/.../scraping_260_search_queries_in_bing...',
             date: '24.01.2015 - ',
             rank: 4 },
           { link: 'https://twitter.com/incolumitas_?lang=de',
             title: 'Nikolai Tschacher (@incolumitas_) | Twitterhttps://twitter.com/incolumitas_?lang=deIm CacheÄhnliche SeitenDiese Seite übersetzen',
             snippet: 'Learn how to scrape millions of url from yandex and google or bing with: http://scrapeulous.com/googlescraper-market-analysis.html … 0 replies 0 retweets 0 ...',
             visible_link: 'https://twitter.com/incolumitas_?lang=de',
             date: '',
             rank: 5 },
           { link: 'http://blog.shodan.io/hostility-in-the-python-package-index/',
             title: 'Hostility in the Cheese Shop - Shodan Blogblog.shodan.io/hostility-in-the-python-package-index/Im CacheDiese Seite übersetzen',
             snippet: '22.02.2015 - https://zzz.scrapeulous.com/r? According to the author of the website, these hostile packages are used as honeypots. Honeypots are usually ...',
             visible_link: 'blog.shodan.io/hostility-in-the-python-package-index/',
             date: '22.02.2015 - ',
             rank: 6 },
           { link: 'https://libraries.io/github/NikolaiT/GoogleScraper',
             title: 'NikolaiT/GoogleScraper - Libraries.iohttps://libraries.io/github/NikolaiT/GoogleScraperIm CacheDiese Seite übersetzen',
             snippet: 'A Python module to scrape several search engines (like Google, Yandex, Bing, ... https://scrapeulous.com/ ... You can install GoogleScraper comfortably with pip:',
             visible_link: 'https://libraries.io/github/NikolaiT/GoogleScraper',
             date: '',
             rank: 7 },
           { link: 'https://pydigger.com/pypi/CountryGoogleScraper',
             title: 'CountryGoogleScraper - PyDiggerhttps://pydigger.com/pypi/CountryGoogleScraperDiese Seite übersetzen',
             snippet: '19.10.2016 - Look [here to get an idea how to use asynchronous mode](http://scrapeulous.com/googlescraper-260-keywords-in-a-second.html). ### Table ...',
             visible_link: 'https://pydigger.com/pypi/CountryGoogleScraper',
             date: '19.10.2016 - ',
             rank: 8 },
           { link: 'https://hub.docker.com/r/cimenx/data-mining-penandtest/',
             title: 'cimenx/data-mining-penandtest - Docker Hubhttps://hub.docker.com/r/cimenx/data-mining-penandtest/Im CacheDiese Seite übersetzen',
             snippet: 'Container. OverviewTagsDockerfileBuilds · http://scrapeulous.com/googlescraper-260-keywords-in-a-second.html. Docker Pull Command. Owner. profile ...',
             visible_link: 'https://hub.docker.com/r/cimenx/data-mining-penandtest/',
             date: '',
             rank: 9 },
           { link: 'https://www.revolvy.com/page/Search-engine-scraping',
             title: 'Search engine scraping | Revolvyhttps://www.revolvy.com/page/Search-engine-scrapingIm CacheDiese Seite übersetzen',
             snippet: 'Search engine scraping is the process of harvesting URLs, descriptions, or other information from search engines such as Google, Bing or Yahoo. This is a ...',
             visible_link: 'https://www.revolvy.com/page/Search-engine-scraping',
             date: '',
             rank: 10 } ] } } }
```
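As the output shows, `response.results` is keyed first by keyword and then by page number (as a string). A consumer would iterate it roughly like this (a sketch assuming the `response` object from the callback above):

```js
for (let keyword in response.results) {
    for (let page_num in response.results[keyword]) {
        let serp = response.results[keyword][page_num];
        console.log(keyword, page_num, serp.num_results, serp.results.length);
    }
}
```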


@@ -14,6 +14,17 @@
https://www.scrapehero.com/how-to-increase-web-scraping-speed-using-puppeteer/
https://www.scrapehero.com/how-to-build-a-web-scraper-using-puppeteer-and-node-js/
29.1.2019
- implement proxy support functionality
- implement proxy check
- implement scraping more than 1 page
- do it for google
- and bing
- implement duckduckgo scraping
TODO:
- think about implementing ticker search for: https://quotes.wsj.com/MSFT?mod=searchresults_companyquotes
- add proxy support
@@ -24,3 +35,27 @@ TODO:
- think whether it makes sense to introduce a generic scraping class?
- is scraping abstractable or is every scraper too unique?
- don't make the same mistakes as with GoogleScraper
TODO:
okay, it's time to build a generic scraping class like in GoogleScraper;
i feel like history repeats itself

class Scraper
    constructor(options = {}) {
    }
    async load_search_engine() {}
    async search_keyword() {}
    async new_page() {}
    async detected() {}

then each search engine derives from this generic class
some search engines do not need such an abstract class, because they are too complex

File diff suppressed because one or more lines are too long


@@ -35,6 +35,10 @@ exports.scrape = async function(config, callback) {
// get_browser, handle_metadata, close_browser
//custom_func: resolve('examples/pluggable.js'),
custom_func: '',
// use a proxy for all connections
// example: 'socks5://78.94.172.42:1080'
// example: 'http://118.174.233.10:48400'
proxy: '',
};

// overwrite default config

package-lock.json (generated)

@@ -1,9 +1,22 @@
{
"name": "se-scraper",
"version": "1.1.7",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"@sindresorhus/is": {
"version": "0.14.0",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-0.14.0.tgz",
"integrity": "sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ=="
},
"@szmarczak/http-timer": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-1.1.2.tgz",
"integrity": "sha512-XIB2XbzHTN6ieIjfIMV9hlVcfPU26s2vafYWQcZHWXHOxiaRZYEDKEwdl129Zyg50+foYV2jCgtrqSA6qNuNSA==",
"requires": {
"defer-to-connect": "^1.0.1"
}
},
"@types/node": { "@types/node": {
"version": "10.12.18", "version": "10.12.18",
"resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz",
@ -51,6 +64,20 @@
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz",
"integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A=="
}, },
"cacheable-request": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-6.0.0.tgz",
"integrity": "sha512-2N7AmszH/WPPpl5Z3XMw1HAP+8d+xugnKQAeKvxFZ/04dbT/CAznqwbl+7eSr3HkwdepNwtb2yx3CAMQWvG01Q==",
"requires": {
"clone-response": "^1.0.2",
"get-stream": "^4.0.0",
"http-cache-semantics": "^4.0.0",
"keyv": "^3.0.0",
"lowercase-keys": "^1.0.1",
"normalize-url": "^3.1.0",
"responselike": "^1.0.2"
}
},
"chai": { "chai": {
"version": "4.2.0", "version": "4.2.0",
"resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz", "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz",
@ -82,6 +109,14 @@
"parse5": "^3.0.1" "parse5": "^3.0.1"
} }
}, },
"clone-response": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/clone-response/-/clone-response-1.0.2.tgz",
"integrity": "sha1-0dyXOSAxTfZ/vrlCI7TuNQI56Ws=",
"requires": {
"mimic-response": "^1.0.0"
}
},
"concat-map": { "concat-map": {
"version": "0.0.1", "version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@ -151,6 +186,14 @@
"ms": "^2.1.1" "ms": "^2.1.1"
} }
}, },
"decompress-response": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-3.3.0.tgz",
"integrity": "sha1-gKTdMjdIOEv6JICDYirt7Jgq3/M=",
"requires": {
"mimic-response": "^1.0.0"
}
},
"deep-eql": { "deep-eql": {
"version": "3.0.1", "version": "3.0.1",
"resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-3.0.1.tgz", "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-3.0.1.tgz",
@ -159,6 +202,11 @@
"type-detect": "^4.0.0" "type-detect": "^4.0.0"
} }
}, },
"defer-to-connect": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-1.0.2.tgz",
"integrity": "sha512-k09hcQcTDY+cwgiwa6PYKLm3jlagNzQ+RSvhjzESOGOx+MNOuXkxTfEvPrO1IOQ81tArCFYQgi631clB70RpQw=="
},
"dom-serializer": { "dom-serializer": {
"version": "0.1.0", "version": "0.1.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz", "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz",
@ -197,6 +245,19 @@
"domelementtype": "1" "domelementtype": "1"
} }
}, },
"duplexer3": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/duplexer3/-/duplexer3-0.1.4.tgz",
"integrity": "sha1-7gHdHKwO08vH/b6jfcCo8c4ALOI="
},
"end-of-stream": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.1.tgz",
"integrity": "sha512-1MkrZNvWTKCaigbn+W15elq2BB/L22nqrSY5DKlo3X6+vclJm8Bb5djXJBmEX6fS3+zCh/F4VBK5Z2KxJt4s2Q==",
"requires": {
"once": "^1.4.0"
}
},
"entities": { "entities": {
"version": "1.1.2", "version": "1.1.2",
"resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz", "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
@ -259,6 +320,14 @@
"resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.0.tgz", "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.0.tgz",
"integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=" "integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE="
}, },
"get-stream": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz",
"integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==",
"requires": {
"pump": "^3.0.0"
}
},
"glob": { "glob": {
"version": "7.1.3", "version": "7.1.3",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz", "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz",
@ -272,6 +341,24 @@
"path-is-absolute": "^1.0.0" "path-is-absolute": "^1.0.0"
} }
}, },
"got": {
"version": "9.6.0",
"resolved": "https://registry.npmjs.org/got/-/got-9.6.0.tgz",
"integrity": "sha512-R7eWptXuGYxwijs0eV+v3o6+XH1IqVK8dJOEecQfTmkncw9AV4dcw/Dhxi8MdlqPthxxpZyizMzyg8RTmEsG+Q==",
"requires": {
"@sindresorhus/is": "^0.14.0",
"@szmarczak/http-timer": "^1.1.2",
"cacheable-request": "^6.0.0",
"decompress-response": "^3.3.0",
"duplexer3": "^0.1.4",
"get-stream": "^4.1.0",
"lowercase-keys": "^1.0.1",
"mimic-response": "^1.0.1",
"p-cancelable": "^1.0.0",
"to-readable-stream": "^1.0.0",
"url-parse-lax": "^3.0.0"
}
},
"htmlparser2": { "htmlparser2": {
"version": "3.10.0", "version": "3.10.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.0.tgz", "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.0.tgz",
@ -285,6 +372,11 @@
"readable-stream": "^3.0.6" "readable-stream": "^3.0.6"
} }
}, },
"http-cache-semantics": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.0.2.tgz",
"integrity": "sha512-laeSTWIkuFa6lUgZAt+ic9RwOSEwbi9VDQNcCvMFO4sZiDc2Ha8DaZVCJnfpLLQCcS8rvCnIWYmz0POLxt7Dew=="
},
"https-proxy-agent": { "https-proxy-agent": {
"version": "2.2.1", "version": "2.2.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.1.tgz", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.1.tgz",
@ -323,16 +415,39 @@
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
}, },
"json-buffer": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.0.tgz",
"integrity": "sha1-Wx85evx11ne96Lz8Dkfh+aPZqJg="
},
"keyv": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-3.1.0.tgz",
"integrity": "sha512-9ykJ/46SN/9KPM/sichzQ7OvXyGDYKGTaDlKMGCAlg2UK8KRy4jb0d8sFc+0Tt0YYnThq8X2RZgCg74RPxgcVA==",
"requires": {
"json-buffer": "3.0.0"
}
},
"lodash": { "lodash": {
"version": "4.17.11", "version": "4.17.11",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz",
"integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg=="
}, },
"lowercase-keys": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz",
"integrity": "sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA=="
},
"mime": { "mime": {
"version": "2.4.0", "version": "2.4.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-2.4.0.tgz", "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.0.tgz",
"integrity": "sha512-ikBcWwyqXQSHKtciCcctu9YfPbFYZ4+gbHEmE0Q8jzcTYQg5dHCr3g2wwAZjPoJfQVXZq6KXAjpXOTf5/cjT7w==" "integrity": "sha512-ikBcWwyqXQSHKtciCcctu9YfPbFYZ4+gbHEmE0Q8jzcTYQg5dHCr3g2wwAZjPoJfQVXZq6KXAjpXOTf5/cjT7w=="
}, },
"mimic-response": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz",
"integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ=="
},
"minimatch": { "minimatch": {
"version": "3.0.4", "version": "3.0.4",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
@ -359,6 +474,11 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
"integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
}, },
"normalize-url": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-3.3.0.tgz",
"integrity": "sha512-U+JJi7duF1o+u2pynbp2zXDW2/PADgC30f0GsHZtRh+HOcXHnw137TrNlyxxRvWW5fjKd3bcLHPxofWuCjaeZg=="
},
"nth-check": { "nth-check": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz",
@ -375,6 +495,11 @@
"wrappy": "1" "wrappy": "1"
} }
}, },
"p-cancelable": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-1.0.0.tgz",
"integrity": "sha512-USgPoaC6tkTGlS831CxsVdmZmyb8tR1D+hStI84MyckLOzfJlYQUweomrwE3D8T7u5u5GVuW064LT501wHTYYA=="
},
"parse5": { "parse5": {
"version": "3.0.3", "version": "3.0.3",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-3.0.3.tgz", "resolved": "https://registry.npmjs.org/parse5/-/parse5-3.0.3.tgz",
@ -398,6 +523,11 @@
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
"integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA=" "integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA="
}, },
"prepend-http": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/prepend-http/-/prepend-http-2.0.0.tgz",
"integrity": "sha1-6SQ0v6XqjBn0HN/UAddBo8gZ2Jc="
},
"process-nextick-args": { "process-nextick-args": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz",
@ -413,6 +543,15 @@
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.0.0.tgz", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.0.0.tgz",
"integrity": "sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4=" "integrity": "sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4="
}, },
"pump": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
"integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
"requires": {
"end-of-stream": "^1.1.0",
"once": "^1.3.1"
}
},
"puppeteer": { "puppeteer": {
"version": "1.11.0", "version": "1.11.0",
"resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-1.11.0.tgz", "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-1.11.0.tgz",
@ -438,6 +577,14 @@
"util-deprecate": "^1.0.1" "util-deprecate": "^1.0.1"
} }
}, },
"responselike": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/responselike/-/responselike-1.0.2.tgz",
"integrity": "sha1-kYcg7ztjHFZCvgaPFa3lpG9Loec=",
"requires": {
"lowercase-keys": "^1.0.0"
}
},
"rimraf": { "rimraf": {
"version": "2.6.2", "version": "2.6.2",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz",
@ -459,6 +606,11 @@
"safe-buffer": "~5.1.0" "safe-buffer": "~5.1.0"
} }
}, },
"to-readable-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/to-readable-stream/-/to-readable-stream-1.0.0.tgz",
"integrity": "sha512-Iq25XBt6zD5npPhlLVXGFN3/gyR2/qODcKNNyTMd4vbm39HUaOiAM4PMq0eMVC/Tkxz+Zjdsc55g9yyz+Yq00Q=="
},
"type-detect": { "type-detect": {
"version": "4.0.8", "version": "4.0.8",
"resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz",
@ -469,6 +621,14 @@
"resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
"integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=" "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
}, },
"url-parse-lax": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz",
"integrity": "sha1-FrXK/Afb42dsGxmZF3gj1lA6yww=",
"requires": {
"prepend-http": "^2.0.0"
}
},
"util-deprecate": { "util-deprecate": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",

package.json

@@ -1,6 +1,6 @@
{
"name": "se-scraper",
"version": "1.1.8",
"description": "A simple module which uses puppeteer to scrape several search engines.",
"homepage": "https://scrapeulous.com/",
"main": "index.js",
@@ -22,6 +22,7 @@
"dependencies": {
"chai": "^4.2.0",
"cheerio": "^1.0.0-rc.2",
"got": "^9.6.0",
"puppeteer": "^1.9.0"
}
}

run.js

@@ -10,7 +10,7 @@ let config = {
write_meta_data: false,
// how long to sleep between requests. a random sleep interval within the range [a,b]
// is drawn before every request. empty string for no sleeping.
sleep_range: '[1,2]',
// which search engine to scrape
search_engine: 'google',
// whether debug information should be printed
@@ -20,9 +20,11 @@ let config = {
// this output is informational
verbose: false,
// an array of keywords to scrape
keywords: ['scraping scrapeulous.com'],
// alternatively you can specify a keyword_file. this overwrites the keywords array
keyword_file: '',
// the number of pages to scrape for each keyword
num_pages: 1,
// whether to start the browser in headless mode
headless: true,
// path to output file, data will be stored in JSON
@@ -35,7 +37,11 @@ let config = {
// get_browser, handle_metadata, close_browser
// must be an absolute path to the module
//custom_func: resolve('examples/pluggable.js'),
custom_func: '',
// use a proxy for all connections
// example: 'socks5://78.94.172.42:1080'
// example: 'http://118.174.233.10:48400'
//proxy: 'socks5://78.94.172.42:1080',
};

function callback(err, response) {

src/captcha_solver.js (new file)

@@ -0,0 +1,96 @@
/*
There are essentially two strategies to handle a search engine showing you a captcha:
1. Solve the captcha
https://github.com/ecthros/uncaptcha2
or use a captcha solving service such as https://anti-captcha.com/mainpage
2. Switch your IP address with rotating proxies
*/
/**
* @name download recaptcha2 audio captcha
*
* There are several issues:
*
* Google sees that we are using an automated browser.
*
* In the worst case we have to completely control the browser ourselves without puppeteer.
*
* https://github.com/ecthros/uncaptcha2
*
* See here:
*
* https://gist.github.com/tegansnyder/c3aeae4d57768c58247ae6c4e5acd3d1
*
* https://github.com/GoogleChrome/puppeteer/issues/3039
*
* https://intoli.com/blog/making-chrome-headless-undetectable/
*
* @desc Go to the https://www.google.com/recaptcha/api2/demo demo page and download the captcha
*/
const puppeteer = require('puppeteer');
const fs = require('fs');
const got = require('got');
try {
    (async () => {
        const browser = await puppeteer.launch({
            args: [
                '--proxy-server=socks5://78.94.172.42:1080',
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-accelerated-2d-canvas',
                '--disable-gpu',
                '--window-size=1920x1080',
                '--hide-scrollbars',
                '--user-agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0"',
            ],
            headless: false,
        });
        const page = await browser.newPage();
        await page.goto('https://www.google.com/recaptcha/api2/demo');
        await page.waitFor(1000);

        const frames = page.frames();
        console.info('Available frames', frames.map(frame => frame.name()));
        console.info('Available frame urls', frames.map(frame => frame.url()));
        const frame = frames.find(frame => frame.url().includes('/recaptcha/api2/anchor?'));
        const content_frame = frames.find(frame => frame.url().includes('/recaptcha/api2/bframe?'));

        await frame.waitForSelector('#recaptcha-anchor', { timeout: 10000 });
        await page.waitFor(1000);
        const button = await frame.$('#recaptcha-anchor');
        await button.click();

        await content_frame.waitForSelector('#recaptcha-audio-button');
        const audio_button = await content_frame.$('#recaptcha-audio-button');
        await audio_button.click();
        await page.waitFor(1000);

        await content_frame.waitForSelector('.rc-audiochallenge-tdownload-link');
        let download_link = await content_frame.evaluate(() => {
            // querySelector (not querySelectorAll): we need a single element to read its href
            return document.querySelector('.rc-audiochallenge-tdownload-link').getAttribute('href');
        });
        console.log('Got audio download link: ', download_link);
        got.stream(download_link).pipe(fs.createWriteStream('audio.mp3'));

        await browser.close();
    })()
} catch (err) {
    console.error(err)
}
/*
TODO: port this into JS: https://github.com/ecthros/uncaptcha2/blob/master/queryAPI.py
*/
async function translate_audio_file() {
}
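/*
A rough sketch of what translate_audio_file() might become, assuming some
generic speech-to-text HTTP API; the endpoint below is a hypothetical
placeholder, not a real service:

async function translate_audio_file(fname) {
    const audio = fs.readFileSync(fname);
    const response = await got.post('https://speech-to-text.example.com/recognize', {
        // send the raw mp3 bytes; a real service will dictate its own format
        body: audio,
        headers: { 'content-type': 'audio/mp3' },
    });
    return response.body;
}
*/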


@@ -21,6 +21,7 @@ async function scrape_bing_pup(page, event, context, pluggable) {
for (var i = 0; i < keywords.length; i++) {
keyword = keywords[i];
results[keyword] = {};
if (pluggable.before_keyword_scraped) {
await pluggable.before_keyword_scraped({
@@ -33,23 +34,35 @@
try {
const input = await page.$('input[name="q"]');
await sfunctions.set_input_value(page, `input[name="q"]`, keyword);
await sfunctions.sleep(50);
await input.focus();
await page.keyboard.press("Enter");

let page_num = 1;

do {
if (event.verbose === true) {
console.log(`${event.search_engine} is scraping keyword: ${keyword} on page ${page_num}`);
}

if (event.sleep_range) {
await sfunctions.random_sleep(event);
}

await page.waitForSelector('#b_content', { timeout: 5000 });
await sfunctions.sleep(500);

let html = await page.content();
results[keyword][page_num] = parse(html);

page_num += 1;

let next_page_link = await page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
break;
}
await next_page_link.click();
await page.waitForNavigation();

} while (page_num <= event.num_pages)

} catch (e) {
console.error(`Problem with scraping ${keyword}: ${e}`);


@@ -25,8 +25,8 @@ async function scrape_google_pup(page, event, context, pluggable) {
var results = {};
for (var i = 0; i < keywords.length; i++) {
keyword = keywords[i];
results[keyword] = {};
if (pluggable.before_keyword_scraped) {
await pluggable.before_keyword_scraped({
@@ -37,26 +37,38 @@
});
}

try {
const input = await page.$('input[name="q"]');
await sfunctions.set_input_value(page, `input[name="q"]`, keyword);
await sfunctions.sleep(50);
await input.focus();
await page.keyboard.press("Enter");

let page_num = 1;

do {
if (event.verbose === true) {
console.log(`${event.search_engine} is scraping keyword: ${keyword} on page ${page_num}`);
}

if (event.sleep_range) {
await sfunctions.random_sleep(event);
}

await page.waitForSelector('#center_col', { timeout: STANDARD_TIMEOUT });
await sfunctions.sleep(500);

let html = await page.content();
results[keyword][page_num] = parse_google_results(html);

page_num += 1;

let next_page_link = await page.$('#pnnext', {timeout: 1000});
if (!next_page_link) {
break;
}
await next_page_link.click();
await page.waitForNavigation();

} while (page_num <= event.num_pages)

} catch (e) {
console.error(`Problem with scraping ${keyword}.`);
@@ -82,9 +94,6 @@
}
}
}
}

return results;


@@ -12,13 +12,14 @@ async function get_metadata(browser) {
waitLoad: true,
waitNetworkIdle: true // defaults to false
});
let json = await page.content({
timeout: 20000
});
const $ = cheerio.load(json);
metadata.ipinfo = $('pre').text();
return metadata;
}

async function get_http_headers(browser) {
let metadata = {};
const page = await browser.newPage();

src/modules/se_scraper.js (new file)

@@ -0,0 +1,39 @@
const start_url = {
    'google': ''
};

/*
    Read this: https://javascript.info/class-inheritance
*/
module.exports = class Scraper {
    constructor(options = {}) {
        const {
            searchEngine = 'google',
            numPages = 1,
            pluggable = null,
        } = options;
        this.pluggable = pluggable;
        this.searchEngine = searchEngine;
        this.numPages = numPages;
        this.results = {};
    }

    async load_search_engine() {
    }

    async search_keyword() {
    }

    parse() {
    }

    async next_page() {
    }

    async detected() {
    }
};
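/*
A sketch of how a concrete engine could derive from this class; the
GoogleScraper subclass below is illustrative and not part of this commit:

class GoogleScraper extends Scraper {
    async search_keyword(keyword) {
        // type the keyword into input[name="q"] and press Enter
    }
    async next_page() {
        // click '#pnnext' and wait for navigation; signal "done" when absent
    }
}
*/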


@@ -3,12 +3,12 @@ module.exports = {
};

function random_user_agent() {
let rand = user_agents[Math.floor(Math.random()*user_agents.length)];
return rand;
}

// updated: 29 Jan 2019

const user_agents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
@@ -78,5 +78,4 @@ const user_agents = [
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
'Mozilla/5.0 (X11; CrOS x86_64 11151.59.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.94 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
];


@@ -22,7 +22,7 @@ function write_results(fname, data) {
module.exports.handler = async function handler (event, context, callback) {
config = event;
pluggable = {};
if (config.custom_func) {
if (fs.existsSync(config.custom_func)) {
try {
@@ -43,8 +43,11 @@
console.log(config);
}

var ADDITIONAL_CHROME_FLAGS = [
'--disable-infobars',
'--window-position=0,0',
'--ignore-certificate-errors',
'--ignore-certificate-errors-spki-list',
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
@@ -70,16 +73,27 @@
)
}
if (config.proxy) {
// how to set chrome's proxy settings on the command line:
// https://www.systutorials.com/241062/how-to-set-google-chromes-proxy-settings-in-command-line-on-linux/
// [<proxy-scheme>://]<proxy-host>[:<proxy-port>]
// "http", "socks", "socks4", "socks5".
ADDITIONAL_CHROME_FLAGS.push(
'--proxy-server=' + config.proxy,
)
}
let launch_args = {
args: ADDITIONAL_CHROME_FLAGS,
headless: config.headless,
ignoreHTTPSErrors: true,
};

if (config.debug === true) {
console.log("Chrome Args: ", launch_args);
}
if (pluggable.start_browser) {
launch_args.config = config;
browser = await pluggable.start_browser(launch_args);
} else {
@@ -91,6 +105,30 @@
console.dir(headers);
}
let metadata = {};
if (config.write_meta_data === true) {
metadata = await meta.get_metadata(browser);
}
// check that our proxy is working by confirming
// that ipinfo.io sees the proxy IP address
if (config.proxy && config.write_meta_data === true) {
console.log(`${metadata.ipinfo} vs ${config.proxy}`);
try {
let ipdata = JSON.parse(metadata.ipinfo);
// if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
if (!config.proxy.includes(ipdata.ip)) {
console.error('Proxy not working properly.');
await browser.close();
return;
}
} catch (exception) {
}
}
const page = await browser.newPage();

// block some assets to speed up scraping
@@ -127,13 +165,8 @@
marketwatch: tickersearch.scrape_marketwatch_finance_pup,
}[config.search_engine](page, config, context, pluggable);

if (pluggable.close_browser) {
await pluggable.close_browser();
} else {
await browser.close();
@@ -155,7 +188,7 @@
results = zlib.deflateSync(results).toString('base64');
}

if (pluggable.handle_results) {
await pluggable.handle_results({
config: config,
results: results,
@@ -172,7 +205,7 @@
console.log(metadata);
}

if (pluggable.handle_metadata) {
await pluggable.handle_metadata({metadata: metadata, config: config});
}
}