updated readme

2025-08-24 11:55:23 +02:00 · 2019-02-07 16:26:11 +01:00
parent 7b5048b8ee
commit 77c332d7c8
1 changed files with 18 additions and 17 deletions
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 This node module supports scraping several search engines.

-Right now scraping the search engines 
+Right now scraping the search engines

 * Google
 * Google News
@@ -149,40 +149,42 @@ let config = {
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    // if random_user_agent is set to true, a random user agent is chosen
    random_user_agent: true,
+    // whether to select manual settings in visible mode
+    set_manual_settings: false,
+    // log ip address data
+    log_ip_address: false,
+    // log http headers
+    log_http_headers: false,
    // how long to sleep between requests. a random sleep interval within the range [a,b]
    // is drawn before every request. empty string for no sleeping.
-    sleep_range: '[1,2]',
+    sleep_range: '[1,1]',
    // which search engine to scrape
    search_engine: 'google',
-    // whether debug information should be printed
-    // debug info is useful for developers when debugging
+    compress: false, // whether compression is enabled
    debug: false,
-    // whether verbose program output should be printed
-    // this output is informational
    verbose: false,
-    // an array of keywords to scrape
-    keywords: ['scraping scrapeulous.com'],
-    // alternatively you can specify a keyword_file. this overwrites the keywords array
-    keyword_file: '',
-    // the number of pages to scrape for each keyword
-    num_pages: 2,
+    keywords: ['scrapeulous.com'],
    // whether to start the browser in headless mode
    headless: true,
+    // the number of pages to scrape for each keyword
+    num_pages: 1,
    // path to output file, data will be stored in JSON
-    output_file: 'data.json',
-    // whether to prevent images, css, fonts from being loaded
+    output_file: '',
+    // whether to prevent images, css, fonts and media from being loaded
    // will speed up scraping a great deal
    block_assets: true,
    // path to js module that extends functionality
    // this module should export the functions:
    // get_browser, handle_metadata, close_browser
-    // must be an absolute path to the module
    //custom_func: resolve('examples/pluggable.js'),
    custom_func: '',
    // use a proxy for all connections
    // example: 'socks5://78.94.172.42:1080'
    // example: 'http://118.174.233.10:48400'
-    //proxy: 'socks5://78.94.172.42:1080',
+    proxy: '',
+    // check if headless chrome escapes common detection techniques
+    // this is a quick test and should be used for debugging
+    test_evasion: false,
 };

 function callback(err, response) {
@@ -215,7 +217,6 @@ Supported options for the `search_engine` config key:
 'baidu'
 'youtube'
 'duckduckgo_news'
-'yahoo_news'
 'reuters'
 'cnbc'
 'marketwatch'