Added support for Amazon

2025-08-27 13:15:19 +02:00 · 2019-03-10 20:02:42 +01:00
parent dd1f36076e
commit 51d617442d
9 changed files with 1066 additions and 440 deletions
--- a/run.js
+++ b/run.js
@@ -9,27 +9,7 @@ let config = {
    // is drawn before every request. empty string for no sleeping.
    sleep_range: '[1,2]',
    // which search engine to scrape
-    search_engine: 'google',
-
-    // use specific search engine parameters for various search engines
-    // google_settings: {
-    //     google_domain: 'google.com',
-    //     gl: 'us', // The gl parameter determines the Google country to use for the query.
-    //     hl: 'us', // The hl parameter determines the Google UI language to return results.
-    //     start: 0, // Determines the results offset to use, defaults to 0.
-    //     num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
-    // },
-
-    google_settings: '{"gl": "tr", "hl": "tr", "num": "50", "start": "0"}',
-
-    // https://docs.microsoft.com/en-us/rest/api/cognitiveservices/bing-web-api-v5-reference#query-parameters
-    bing_settings: {
-        count: 50,      // how many results per page
-        safeSearch: 'Off',      // safe search (strict, moderate, off)
-        cc: 'us',        // ISO 3166 country code
-        offset: 0, // The zero-based offset that indicates the number of search results to skip before returning results
-    },
-
+    search_engine: 'amazon',
    // whether debug information should be printed
    // debug info is useful for developers when debugging
    debug: false,
@@ -37,18 +17,18 @@ let config = {
    // this output is informational
    verbose: true,
    // an array of keywords to scrape
-    keywords: ['good news'],
+    keywords: ['drone', 'smartphone'],
    // alternatively you can specify a keyword_file. this overwrites the keywords array
    keyword_file: '',
    // the number of pages to scrape for each keyword
-    num_pages: 2,
+    num_pages: 1,
    // whether to start the browser in headless mode
-    headless: true,
+    headless: false,
    // path to output file, data will be stored in JSON
-    output_file: 'examples/results/advanced.json',
+    output_file: 'examples/results/amazon.json',
    // whether to prevent images, css, fonts from being loaded
    // will speed up scraping a great deal
-    block_assets: true,
+    block_assets: false,
    // path to js module that extends functionality
    // this module should export the functions:
    // get_browser, handle_metadata, close_browser
@@ -66,7 +46,7 @@ let config = {
    // check if headless chrome escapes common detection techniques
    // this is a quick test and should be used for debugging
    test_evasion: false,
-    apply_evasion_techniques: false,
+    apply_evasion_techniques: true,
    // log ip address data
    log_ip_address: false,
    // log http headers