fixed some errors and wrote a much better README

Nikolai Tschacher
2019-02-28 15:34:25 +01:00
parent 089e410ec6
commit 79d32a315a
20 changed files with 8817 additions and 366 deletions

examples/multiple_tabs.js Normal file

@@ -0,0 +1,134 @@
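// This example scrapes a list of US city names on Bing, using several browser tabs in parallel.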
const se_scraper = require('./../index.js');
const Cluster = {
    CONCURRENCY_PAGE: 1, // shares cookies, etc.
    CONCURRENCY_CONTEXT: 2, // no cookie sharing (uses incognito contexts)
    CONCURRENCY_BROWSER: 3, // no cookie sharing, one browser process per worker
};
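// Note: the constants above mirror puppeteer-cluster's Cluster.CONCURRENCY_* options;
// they are redefined here so this example does not need to require('puppeteer-cluster') itself.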
let keywords = ['New York',
    'Los Angeles',
    'Chicago',
    'Houston',
    'Philadelphia',
    'Phoenix',
    'San Antonio',
    'San Diego',
    'Dallas',
    'San Jose',
    'Austin',
    'Indianapolis',
    'Jacksonville',
    'San Francisco',
    'Columbus',
    'Charlotte',
    'Fort Worth',
    'Detroit',
    'El Paso',
    'Memphis',
    'Seattle',
    'Denver',
    'Washington',
    'Boston',
    'Nashville-Davidson',
    'Baltimore',
    'Oklahoma City',
    'Louisville/Jefferson County',
    'Portland',
    'Las Vegas',
    'Milwaukee',
    'Albuquerque',
    'Tucson',
    'Fresno',
    'Sacramento',
    'Long Beach',
    'Kansas City',
    'Mesa',
    'Virginia Beach',
    'Atlanta',
    'Colorado Springs',
    'Omaha',
    'Raleigh',
    'Miami',
    'Oakland',
    'Minneapolis',
    'Tulsa',
    'Cleveland',
    'Wichita',
    'Arlington',
    'New Orleans',
    'Bakersfield',
    'Tampa',
    'Honolulu',
    'Aurora',
    'Anaheim',
    'Santa Ana',
    'St. Louis',
    'Riverside',
    'Corpus Christi',
    'Lexington-Fayette',
    'Pittsburgh',
    'Anchorage',
    'Stockton',
    'Cincinnati',
    'St. Paul',
    'Toledo',
    'Greensboro',
    'Newark',
    'Plano',
    'Henderson',
    'Lincoln',
    'Buffalo',
    'Jersey City',
    'Chula Vista',
    'Fort Wayne',
    'Orlando',
    'St. Petersburg',
    'Chandler',
    'Laredo',
    'Norfolk',
    'Durham',
    'Madison',
    'Lubbock',
    'Irvine',
    'Winston-Salem',
    'Glendale',
    'Garland',
    'Hialeah',
    'Reno',
    'Chesapeake',
    'Gilbert',
    'Baton Rouge',
    'Irving',
    'Scottsdale',
    'North Las Vegas',
    'Fremont',
    'Boise City',
    'Richmond',
    'San Bernardino'];
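// Scraper configuration: query each keyword on Bing, fetch one result page each,
// and write the results to a JSON file.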
let config = {
    search_engine: 'bing',
    debug: false,
    verbose: true,
    keywords: keywords,
    num_pages: 1, // how many pages per keyword
    output_file: 'examples/results/bing.json',
    log_ip_address: false,
    headless: true,
    puppeteer_cluster_config: {
        timeout: 10 * 60 * 1000, // max timeout set to 10 minutes
        monitor: false,
        concurrency: Cluster.CONCURRENCY_PAGE, // one scraper per tab
        maxConcurrency: 7, // scrape with 7 tabs
    }
};
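// Invoked once scraping completes; response holds the parsed results for all keywords.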
function callback(err, response) {
    if (err) {
        console.error(err);
        return;
    }
    console.dir(response, {depth: null, colors: true});
}
se_scraper.scrape(config, callback);
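// Run from the repository root (after npm install): node examples/multiple_tabs.js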