implemented generic scraping class

This commit is contained in:
Nikolai Tschacher
2019-01-30 16:05:08 +01:00
parent 9e62f23451
commit 4306848657
7 changed files with 463 additions and 320 deletions

8
run.js
View File

@@ -15,18 +15,18 @@ let config = {
search_engine: 'google',
// whether debug information should be printed
// debug info is useful for developers when debugging
-debug: false,
+debug: true,
// whether verbose program output should be printed
// this output is informational
-verbose: false,
+verbose: true,
// an array of keywords to scrape
-keywords: ['scraping scrapeulous.com'],
+keywords: ['trump', ],
// alternatively you can specify a keyword_file. this overwrites the keywords array
keyword_file: '',
// the number of pages to scrape for each keyword
num_pages: 1,
// whether to start the browser in headless mode
-headless: true,
+headless: false,
// path to output file, data will be stored in JSON
output_file: 'data.json',
// whether to prevent images, css, fonts from being loaded