mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2024-11-29 11:03:12 +01:00
num_keywords are counted now. added to pluggable
This commit is contained in:
parent
7572ebd314
commit
7b5048b8ee
20
TODO.txt
20
TODO.txt
@ -32,22 +32,14 @@
|
||||
- Bing, Baidu, Google, Duckduckgo
|
||||
|
||||
7.2.2019
|
||||
- add num_requests to test cases
|
||||
- add num_requests to test cases [done]
|
||||
|
||||
|
||||
|
||||
TODO:
|
||||
- think about implementing ticker search for: https://quotes.wsj.com/MSFT?mod=searchresults_companyquotes
|
||||
- add proxy support
|
||||
- add captcha service solving support
|
||||
- check if new instances run the same browser and if we can have one proxy per tab worker
|
||||
|
||||
TODO:
|
||||
- think whether it makes sense to introduce a generic scraping class?
|
||||
- is scraping abstractable or is every scraper too unique?
|
||||
- don't make the same mistakes as with GoogleScraper
|
||||
|
||||
|
||||
TODO:
|
||||
okay it's fucking time to make a generic scraping class like in GoogleScraper [done]
|
||||
I feel like history repeats
|
||||
|
||||
write good test case for google [done]
|
||||
- write test case for:
|
||||
- pluggable
|
||||
- full metadata (log http headers, log ip address)
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "se-scraper",
|
||||
"version": "1.1.12",
|
||||
"version": "1.1.13",
|
||||
"description": "A simple library using puppeteer to scrape several search engines such as Google, Duckduckgo and Bing.",
|
||||
"homepage": "https://scrapeulous.com/",
|
||||
"main": "index.js",
|
||||
|
@ -30,7 +30,10 @@ module.exports = class Scraper {
|
||||
|
||||
this.results = {};
|
||||
this.result_rank = 1;
|
||||
// keep track of the requests done
|
||||
this.num_requests = 0;
|
||||
// keep track of the keywords searched
|
||||
this.num_keywords = 0;
|
||||
}
|
||||
|
||||
async run() {
|
||||
@ -96,12 +99,15 @@ module.exports = class Scraper {
|
||||
*/
|
||||
async scraping_loop() {
|
||||
for (let keyword of this.config.keywords) {
|
||||
this.num_keywords++;
|
||||
this.keyword = keyword;
|
||||
this.results[keyword] = {};
|
||||
this.result_rank = 1;
|
||||
|
||||
if (this.pluggable.before_keyword_scraped) {
|
||||
await this.pluggable.before_keyword_scraped({
|
||||
num_keywords: this.num_keywords,
|
||||
num_requests: this.num_requests,
|
||||
keyword: keyword,
|
||||
page: this.page,
|
||||
config: this.config,
|
||||
|
Loading…
Reference in New Issue
Block a user