mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-21 01:57:55 +02:00
Add html_output functionality
Pagination support for html output. Change return value to keep it compliant with the current version of se-scraper.
This commit is contained in:
parent
0d7f6dcd11
commit
a3ebe357a4
@ -33,6 +33,7 @@ module.exports = class Scraper {
|
|||||||
this.PROXY_TIMEOUT = 15000;
|
this.PROXY_TIMEOUT = 15000;
|
||||||
this.SOLVE_CAPTCHA_TIME = 45000;
|
this.SOLVE_CAPTCHA_TIME = 45000;
|
||||||
|
|
||||||
|
this.html_output = {};
|
||||||
this.results = {};
|
this.results = {};
|
||||||
this.result_rank = 1;
|
this.result_rank = 1;
|
||||||
// keep track of the requests done
|
// keep track of the requests done
|
||||||
@ -69,7 +70,15 @@ module.exports = class Scraper {
|
|||||||
|
|
||||||
await this.scraping_loop();
|
await this.scraping_loop();
|
||||||
|
|
||||||
return this.results;
|
let response = this.results;
|
||||||
|
if (this.config.html_output) {
|
||||||
|
response = {
|
||||||
|
'results': this.results,
|
||||||
|
'html_output': this.html_output
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -155,6 +164,7 @@ module.exports = class Scraper {
|
|||||||
this.num_keywords++;
|
this.num_keywords++;
|
||||||
this.keyword = keyword;
|
this.keyword = keyword;
|
||||||
this.results[keyword] = {};
|
this.results[keyword] = {};
|
||||||
|
this.html_output[keyword] = {};
|
||||||
this.result_rank = 1;
|
this.result_rank = 1;
|
||||||
|
|
||||||
if (this.pluggable && this.pluggable.before_keyword_scraped) {
|
if (this.pluggable && this.pluggable.before_keyword_scraped) {
|
||||||
@ -189,6 +199,7 @@ module.exports = class Scraper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let html = await this.page.content();
|
let html = await this.page.content();
|
||||||
|
this.html_output[keyword][page_num] = html;
|
||||||
let parsed = this.parse(html);
|
let parsed = this.parse(html);
|
||||||
this.results[keyword][page_num] = parsed ? parsed : await this.parse_async(html);
|
this.results[keyword][page_num] = parsed ? parsed : await this.parse_async(html);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user