mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-25 03:51:27 +02:00
Take screenshot before modifying HTML
Otherwise the screenshot is captured after the page's HTML has been modified and comes out badly distorted.
This commit is contained in:
parent
d362e4ae2c
commit
77c1bb8372
@@ -190,6 +190,13 @@ module.exports = class Scraper {
|
|||||||
let parsed = this.parse(html);
|
let parsed = this.parse(html);
|
||||||
this.results[keyword][this.page_num] = parsed ? parsed : await this.parse_async(html);
|
this.results[keyword][this.page_num] = parsed ? parsed : await this.parse_async(html);
|
||||||
|
|
||||||
|
if (this.config.screen_output) {
|
||||||
|
this.results[keyword][this.page_num].screenshot = await this.page.screenshot({
|
||||||
|
encoding: 'base64',
|
||||||
|
fullPage: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (this.config.html_output) {
|
if (this.config.html_output) {
|
||||||
|
|
||||||
if (this.config.clean_html_output) {
|
if (this.config.clean_html_output) {
|
||||||
@@ -236,13 +243,6 @@ module.exports = class Scraper {
|
|||||||
this.results[keyword][this.page_num].html = html_contents;
|
this.results[keyword][this.page_num].html = html_contents;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.config.screen_output) {
|
|
||||||
this.results[keyword][this.page_num].screenshot = await this.page.screenshot({
|
|
||||||
encoding: 'base64',
|
|
||||||
fullPage: false,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
this.page_num += 1;
|
this.page_num += 1;
|
||||||
|
|
||||||
// only load the next page when we will pass the next iteration
|
// only load the next page when we will pass the next iteration
|
||||||
|
Loading…
x
Reference in New Issue
Block a user