fix(scrape-manager): pass keywords through a cloned config so they are not reassigned on the shared config

This commit is contained in:
HugoPoi 2020-01-17 15:12:00 +01:00
parent 89dc5c3ebb
commit 4f467abf1e
2 changed files with 9 additions and 17 deletions

View File

@ -50,7 +50,9 @@ module.exports = class Scraper {
}
}
async run({page, data}) {
async run({page, data, worker}) {
debug('worker.id=%s', worker.id, this.config.keywords);
if (page) {
this.page = page;

View File

@ -281,11 +281,6 @@ class ScrapeManager {
perBrowserOptions: perBrowserOptions
}
});
this.cluster.on('taskerror', (err, data) => {
this.logger.error(`Error while scraping ${data}: ${err.message}`);
debug('Error during cluster task', err);
});
}
}
@ -336,26 +331,21 @@ class ScrapeManager {
chunks[k % this.numClusters].push(this.config.keywords[k]);
}
let execPromises = [];
let scraperInstances = [];
for (var c = 0; c < chunks.length; c++) {
this.config.keywords = chunks[c];
debug('chunks=%o', chunks);
if (this.config.use_proxies_only) {
this.config.proxy = this.config.proxies[c]; // every cluster has a dedicated proxy
} else if(c > 0) {
this.config.proxy = this.config.proxies[c-1]; // first cluster uses own ip address
}
let execPromises = [];
for (var c = 0; c < chunks.length; c++) {
const config = _.clone(this.config);
config.keywords = chunks[c];
var obj = getScraper(this.config.search_engine, {
config: this.config,
config: config,
context: {},
pluggable: this.pluggable,
});
var boundMethod = obj.run.bind(obj);
execPromises.push(this.cluster.execute({}, boundMethod));
scraperInstances.push(obj);
}
let promiseReturns = await Promise.all(execPromises);