Better pluggable API

This commit is contained in:
Nikolai Tschacher 2019-10-05 19:39:33 +02:00
parent 4953d9da7a
commit 4a3a0e6fd4
4 changed files with 12 additions and 7 deletions

View File

@ -2,9 +2,15 @@ const se_scraper = require('./../index.js');
(async () => { (async () => {
let kws = [
'https://www.linkedin.com/in/aakanksha-majhi-b24a8449',
'https://www.linkedin.com/in/aakash-srivastava-7374a830',
'https://www.linkedin.com/in/aakash-tiwari-019b8569',
];
let scrape_job = { let scrape_job = {
search_engine: 'google', search_engine: 'google',
keywords: ['lets go boys'], keywords: kws,
num_pages: 1, num_pages: 1,
}; };

View File

@ -1,6 +1,6 @@
{ {
"name": "se-scraper", "name": "se-scraper",
"version": "1.5.3", "version": "1.5.4",
"description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo", "description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo",
"homepage": "https://scrapeulous.com/", "homepage": "https://scrapeulous.com/",
"main": "index.js", "main": "index.js",

View File

@ -1,5 +1,5 @@
'use strict'; 'use strict';
const zlib = require('zlib');
var fs = require('fs'); var fs = require('fs');
var os = require("os"); var os = require("os");
@ -252,7 +252,7 @@ class ScrapeManager {
log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`); log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`);
if (this.pluggable) { if (this.pluggable && this.pluggable.start_browser) {
launch_args.config = this.config; launch_args.config = this.config;
this.browser = await this.pluggable.start_browser(launch_args); this.browser = await this.pluggable.start_browser(launch_args);
this.page = await this.browser.newPage(); this.page = await this.browser.newPage();
@ -350,7 +350,7 @@ class ScrapeManager {
`[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`) `[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`)
} }
if (this.pluggable) { if (this.pluggable && this.pluggable.start_browser) {
this.scraper = getScraper(this.config.search_engine, { this.scraper = getScraper(this.config.search_engine, {
config: this.config, config: this.config,
@ -407,7 +407,6 @@ class ScrapeManager {
Object.assign(metadata, promiseReturn.metadata); Object.assign(metadata, promiseReturn.metadata);
num_requests += promiseReturn.num_requests; num_requests += promiseReturn.num_requests;
} }
} }
let timeDelta = Date.now() - startTime; let timeDelta = Date.now() - startTime;

@ -1 +1 @@
Subproject commit 03c9a764298f3f55b46bace810f4d3b2e1cb3266 Subproject commit 67bfa1bd613fbe2b63a073ac1fef59131c1d97a3