better pluggable api

This commit is contained in:
Nikolai Tschacher 2019-10-05 19:39:33 +02:00
parent 4953d9da7a
commit 4a3a0e6fd4
4 changed files with 12 additions and 7 deletions

View File

@ -2,9 +2,15 @@ const se_scraper = require('./../index.js');
(async () => {
let kws = [
'https://www.linkedin.com/in/aakanksha-majhi-b24a8449',
'https://www.linkedin.com/in/aakash-srivastava-7374a830',
'https://www.linkedin.com/in/aakash-tiwari-019b8569',
];
let scrape_job = {
search_engine: 'google',
keywords: ['lets go boys'],
keywords: kws,
num_pages: 1,
};

View File

@ -1,6 +1,6 @@
{
"name": "se-scraper",
"version": "1.5.3",
"version": "1.5.4",
"description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo",
"homepage": "https://scrapeulous.com/",
"main": "index.js",

View File

@ -1,5 +1,5 @@
'use strict';
const zlib = require('zlib');
var fs = require('fs');
var os = require("os");
@ -252,7 +252,7 @@ class ScrapeManager {
log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`);
if (this.pluggable) {
if (this.pluggable && this.pluggable.start_browser) {
launch_args.config = this.config;
this.browser = await this.pluggable.start_browser(launch_args);
this.page = await this.browser.newPage();
@ -350,7 +350,7 @@ class ScrapeManager {
`[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`)
}
if (this.pluggable) {
if (this.pluggable && this.pluggable.start_browser) {
this.scraper = getScraper(this.config.search_engine, {
config: this.config,
@ -407,7 +407,6 @@ class ScrapeManager {
Object.assign(metadata, promiseReturn.metadata);
num_requests += promiseReturn.num_requests;
}
}
let timeDelta = Date.now() - startTime;

@ -1 +1 @@
Subproject commit 03c9a764298f3f55b46bace810f4d3b2e1cb3266
Subproject commit 67bfa1bd613fbe2b63a073ac1fef59131c1d97a3