From 4a3a0e6fd495313ad530eac1d3af93f61fdae508 Mon Sep 17 00:00:00 2001 From: Nikolai Tschacher Date: Sat, 5 Oct 2019 19:39:33 +0200 Subject: [PATCH] better pluggable api --- examples/minimal.js | 8 +++++++- package.json | 2 +- src/node_scraper.js | 7 +++---- src/puppeteer-cluster | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/examples/minimal.js b/examples/minimal.js index 03e0311..23a57c0 100644 --- a/examples/minimal.js +++ b/examples/minimal.js @@ -2,9 +2,15 @@ const se_scraper = require('./../index.js'); (async () => { + let kws = [ + 'https://www.linkedin.com/in/aakanksha-majhi-b24a8449', + 'https://www.linkedin.com/in/aakash-srivastava-7374a830', + 'https://www.linkedin.com/in/aakash-tiwari-019b8569', + ]; + let scrape_job = { search_engine: 'google', - keywords: ['lets go boys'], + keywords: kws, num_pages: 1, }; diff --git a/package.json b/package.json index ea5462f..8c738fb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "se-scraper", - "version": "1.5.3", + "version": "1.5.4", "description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo", "homepage": "https://scrapeulous.com/", "main": "index.js", diff --git a/src/node_scraper.js b/src/node_scraper.js index 7238f5f..3b35dd9 100644 --- a/src/node_scraper.js +++ b/src/node_scraper.js @@ -1,5 +1,5 @@ 'use strict'; -const zlib = require('zlib'); + var fs = require('fs'); var os = require("os"); @@ -252,7 +252,7 @@ class ScrapeManager { log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`); - if (this.pluggable) { + if (this.pluggable && this.pluggable.start_browser) { launch_args.config = this.config; this.browser = await this.pluggable.start_browser(launch_args); this.page = await this.browser.newPage(); @@ -350,7 +350,7 @@ class ScrapeManager { `[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`) } - if (this.pluggable) { + if (this.pluggable && this.pluggable.start_browser) { this.scraper = getScraper(this.config.search_engine, { config: this.config, @@ -407,7 +407,6 @@ class ScrapeManager { Object.assign(metadata, promiseReturn.metadata); num_requests += promiseReturn.num_requests; } - } let timeDelta = Date.now() - startTime; diff --git a/src/puppeteer-cluster b/src/puppeteer-cluster index 03c9a76..67bfa1b 160000 --- a/src/puppeteer-cluster +++ b/src/puppeteer-cluster @@ -1 +1 @@ -Subproject commit 03c9a764298f3f55b46bace810f4d3b2e1cb3266 +Subproject commit 67bfa1bd613fbe2b63a073ac1fef59131c1d97a3