mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-20 17:47:49 +02:00
better pluggable api
This commit is contained in:
parent
4953d9da7a
commit
4a3a0e6fd4
@ -2,9 +2,15 @@ const se_scraper = require('./../index.js');
|
|||||||
|
|
||||||
(async () => {
|
(async () => {
|
||||||
|
|
||||||
|
let kws = [
|
||||||
|
'https://www.linkedin.com/in/aakanksha-majhi-b24a8449',
|
||||||
|
'https://www.linkedin.com/in/aakash-srivastava-7374a830',
|
||||||
|
'https://www.linkedin.com/in/aakash-tiwari-019b8569',
|
||||||
|
];
|
||||||
|
|
||||||
let scrape_job = {
|
let scrape_job = {
|
||||||
search_engine: 'google',
|
search_engine: 'google',
|
||||||
keywords: ['lets go boys'],
|
keywords: kws,
|
||||||
num_pages: 1,
|
num_pages: 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "se-scraper",
|
"name": "se-scraper",
|
||||||
"version": "1.5.3",
|
"version": "1.5.4",
|
||||||
"description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo",
|
"description": "A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo",
|
||||||
"homepage": "https://scrapeulous.com/",
|
"homepage": "https://scrapeulous.com/",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
const zlib = require('zlib');
|
|
||||||
var fs = require('fs');
|
var fs = require('fs');
|
||||||
var os = require("os");
|
var os = require("os");
|
||||||
|
|
||||||
@ -252,7 +252,7 @@ class ScrapeManager {
|
|||||||
|
|
||||||
log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`);
|
log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`);
|
||||||
|
|
||||||
if (this.pluggable) {
|
if (this.pluggable && this.pluggable.start_browser) {
|
||||||
launch_args.config = this.config;
|
launch_args.config = this.config;
|
||||||
this.browser = await this.pluggable.start_browser(launch_args);
|
this.browser = await this.pluggable.start_browser(launch_args);
|
||||||
this.page = await this.browser.newPage();
|
this.page = await this.browser.newPage();
|
||||||
@ -350,7 +350,7 @@ class ScrapeManager {
|
|||||||
`[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`)
|
`[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine_name} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.pluggable) {
|
if (this.pluggable && this.pluggable.start_browser) {
|
||||||
|
|
||||||
this.scraper = getScraper(this.config.search_engine, {
|
this.scraper = getScraper(this.config.search_engine, {
|
||||||
config: this.config,
|
config: this.config,
|
||||||
@ -407,7 +407,6 @@ class ScrapeManager {
|
|||||||
Object.assign(metadata, promiseReturn.metadata);
|
Object.assign(metadata, promiseReturn.metadata);
|
||||||
num_requests += promiseReturn.num_requests;
|
num_requests += promiseReturn.num_requests;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let timeDelta = Date.now() - startTime;
|
let timeDelta = Date.now() - startTime;
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 03c9a764298f3f55b46bace810f4d3b2e1cb3266
|
Subproject commit 67bfa1bd613fbe2b63a073ac1fef59131c1d97a3
|
Loading…
x
Reference in New Issue
Block a user