pluggable is now a class

This commit is contained in:
Nikolai Tschacher 2019-01-27 19:08:07 +01:00
parent 8e695626b6
commit 86a66a09fd
4 changed files with 51 additions and 43 deletions

View File

@ -1 +1 @@
{"scrapeulous.com":{"time":"Sun, 27 Jan 2019 14:51:54 GMT","num_results":"Ungefähr 169 Ergebnisse (0,23 Sekunden) ","no_results":false,"effective_query":"","results":[{"link":"https://scrapeulous.com/","title":"Scrapeulous","snippet":"Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. Whether you need to analyze your competitors market ...","visible_link":"https://scrapeulous.com/","date":"","rank":1},{"link":"https://scrapeulous.com/about/","title":"About - Scrapeulous","snippet":"Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. The business requirement to scrape information from ...","visible_link":"https://scrapeulous.com/about/","date":"","rank":2},{"link":"https://scrapeulous.com/contact/","title":"Contact - Scrapeulous","snippet":"Contact scrapeulous.com. Your email address. Valid email address where we are going to contact you. We will not send spam mail. Your inquiry.","visible_link":"https://scrapeulous.com/contact/","date":"","rank":3},{"link":"https://scrapeulous.com/howto/","title":"Howto - Scrapeulous","snippet":"We offer scraping large amounts of keywords for the Google Search Engine. Large means any number of keywords between 40 and 50000. Additionally, we ...","visible_link":"https://scrapeulous.com/howto/","date":"","rank":4},{"link":"https://incolumitas.com/","title":"Coding, Learning and Business Ideas","snippet":"About · Contact · GoogleScraper · Lichess Autoplay-Bot · Projects · Scrapeulous.com · Site Notice · SVGCaptcha · Home Archives Categories Tags Atom ...","visible_link":"https://incolumitas.com/","date":"","rank":5},{"link":"https://incolumitas.com/pages/scrapeulous/","title":"Coding, Learning and Business Ideas Scrapeulous.com - Incolumitas","snippet":"In autumn 2018, I created a scraping service called scrapeulous.com. 
There you can purchase scrape jobs that allow you to upload a keyword file which in turn ...","visible_link":"https://incolumitas.com/pages/scrapeulous/","date":"","rank":6},{"link":"https://www.youtube.com/watch?v=a6xn6rc9GbI","title":"scrapeulous intro - YouTube","snippet":"Introduction for https://scrapeulous.com.","visible_link":"https://www.youtube.com/watch?v=a6xn6rc9GbI","date":"","rank":7},{"link":"https://www.youtube.com/channel/UCJs1Xei5LRefg9GwFYdYhOw","title":"Scrapeulous Scrapeulous - YouTube","snippet":"How to use scrapeulous.com - Duration: 3 minutes, 42 seconds. 32 minutes ago; 4 views. Introduction for https://scrapeulous.com. Show more. This item has ...","visible_link":"https://www.youtube.com/.../UCJs1Xei5LRefg9GwFYdYhOw","date":"","rank":8},{"link":"https://readthedocs.org/projects/googlescraper/downloads/pdf/latest/","title":"GoogleScraper Documentation - Read the Docs","snippet":"23.12.2018 - 1.1 Scrapeulous.com - Scraping Service. GoogleScraper is a open source tool and will remain a open source tool in the future. Some people ...","visible_link":"https://readthedocs.org/projects/googlescraper/downloads/.../latest...","date":"23.12.2018 - ","rank":9}]}}
{"scrapeulous.com":{"time":"Sun, 27 Jan 2019 18:07:33 GMT","num_results":"Ungefähr 101 Ergebnisse","no_results":false,"effective_query":"","results":[]}}

View File

@ -1,39 +1,46 @@
module.exports = {
get_browser: get_browser,
handle_metadata: handle_metadata,
close_browser: close_browser
};
module.exports = class Pluggable {
constructor(options = {}) {
const {
chromeFlags = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--disable-gpu',
'--window-size=1920x1080',
'--hide-scrollbars',
'--user-agent=Chrome',
],
userAgent = 'Chrome',
headless = true,
} = options;
async function close_browser(browser) {
await browser.close();
}
this.chromeFlags = chromeFlags;
this.userAgent = userAgent;
this.headless = headless;
async function handle_metadata() {
// silence
}
this.chromeFlags.push(this.userAgent);
}
async function get_browser(launch_args) {
const puppeteer = require('puppeteer');
async close_browser() {
await this.browser.close();
}
const ADDITIONAL_CHROME_FLAGS = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--disable-gpu',
'--window-size=1920x1080',
'--hide-scrollbars',
'--user-agent=Chrome',
];
async handle_metadata(args) {
// silence
}
let custom_args = {
args: ADDITIONAL_CHROME_FLAGS,
headless: true,
};
async start_browser(args={}) {
const puppeteer = require('puppeteer');
browser = await puppeteer.launch(launch_args);
let launch_args = {
args: args.chromeFlags || this.chromeFlags,
headless: args.headless || this.headless,
};
console.log('Loaded custom function get_browser()');
this.browser = await puppeteer.launch(launch_args);
console.log('Loaded custom function get_browser()');
return browser;
}
return this.browser;
}
};

2
run.js
View File

@ -35,7 +35,7 @@ let config = {
// get_browser, handle_metadata, close_browser
// must be an absolute path to the module
//custom_func: resolve('examples/pluggable.js'),
custom_func: '',
custom_func: resolve('examples/pluggable.js'),
};
se_scraper.scrape(config, (err, response) => {

View File

@ -21,13 +21,13 @@ function write_results(fname, data) {
}
module.exports.handler = async function handler (config, context, callback) {
custom_func = null;
pluggable = null;
if (config.custom_func && fs.existsSync(config.custom_func)) {
try {
custom_func = require(config.custom_func);
Pluggable = require(config.custom_func);
pluggable = new Pluggable();
} catch (exception) {
console.error(exception);
}
}
@ -75,8 +75,9 @@ module.exports.handler = async function handler (config, context, callback) {
console.log("Chrome Args: ", launch_args);
}
if (custom_func) {
browser = await custom_func.get_browser(launch_args);
if (pluggable) {
launch_args.config = config;
browser = await pluggable.start_browser(launch_args);
} else {
browser = await puppeteer.launch(launch_args);
}
@ -126,8 +127,8 @@ module.exports.handler = async function handler (config, context, callback) {
metadata = await meta.get_metadata(browser);
}
if (custom_func) {
await custom_func.close_browser(browser);
if (pluggable) {
await pluggable.close_browser();
} else {
await browser.close();
}
@ -158,8 +159,8 @@ module.exports.handler = async function handler (config, context, callback) {
console.log(metadata);
}
if (custom_func) {
await custom_func.handle_metadata(metadata);
if (pluggable) {
await pluggable.handle_metadata({metadata: metadata, config: config});
}
}