pluggable is now a class

This commit is contained in:
Nikolai Tschacher 2019-01-27 19:08:07 +01:00
parent 8e695626b6
commit 86a66a09fd
4 changed files with 51 additions and 43 deletions

View File

@ -1 +1 @@
{"scrapeulous.com":{"time":"Sun, 27 Jan 2019 14:51:54 GMT","num_results":"Ungefähr 169 Ergebnisse (0,23 Sekunden) ","no_results":false,"effective_query":"","results":[{"link":"https://scrapeulous.com/","title":"Scrapeulous","snippet":"Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. Whether you need to analyze your competitors market ...","visible_link":"https://scrapeulous.com/","date":"","rank":1},{"link":"https://scrapeulous.com/about/","title":"About - Scrapeulous","snippet":"Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. The business requirement to scrape information from ...","visible_link":"https://scrapeulous.com/about/","date":"","rank":2},{"link":"https://scrapeulous.com/contact/","title":"Contact - Scrapeulous","snippet":"Contact scrapeulous.com. Your email address. Valid email address where we are going to contact you. We will not send spam mail. Your inquiry.","visible_link":"https://scrapeulous.com/contact/","date":"","rank":3},{"link":"https://scrapeulous.com/howto/","title":"Howto - Scrapeulous","snippet":"We offer scraping large amounts of keywords for the Google Search Engine. Large means any number of keywords between 40 and 50000. Additionally, we ...","visible_link":"https://scrapeulous.com/howto/","date":"","rank":4},{"link":"https://incolumitas.com/","title":"Coding, Learning and Business Ideas","snippet":"About · Contact · GoogleScraper · Lichess Autoplay-Bot · Projects · Scrapeulous.com · Site Notice · SVGCaptcha · Home Archives Categories Tags Atom ...","visible_link":"https://incolumitas.com/","date":"","rank":5},{"link":"https://incolumitas.com/pages/scrapeulous/","title":"Coding, Learning and Business Ideas Scrapeulous.com - Incolumitas","snippet":"In autumn 2018, I created a scraping service called scrapeulous.com. 
There you can purchase scrape jobs that allow you to upload a keyword file which in turn ...","visible_link":"https://incolumitas.com/pages/scrapeulous/","date":"","rank":6},{"link":"https://www.youtube.com/watch?v=a6xn6rc9GbI","title":"scrapeulous intro - YouTube","snippet":"Introduction for https://scrapeulous.com.","visible_link":"https://www.youtube.com/watch?v=a6xn6rc9GbI","date":"","rank":7},{"link":"https://www.youtube.com/channel/UCJs1Xei5LRefg9GwFYdYhOw","title":"Scrapeulous Scrapeulous - YouTube","snippet":"How to use scrapeulous.com - Duration: 3 minutes, 42 seconds. 32 minutes ago; 4 views. Introduction for https://scrapeulous.com. Show more. This item has ...","visible_link":"https://www.youtube.com/.../UCJs1Xei5LRefg9GwFYdYhOw","date":"","rank":8},{"link":"https://readthedocs.org/projects/googlescraper/downloads/pdf/latest/","title":"GoogleScraper Documentation - Read the Docs","snippet":"23.12.2018 - 1.1 Scrapeulous.com - Scraping Service. GoogleScraper is a open source tool and will remain a open source tool in the future. Some people ...","visible_link":"https://readthedocs.org/projects/googlescraper/downloads/.../latest...","date":"23.12.2018 - ","rank":9}]}} {"scrapeulous.com":{"time":"Sun, 27 Jan 2019 18:07:33 GMT","num_results":"Ungefähr 101 Ergebnisse","no_results":false,"effective_query":"","results":[]}}

View File

@ -1,39 +1,46 @@
module.exports = { module.exports = class Pluggable {
get_browser: get_browser, constructor(options = {}) {
handle_metadata: handle_metadata, const {
close_browser: close_browser chromeFlags = [
}; '--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--disable-gpu',
'--window-size=1920x1080',
'--hide-scrollbars',
'--user-agent=Chrome',
],
userAgent = 'Chrome',
headless = true,
} = options;
async function close_browser(browser) { this.chromeFlags = chromeFlags;
await browser.close(); this.userAgent = userAgent;
} this.headless = headless;
async function handle_metadata() { this.chromeFlags.push(this.userAgent);
// silence }
}
async function get_browser(launch_args) { async close_browser() {
const puppeteer = require('puppeteer'); await this.browser.close();
}
const ADDITIONAL_CHROME_FLAGS = [ async handle_metadata(args) {
'--no-sandbox', // silence
'--disable-setuid-sandbox', }
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--disable-gpu',
'--window-size=1920x1080',
'--hide-scrollbars',
'--user-agent=Chrome',
];
let custom_args = { async start_browser(args={}) {
args: ADDITIONAL_CHROME_FLAGS, const puppeteer = require('puppeteer');
headless: true,
};
browser = await puppeteer.launch(launch_args); let launch_args = {
args: args.chromeFlags || this.chromeFlags,
headless: args.headless || this.headless,
};
console.log('Loaded custom function get_browser()'); this.browser = await puppeteer.launch(launch_args);
console.log('Loaded custom function get_browser()');
return browser; return this.browser;
} }
};

2
run.js
View File

@ -35,7 +35,7 @@ let config = {
// get_browser, handle_metadata, close_browser // get_browser, handle_metadata, close_browser
// must be an absolute path to the module // must be an absolute path to the module
//custom_func: resolve('examples/pluggable.js'), //custom_func: resolve('examples/pluggable.js'),
custom_func: '', custom_func: resolve('examples/pluggable.js'),
}; };
se_scraper.scrape(config, (err, response) => { se_scraper.scrape(config, (err, response) => {

View File

@ -21,13 +21,13 @@ function write_results(fname, data) {
} }
module.exports.handler = async function handler (config, context, callback) { module.exports.handler = async function handler (config, context, callback) {
pluggable = null;
custom_func = null;
if (config.custom_func && fs.existsSync(config.custom_func)) { if (config.custom_func && fs.existsSync(config.custom_func)) {
try { try {
custom_func = require(config.custom_func); Pluggable = require(config.custom_func);
pluggable = new Pluggable();
} catch (exception) { } catch (exception) {
console.error(exception);
} }
} }
@ -75,8 +75,9 @@ module.exports.handler = async function handler (config, context, callback) {
console.log("Chrome Args: ", launch_args); console.log("Chrome Args: ", launch_args);
} }
if (custom_func) { if (pluggable) {
browser = await custom_func.get_browser(launch_args); launch_args.config = config;
browser = await pluggable.start_browser(launch_args);
} else { } else {
browser = await puppeteer.launch(launch_args); browser = await puppeteer.launch(launch_args);
} }
@ -126,8 +127,8 @@ module.exports.handler = async function handler (config, context, callback) {
metadata = await meta.get_metadata(browser); metadata = await meta.get_metadata(browser);
} }
if (custom_func) { if (pluggable) {
await custom_func.close_browser(browser); await pluggable.close_browser();
} else { } else {
await browser.close(); await browser.close();
} }
@ -158,8 +159,8 @@ module.exports.handler = async function handler (config, context, callback) {
console.log(metadata); console.log(metadata);
} }
if (custom_func) { if (pluggable) {
await custom_func.handle_metadata(metadata); await pluggable.handle_metadata({metadata: metadata, config: config});
} }
} }