mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-20 09:38:06 +02:00
pluggable is now class
This commit is contained in:
parent
8e695626b6
commit
86a66a09fd
@ -1 +1 @@
|
|||||||
{"scrapeulous.com":{"time":"Sun, 27 Jan 2019 14:51:54 GMT","num_results":"Ungefähr 169 Ergebnisse (0,23 Sekunden) ","no_results":false,"effective_query":"","results":[{"link":"https://scrapeulous.com/","title":"Scrapeulous","snippet":"Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. Whether you need to analyze your competitors market ...","visible_link":"https://scrapeulous.com/","date":"","rank":1},{"link":"https://scrapeulous.com/about/","title":"About - Scrapeulous","snippet":"Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. The business requirement to scrape information from ...","visible_link":"https://scrapeulous.com/about/","date":"","rank":2},{"link":"https://scrapeulous.com/contact/","title":"Contact - Scrapeulous","snippet":"Contact scrapeulous.com. Your email address. Valid email address where we are going to contact you. We will not send spam mail. Your inquiry.","visible_link":"https://scrapeulous.com/contact/","date":"","rank":3},{"link":"https://scrapeulous.com/howto/","title":"Howto - Scrapeulous","snippet":"We offer scraping large amounts of keywords for the Google Search Engine. Large means any number of keywords between 40 and 50000. Additionally, we ...","visible_link":"https://scrapeulous.com/howto/","date":"","rank":4},{"link":"https://incolumitas.com/","title":"Coding, Learning and Business Ideas","snippet":"About · Contact · GoogleScraper · Lichess Autoplay-Bot · Projects · Scrapeulous.com · Site Notice · SVGCaptcha · Home Archives Categories Tags Atom ...","visible_link":"https://incolumitas.com/","date":"","rank":5},{"link":"https://incolumitas.com/pages/scrapeulous/","title":"Coding, Learning and Business Ideas – Scrapeulous.com - Incolumitas","snippet":"In autumn 2018, I created a scraping service called scrapeulous.com. There you can purchase scrape jobs that allow you to upload a keyword file which in turn ...","visible_link":"https://incolumitas.com/pages/scrapeulous/","date":"","rank":6},{"link":"https://www.youtube.com/watch?v=a6xn6rc9GbI","title":"scrapeulous intro - YouTube","snippet":"Introduction for https://scrapeulous.com.","visible_link":"https://www.youtube.com/watch?v=a6xn6rc9GbI","date":"","rank":7},{"link":"https://www.youtube.com/channel/UCJs1Xei5LRefg9GwFYdYhOw","title":"Scrapeulous Scrapeulous - YouTube","snippet":"How to use scrapeulous.com - Duration: 3 minutes, 42 seconds. 32 minutes ago; 4 views. Introduction for https://scrapeulous.com. Show more. This item has ...","visible_link":"https://www.youtube.com/.../UCJs1Xei5LRefg9GwFYdYhOw","date":"","rank":8},{"link":"https://readthedocs.org/projects/googlescraper/downloads/pdf/latest/","title":"GoogleScraper Documentation - Read the Docs","snippet":"23.12.2018 - 1.1 Scrapeulous.com - Scraping Service. GoogleScraper is a open source tool and will remain a open source tool in the future. Some people ...","visible_link":"https://readthedocs.org/projects/googlescraper/downloads/.../latest...","date":"23.12.2018 - ","rank":9}]}}
|
{"scrapeulous.com":{"time":"Sun, 27 Jan 2019 18:07:33 GMT","num_results":"Ungefähr 101 Ergebnisse","no_results":false,"effective_query":"","results":[]}}
|
@ -1,39 +1,46 @@
|
|||||||
module.exports = {
|
module.exports = class Pluggable {
|
||||||
get_browser: get_browser,
|
constructor(options = {}) {
|
||||||
handle_metadata: handle_metadata,
|
const {
|
||||||
close_browser: close_browser
|
chromeFlags = [
|
||||||
};
|
'--no-sandbox',
|
||||||
|
'--disable-setuid-sandbox',
|
||||||
|
'--disable-dev-shm-usage',
|
||||||
|
'--disable-accelerated-2d-canvas',
|
||||||
|
'--disable-gpu',
|
||||||
|
'--window-size=1920x1080',
|
||||||
|
'--hide-scrollbars',
|
||||||
|
'--user-agent=Chrome',
|
||||||
|
],
|
||||||
|
userAgent = 'Chrome',
|
||||||
|
headless = true,
|
||||||
|
} = options;
|
||||||
|
|
||||||
async function close_browser(browser) {
|
this.chromeFlags = chromeFlags;
|
||||||
await browser.close();
|
this.userAgent = userAgent;
|
||||||
}
|
this.headless = headless;
|
||||||
|
|
||||||
async function handle_metadata() {
|
this.chromeFlags.push(this.userAgent);
|
||||||
// silence
|
}
|
||||||
}
|
|
||||||
|
|
||||||
async function get_browser(launch_args) {
|
async close_browser() {
|
||||||
const puppeteer = require('puppeteer');
|
await this.browser.close();
|
||||||
|
}
|
||||||
|
|
||||||
const ADDITIONAL_CHROME_FLAGS = [
|
async handle_metadata(args) {
|
||||||
'--no-sandbox',
|
// silence
|
||||||
'--disable-setuid-sandbox',
|
}
|
||||||
'--disable-dev-shm-usage',
|
|
||||||
'--disable-accelerated-2d-canvas',
|
|
||||||
'--disable-gpu',
|
|
||||||
'--window-size=1920x1080',
|
|
||||||
'--hide-scrollbars',
|
|
||||||
'--user-agent=Chrome',
|
|
||||||
];
|
|
||||||
|
|
||||||
let custom_args = {
|
async start_browser(args={}) {
|
||||||
args: ADDITIONAL_CHROME_FLAGS,
|
const puppeteer = require('puppeteer');
|
||||||
headless: true,
|
|
||||||
};
|
|
||||||
|
|
||||||
browser = await puppeteer.launch(launch_args);
|
let launch_args = {
|
||||||
|
args: args.chromeFlags || this.chromeFlags,
|
||||||
|
headless: args.headless || this.headless,
|
||||||
|
};
|
||||||
|
|
||||||
console.log('Loaded custom function get_browser()');
|
this.browser = await puppeteer.launch(launch_args);
|
||||||
|
console.log('Loaded custom function get_browser()');
|
||||||
|
|
||||||
return browser;
|
return this.browser;
|
||||||
}
|
}
|
||||||
|
};
|
2
run.js
2
run.js
@ -35,7 +35,7 @@ let config = {
|
|||||||
// get_browser, handle_metadata, close_browser
|
// get_browser, handle_metadata, close_browser
|
||||||
// must be an absolute path to the module
|
// must be an absolute path to the module
|
||||||
//custom_func: resolve('examples/pluggable.js'),
|
//custom_func: resolve('examples/pluggable.js'),
|
||||||
custom_func: '',
|
custom_func: resolve('examples/pluggable.js'),
|
||||||
};
|
};
|
||||||
|
|
||||||
se_scraper.scrape(config, (err, response) => {
|
se_scraper.scrape(config, (err, response) => {
|
||||||
|
@ -21,13 +21,13 @@ function write_results(fname, data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
module.exports.handler = async function handler (config, context, callback) {
|
module.exports.handler = async function handler (config, context, callback) {
|
||||||
|
pluggable = null;
|
||||||
custom_func = null;
|
|
||||||
if (config.custom_func && fs.existsSync(config.custom_func)) {
|
if (config.custom_func && fs.existsSync(config.custom_func)) {
|
||||||
try {
|
try {
|
||||||
custom_func = require(config.custom_func);
|
Pluggable = require(config.custom_func);
|
||||||
|
pluggable = new Pluggable();
|
||||||
} catch (exception) {
|
} catch (exception) {
|
||||||
|
console.error(exception);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,8 +75,9 @@ module.exports.handler = async function handler (config, context, callback) {
|
|||||||
console.log("Chrome Args: ", launch_args);
|
console.log("Chrome Args: ", launch_args);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (custom_func) {
|
if (pluggable) {
|
||||||
browser = await custom_func.get_browser(launch_args);
|
launch_args.config = config;
|
||||||
|
browser = await pluggable.start_browser(launch_args);
|
||||||
} else {
|
} else {
|
||||||
browser = await puppeteer.launch(launch_args);
|
browser = await puppeteer.launch(launch_args);
|
||||||
}
|
}
|
||||||
@ -126,8 +127,8 @@ module.exports.handler = async function handler (config, context, callback) {
|
|||||||
metadata = await meta.get_metadata(browser);
|
metadata = await meta.get_metadata(browser);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (custom_func) {
|
if (pluggable) {
|
||||||
await custom_func.close_browser(browser);
|
await pluggable.close_browser();
|
||||||
} else {
|
} else {
|
||||||
await browser.close();
|
await browser.close();
|
||||||
}
|
}
|
||||||
@ -158,8 +159,8 @@ module.exports.handler = async function handler (config, context, callback) {
|
|||||||
console.log(metadata);
|
console.log(metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (custom_func) {
|
if (pluggable) {
|
||||||
await custom_func.handle_metadata(metadata);
|
await pluggable.handle_metadata({metadata: metadata, config: config});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user