changed api big time

This commit is contained in:
Nikolai Tschacher 2019-06-11 18:16:59 +02:00
parent 3d69f4e249
commit 6825c97790
31 changed files with 2491 additions and 2536 deletions

View File

@ -39,6 +39,11 @@
- write test case and example for multiple tabs with bing
- make README.md nicer, using https://github.com/thomasdondorf/puppeteer-cluster/blob/master/README.md as a template
### 11.6.2019
- TODO: fix amazon scraping
- change api of remaining test cases
### TODO:
- fix duckduckgo test case!!!
- add test case for infospace

View File

@ -1,21 +1,25 @@
const se_scraper = require('./../index.js');
const se_scraper = require('./../src/node_scraper.js');
let config = {
headless: false,
search_engine: 'amazon',
debug: false,
verbose: false,
keywords: ['iphone', 'drone'],
num_pages: 1,
output_file: 'examples/results/amazon.json',
amazon_settings: {
amazon_domain: 'amazon.com',
}
};
(async () => {
let browser_config = {
headless: true,
debug_level: 1,
output_file: 'examples/results/amazon.json',
amazon_settings: {
amazon_domain: 'amazon.com',
}
};
function callback(err, response) {
if (err) { console.error(err) }
console.dir(response, {depth: null, colors: true});
}
let scrape_job = {
search_engine: 'amazon',
keywords: ['iphone', 'drone'],
num_pages: 1,
};
se_scraper.scrape(config, callback);
var scraper = new se_scraper.ScrapeManager(browser_config);
await scraper.start();
var results = await scraper.scrape(scrape_job);
console.dir(results, {depth: null, colors: true});
await scraper.quit();
})();

View File

@ -1,17 +1,21 @@
const se_scraper = require('./../index.js');
const se_scraper = require('./../src/node_scraper.js');
let config = {
search_engine: 'baidu',
debug: false,
verbose: false,
keywords: ['cat', 'mouse'],
num_pages: 2,
output_file: 'examples/results/baidu.json',
};
(async () => {
let browser_config = {
debug_level: 1,
output_file: 'examples/results/data.json',
};
function callback(err, response) {
if (err) { console.error(err) }
console.dir(response, {depth: null, colors: true});
}
let scrape_job = {
search_engine: 'baidu',
keywords: ['cat', 'mouse'],
num_pages: 1,
};
se_scraper.scrape(config, callback);
var scraper = new se_scraper.ScrapeManager(browser_config);
await scraper.start();
var results = await scraper.scrape(scrape_job);
console.dir(results, {depth: null, colors: true});
await scraper.quit();
})();

View File

@ -1,23 +1,27 @@
const se_scraper = require('./../index.js');
const se_scraper = require('./../src/node_scraper.js');
let config = {
search_engine: 'google_news_old',
debug: false,
verbose: true,
keywords: ['world news'],
num_pages: 1,
output_file: 'examples/results/gnold.json',
google_news_old_settings: {
gl: 'us', // The gl parameter determines the Google country to use for the query.
hl: 'fr', // The hl parameter determines the Google UI language to return results.
start: 0, // Determines the results offset to use, defaults to 0.
num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
},
};
(async () => {
let browser_config = {
debug_level: 2,
output_file: 'examples/results/gnold.json',
google_news_old_settings: {
gl: 'us', // The gl parameter determines the Google country to use for the query.
hl: 'fr', // The hl parameter determines the Google UI language to return results.
start: 0, // Determines the results offset to use, defaults to 0.
num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
},
};
function callback(err, response) {
if (err) { console.error(err) }
console.dir(response, {depth: null, colors: true});
}
let scrape_job = {
search_engine: 'google_news_old',
keywords: ['news world'],
num_pages: 1,
};
se_scraper.scrape(config, callback);
var scraper = new se_scraper.ScrapeManager(browser_config);
await scraper.start();
var results = await scraper.scrape(scrape_job);
console.dir(results, {depth: null, colors: true});
await scraper.quit();
})();

15
examples/minimal.js Normal file
View File

@ -0,0 +1,15 @@
// Minimal usage example: run a single Google search through se-scraper and
// print whatever the library returns.
const se_scraper = require('./../index.js');

(async () => {
    // What to scrape: one keyword on one search engine, first page only.
    const scrape_job = {
        search_engine: 'google',
        keywords: ['lets go boys'],
        num_pages: 1,
    };

    try {
        // The first argument is the browser configuration; an empty object is
        // passed here (presumably the library falls back to its defaults --
        // confirm against index.js).
        const results = await se_scraper.scrape({}, scrape_job);
        console.dir(results, {depth: null, colors: true});
    } catch (err) {
        // Report the failure and signal it through the exit code instead of
        // leaving the rejected promise unhandled.
        console.error(err);
        process.exitCode = 1;
    }
})();

View File

@ -1,35 +1,30 @@
const se_scraper = require('../index.js');
const se_scraper = require('./../src/node_scraper.js');
async function multiple_search_engines() {
(async () => {
let browser_config = {
random_user_agent: true,
write_meta_data: true,
sleep_range: '[1,1]',
debug_level: 1,
headless: true,
output_file: `multiple_search_engines.json`
};
var searchEnginesList = ['google', 'bing'];
let scrape_job = {
search_engine: 'google',
keywords: ['news', 'se-scraper'],
num_pages: 1,
};
for (let index = 0; index < searchEnginesList.length; index++) {
const searchEngine = searchEnginesList[index];
let config = {
random_user_agent: true,
write_meta_data: true,
sleep_range: '[1,1]',
search_engine: searchEngine,
debug: false,
verbose: false,
// the list of keywords to scrape
keywords: ['scrapeulous.com',],
// whether to start the browser in headless mode
headless: true,
output_file: `${searchEngine}.json`
};
var scraper = new se_scraper.ScrapeManager(browser_config);
await scraper.start();
await se_scraper.scrape(config, (err, response) => {
if (err) {
console.error(err)
}
console.dir(response.results, {
depth: null,
colors: true
});
});
for (var se of ['google', 'bing']) {
scrape_job.search_engine = se;
var results = await scraper.scrape(scrape_job);
console.dir(results, {depth: null, colors: true});
}
}
multiple_search_engines();
await scraper.quit();
})();

View File

@ -1,19 +1,23 @@
const se_scraper = require('./../index.js');
const se_scraper = require('./../src/node_scraper.js');
let config = {
search_engine: 'google',
debug: false,
verbose: true,
keywords: ['news', 'scrapeulous.com', 'incolumitas.com', 'i work too much', 'what to do?', 'javascript is hard'],
num_pages: 1,
output_file: 'examples/results/proxyresults.json',
proxy_file: '/home/nikolai/.proxies', // one proxy per line
log_ip_address: false,
};
(async () => {
let browser_config = {
debug_level: 1,
output_file: 'examples/results/proxyresults.json',
proxy_file: '/home/nikolai/.proxies', // one proxy per line
log_ip_address: true,
};
function callback(err, response) {
if (err) { console.error(err) }
//console.dir(response, {depth: null, colors: true});
}
let scrape_job = {
search_engine: 'google',
keywords: ['news', 'scrapeulous.com', 'incolumitas.com', 'i work too much', 'what to do?', 'javascript is hard'],
num_pages: 1,
};
se_scraper.scrape(config, callback);
var scraper = new se_scraper.ScrapeManager(browser_config);
await scraper.start();
var results = await scraper.scrape(scrape_job);
console.dir(results, {depth: null, colors: true});
await scraper.quit();
})();

View File

@ -1,17 +1,24 @@
const se_scraper = require('./../index.js');
const se_scraper = require('./../src/node_scraper.js');
let config = {
search_engine: 'google',
debug: false,
verbose: false,
keywords: ['news', 'se-scraper'],
num_pages: 1,
output_file: 'examples/results/data.json',
};
(async () => {
let browser_config = {
debug_level: 1,
output_file: 'examples/results/data.json',
};
function callback(err, response) {
if (err) { console.error(err) }
console.dir(response, {depth: null, colors: true});
}
let scrape_job = {
search_engine: 'google',
keywords: ['news', 'se-scraper'],
num_pages: 1,
};
se_scraper.scrape(config, callback);
var scraper = new se_scraper.ScrapeManager(browser_config);
await scraper.start();
var results = await scraper.scrape(scrape_job);
console.dir(results, {depth: null, colors: true});
await scraper.quit();
})();

View File

@ -1,352 +1,20 @@
{
"iphone": {
"1": {
"time": "Sun, 10 Mar 2019 19:02:01 GMT",
"num_results": "\n 1-16 of over 1,000 results for \"iphone\"\n \n \n\n\n\n\n\n\n \n \n \n \n \n \n \n \n Sort by:\n \n Featured\n \n Price: Low to High\n \n Price: High to Low\n \n Avg. Customer Review\n \n Newest Arrivals\n \n Sort by:Featured\n <span class=\"a-button a-button-base\"><span class=\"a-button-inner\"><input class=\"a-button-input\" type=\"submit\" value=\"Go\"><span class=\"a-button-text\" aria-hidden=\"true\">Go</span></span></span>\n \n\n\n ",
"time": "Tue, 11 Jun 2019 15:00:19 GMT",
"num_results": "\n 1-16 of over 1,000 results for \"iphone\"\n \n \n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n Sort by:\n \n Featured\n \n Price: Low to High\n \n Price: High to Low\n \n Avg. Customer Review\n \n Newest Arrivals\n \n Sort by:Featured\n <span class=\"a-button a-button-base\"><span class=\"a-button-inner\"><input class=\"a-button-input\" type=\"submit\" value=\"Go\"><span class=\"a-button-text\" aria-hidden=\"true\">Go</span></span></span>\n \n\n\n ",
"no_results": false,
"effective_query": "\"iphone\"",
"results": [
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_atf_aps_sr_pg1_1?ie=UTF8&adId=A0875484UWI8SLQ4J64Y&url=%2FBandolier-Natalie-Wallet-Compatible-iPhone%2Fdp%2FB079YDTRKV%2Fref%3Dsr_1_1_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-1-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_atf",
"seller": "by Bandolier",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_atf_aps_sr_pg1_1?ie=UTF8&adId=A0875484UWI8SLQ4J64Y&url=%2FBandolier-Natalie-Wallet-Compatible-iPhone%2Fdp%2FB079YDTRKV%2Fref%3Dsr_1_1_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-1-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_atf",
"title": "Bandolier [Natalie] Phone Case with Strap & Wallet Compatible w/iPhone 8 +, 7 + & 6 + - Gold Details & Crossbody Leather Shoulder Purse Belt. Handsfree Carrying Hard Cover. Travel Friendly Accessory",
"stars": "3.8 out of 5 stars",
"num_reviews": "6",
"price": "$98.00",
"oldprice": "",
"rank": 1
},
{
"image": "/Apple-iPhone-XR-64GB-PRODUCT/dp/B07K97BQDF/ref=sr_1_2?keywords=iphone&qid=1552244519&s=gateway&sr=8-2",
"seller": "by Apple",
"link": "/Apple-iPhone-XR-64GB-PRODUCT/dp/B07K97BQDF/ref=sr_1_2?keywords=iphone&qid=1552244519&s=gateway&sr=8-2",
"title": "Apple iPhone XR (64GB) - (PRODUCT)RED [Locked to Simple Mobile Prepaid]",
"stars": "3.0 out of 5 stars",
"num_reviews": "7",
"price": "$749.99",
"oldprice": "",
"rank": 2
},
{
"image": "/Apple-iPhone-64GB-Silver-Prepaid/dp/B078HVJB69/ref=sr_1_3?keywords=iphone&qid=1552244519&s=gateway&sr=8-3",
"seller": "by Apple",
"link": "/Apple-iPhone-64GB-Silver-Prepaid/dp/B078HVJB69/ref=sr_1_3?keywords=iphone&qid=1552244519&s=gateway&sr=8-3",
"title": "Apple iPhone X (64GB) - Silver [Locked to Simple Mobile Prepaid]",
"stars": "3.4 out of 5 stars",
"num_reviews": "3",
"price": "$899.00",
"oldprice": "",
"rank": 3
},
{
"image": "/Apple-iPhone-Silver-Locked-Prepaid/dp/B076MP43X5/ref=sr_1_4?keywords=iphone&qid=1552244519&s=gateway&sr=8-4",
"seller": "by Apple",
"link": "/Apple-iPhone-Silver-Locked-Prepaid/dp/B076MP43X5/ref=sr_1_4?keywords=iphone&qid=1552244519&s=gateway&sr=8-4",
"title": "Apple iPhone 8 (64GB) - Silver [Locked to Simple Mobile Prepaid]",
"stars": "2.4 out of 5 stars",
"num_reviews": "3",
"price": "$599.99",
"oldprice": "",
"rank": 4
},
{
"image": "/Apple-iPhone-Plus-Unlocked-Version/dp/B01LY5U2X3/ref=sr_1_5?keywords=iphone&qid=1552244519&s=gateway&sr=8-5",
"seller": "by Apple",
"link": "/Apple-iPhone-Plus-Unlocked-Version/dp/B01LY5U2X3/ref=sr_1_5?keywords=iphone&qid=1552244519&s=gateway&sr=8-5",
"title": "Apple iPhone 7 Plus (32GB) - Silver [Locked to Simple Mobile Prepaid]",
"stars": "3.1 out of 5 stars",
"num_reviews": "132",
"price": "$399.99$569.99",
"oldprice": "$569.99$569.99",
"rank": 5
},
{
"image": "/Apple-iPhone-32GB-Black-Prepaid/dp/B01N2K14U7/ref=sr_1_6?keywords=iphone&qid=1552244519&s=gateway&sr=8-6",
"seller": "by Apple",
"link": "/Apple-iPhone-32GB-Black-Prepaid/dp/B01N2K14U7/ref=sr_1_6?keywords=iphone&qid=1552244519&s=gateway&sr=8-6",
"title": "Apple iPhone 7 (32GB) - Black - [Locked to Simple Mobile Prepaid]",
"stars": "3.2 out of 5 stars",
"num_reviews": "32",
"price": "$299.99$449.99",
"oldprice": "$449.99$449.99",
"rank": 6
},
{
"image": "/Apple-iPhone-6S-Unlocked-Refurbished/dp/B0731JJCRZ/ref=sr_1_7?keywords=iphone&qid=1552244519&s=gateway&sr=8-7",
"seller": "by Apple",
"link": "/Apple-iPhone-6S-Unlocked-Refurbished/dp/B0731JJCRZ/ref=sr_1_7?keywords=iphone&qid=1552244519&s=gateway&sr=8-7",
"title": "Apple iPhone 6S - 32GB GSM Unlocked - (Rose Gold) (Refurbished)",
"stars": "3.8 out of 5 stars",
"num_reviews": "3,966",
"price": "$174.97",
"oldprice": "",
"rank": 7
},
{
"image": "/Apple-iPhone-Fully-Unlocked-64GB/dp/B06XRHJWNC/ref=sr_1_8?keywords=iphone&qid=1552244519&s=gateway&sr=8-8",
"seller": "by Apple",
"link": "/Apple-iPhone-Fully-Unlocked-64GB/dp/B06XRHJWNC/ref=sr_1_8?keywords=iphone&qid=1552244519&s=gateway&sr=8-8",
"title": "Apple iPhone 6S, Fully Unlocked, 64GB - Silver (Refurbished)",
"stars": "3.5 out of 5 stars",
"num_reviews": "385",
"price": "$204.83",
"oldprice": "",
"rank": 8
},
{
"image": "/Apple-iPhone-GSM-Unlocked-16GB/dp/B00YD547Q6/ref=sr_1_9?keywords=iphone&qid=1552244519&s=gateway&sr=8-9",
"seller": "by Apple",
"link": "/Apple-iPhone-GSM-Unlocked-16GB/dp/B00YD547Q6/ref=sr_1_9?keywords=iphone&qid=1552244519&s=gateway&sr=8-9",
"title": "Apple iPhone 6, GSM Unlocked, 16GB - Space Gray (Refurbished)",
"stars": "3.0 out of 5 stars",
"num_reviews": "2,509",
"price": "$149.99$194.99",
"oldprice": "$194.99$194.99",
"rank": 9
},
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_mtf_aps_sr_pg1_1?ie=UTF8&adId=A0145807DUMZAO43XNKF&url=%2FCharger-ONTWIE-Qualcomm-Certified-Compatible%2Fdp%2FB07KKD4832%2Fref%3Dsr_1_10_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-10-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_mtf",
"seller": "by ONTWIE",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_mtf_aps_sr_pg1_1?ie=UTF8&adId=A0145807DUMZAO43XNKF&url=%2FCharger-ONTWIE-Qualcomm-Certified-Compatible%2Fdp%2FB07KKD4832%2Fref%3Dsr_1_10_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-10-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_mtf",
"title": "USB Wall Charger Quick Charge 3.0, ONTWIE Qualcomm Certified 18W QC 3.0 Charger Adapter, UL Certified Travel Adapte Compatible iPhone XS/X/8/7/6/Plus/iPad, Samsung, LG, Nexus, HTC and More",
"stars": "4.5 out of 5 stars",
"num_reviews": "6",
"price": "$12.19",
"oldprice": "",
"rank": 10
},
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_mtf_aps_sr_pg1_2?ie=UTF8&adId=A03784982YI18C25BT179&url=%2FHeadphone-Adapter-Splitter-Earphone-Connector%2Fdp%2FB07P11PYPH%2Fref%3Dsr_1_11_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-11-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_mtf",
"seller": "by Alcoco",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_mtf_aps_sr_pg1_2?ie=UTF8&adId=A03784982YI18C25BT179&url=%2FHeadphone-Adapter-Splitter-Earphone-Connector%2Fdp%2FB07P11PYPH%2Fref%3Dsr_1_11_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-11-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_mtf",
"title": "Headphone Adapter for iPhone 8 3.5mm Splitter Jack Dongle Earphone Cable Charge and Aux Audio Connector for iPhone X/Xs/XS max/8/8 Plus/7/7 Plus 2 in 1 Headphone for Music and Charge Support iOS 12",
"stars": "4.8 out of 5 stars",
"num_reviews": "1,009",
"price": "$9.89",
"oldprice": "",
"rank": 11
},
{
"image": "/Apple-iPhone-Fully-Unlocked-32GB/dp/B01NAW98VS/ref=sr_1_12?keywords=iphone&qid=1552244519&s=gateway&sr=8-12",
"seller": "by Apple",
"link": "/Apple-iPhone-Fully-Unlocked-32GB/dp/B01NAW98VS/ref=sr_1_12?keywords=iphone&qid=1552244519&s=gateway&sr=8-12",
"title": "Apple iPhone 7, Fully Unlocked, 32GB - Gold (Refurbished)",
"stars": "3.5 out of 5 stars",
"num_reviews": "298",
"price": "$265.83$349.99",
"oldprice": "$349.99$349.99",
"rank": 12
},
{
"image": "/Apple-iPhone-GSM-Unlocked-16GB/dp/B0774T8DC6/ref=sr_1_13?keywords=iphone&qid=1552244519&s=gateway&sr=8-13",
"seller": "by Apple",
"link": "/Apple-iPhone-GSM-Unlocked-16GB/dp/B0774T8DC6/ref=sr_1_13?keywords=iphone&qid=1552244519&s=gateway&sr=8-13",
"title": "Apple iPhone SE, GSM Unlocked, 16GB - Space Gray (Refurbished)",
"stars": "3.9 out of 5 stars",
"num_reviews": "636",
"price": "$134.00",
"oldprice": "",
"rank": 13
},
{
"image": "/Apple-iPhone-GSM-Unlocked-256GB/dp/B07753NSQZ/ref=sr_1_14?keywords=iphone&qid=1552244519&s=gateway&sr=8-14",
"seller": "by Apple",
"link": "/Apple-iPhone-GSM-Unlocked-256GB/dp/B07753NSQZ/ref=sr_1_14?keywords=iphone&qid=1552244519&s=gateway&sr=8-14",
"title": "Apple iPhone 8, GSM Unlocked, 256GB - Space Gray (Refurbished)",
"stars": "3.9 out of 5 stars",
"num_reviews": "44",
"price": "$529.99",
"oldprice": "",
"rank": 14
},
{
"image": "/Apple-iPhone-Plus-Unlocked-16GB/dp/B00YD54J8W/ref=sr_1_15?keywords=iphone&qid=1552244519&s=gateway&sr=8-15",
"seller": "by Apple",
"link": "/Apple-iPhone-Plus-Unlocked-16GB/dp/B00YD54J8W/ref=sr_1_15?keywords=iphone&qid=1552244519&s=gateway&sr=8-15",
"title": "Apple iPhone 6 Plus, GSM Unlocked, 16GB - Space Gray (Refurbished)",
"stars": "3.3 out of 5 stars",
"num_reviews": "1,408",
"price": "$190.00",
"oldprice": "",
"rank": 15
},
{
"image": "/Apple-iPhone-GSM-Unlocked-64GB/dp/B014Z8HDWU/ref=sr_1_16?keywords=iphone&qid=1552244519&s=gateway&sr=8-16",
"seller": "by Apple",
"link": "/Apple-iPhone-GSM-Unlocked-64GB/dp/B014Z8HDWU/ref=sr_1_16?keywords=iphone&qid=1552244519&s=gateway&sr=8-16",
"title": "Apple iPhone 6, GSM Unlocked, 64GB - Space Gray (Refurbished)",
"stars": "2.9 out of 5 stars",
"num_reviews": "968",
"price": "$156.94",
"oldprice": "",
"rank": 16
},
{
"image": "/Apple-iPhone-GSM-Unlocked-128GB/dp/B01N9YO1DS/ref=sr_1_18?keywords=iphone&qid=1552244519&s=gateway&sr=8-18",
"seller": "by Apple",
"link": "/Apple-iPhone-GSM-Unlocked-128GB/dp/B01N9YO1DS/ref=sr_1_18?keywords=iphone&qid=1552244519&s=gateway&sr=8-18",
"title": "Apple iPhone 7, GSM Unlocked, 128GB - Gold (Refurbished)",
"stars": "3.5 out of 5 stars",
"num_reviews": "623",
"price": "$309.89$399.99",
"oldprice": "$399.99$399.99",
"rank": 17
},
{
"image": "/Apple-iPhone-Fully-Unlocked-16GB/dp/B06XRG6S73/ref=sr_1_19?keywords=iphone&qid=1552244519&s=gateway&sr=8-19",
"seller": "by Apple",
"link": "/Apple-iPhone-Fully-Unlocked-16GB/dp/B06XRG6S73/ref=sr_1_19?keywords=iphone&qid=1552244519&s=gateway&sr=8-19",
"title": "Apple iPhone 6S, Fully Unlocked, 16GB - Rose Gold (Refurbished)",
"stars": "3.9 out of 5 stars",
"num_reviews": "205",
"price": "$168.88",
"oldprice": "",
"rank": 18
},
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_btf_aps_sr_pg1_1?ie=UTF8&adId=A058166516UYKTSX41DBD&url=%2FBluetooth-Headphones-Retractable-Sweat-Proof-Earphones%2Fdp%2FB07K197VKQ%2Fref%3Dsr_1_20_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-20-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_btf",
"seller": "by ZSW Tech",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_btf_aps_sr_pg1_1?ie=UTF8&adId=A058166516UYKTSX41DBD&url=%2FBluetooth-Headphones-Retractable-Sweat-Proof-Earphones%2Fdp%2FB07K197VKQ%2Fref%3Dsr_1_20_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-20-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_btf",
"title": "Bluetooth Headphones, Wireless Neckband Earbuds Retractable Headset Stereo Sweat-Proof Sports Earphones with Mic for iPhone X/8/7/6, Android and Other Bluetooth Devices (Rose Gold)",
"stars": "3.9 out of 5 stars",
"num_reviews": "17",
"price": "$21.99",
"oldprice": "",
"rank": 19
},
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_btf_aps_sr_pg1_2?ie=UTF8&adId=A00322032TZ6RHIDCJFI7&url=%2FCancelling-Headphone-Bluetooth-Headphones-Microphone%2Fdp%2FB077YG22Y9%2Fref%3Dsr_1_21_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-21-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_btf",
"seller": "by COWIN",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_btf_aps_sr_pg1_2?ie=UTF8&adId=A00322032TZ6RHIDCJFI7&url=%2FCancelling-Headphone-Bluetooth-Headphones-Microphone%2Fdp%2FB077YG22Y9%2Fref%3Dsr_1_21_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-21-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_btf",
"title": "COWIN E7 Pro [2018 Upgraded] Active Noise Cancelling Headphone Bluetooth Headphones Microphone Hi-Fi Deep Bass Wireless Headphones Over Ear 30H Playtime Travel Work TV Computer Phone - Black",
"stars": "4.3 out of 5 stars",
"num_reviews": "2,580",
"price": "$89.99",
"oldprice": "",
"rank": 20
},
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_btf_aps_sr_pg1_3?ie=UTF8&adId=A05897013R5R41MQFZSIF&url=%2FBluetooth-Headphones-Retractable-Sweat-Proof-Earphones%2Fdp%2FB07K166QXV%2Fref%3Dsr_1_22_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-22-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_btf",
"seller": "by ZSW Tech",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_btf_aps_sr_pg1_3?ie=UTF8&adId=A05897013R5R41MQFZSIF&url=%2FBluetooth-Headphones-Retractable-Sweat-Proof-Earphones%2Fdp%2FB07K166QXV%2Fref%3Dsr_1_22_sspa%3Fkeywords%3Diphone%26qid%3D1552244519%26s%3Dgateway%26sr%3D8-22-spons%26psc%3D1&qualifier=1552244519&id=2869484585385390&widgetName=sp_btf",
"title": "Bluetooth Headphones, Wireless Neckband Earbuds Retractable Headset Stereo Sweat-Proof Sports Earphones with Mic for iPhone X/8/7/6, Android and Other Bluetooth Devices (Black)",
"stars": "3.6 out of 5 stars",
"num_reviews": "15",
"price": "$21.99",
"oldprice": "",
"rank": 21
}
]
"results": []
}
},
"drone": {
"1": {
"time": "Sun, 10 Mar 2019 19:02:02 GMT",
"num_results": "\n 1-48 of over 50,000 results for \"drone\"\n \n \n\n\n\n\n\n\n \n \n \n \n \n \n \n \n Sort by:\n \n Featured\n \n Price: Low to High\n \n Price: High to Low\n \n Avg. Customer Review\n \n Newest Arrivals\n \n Sort by:Featured\n <span class=\"a-button a-button-base\"><span class=\"a-button-inner\"><input class=\"a-button-input\" type=\"submit\" value=\"Go\"><span class=\"a-button-text\" aria-hidden=\"true\">Go</span></span></span>\n \n\n\n ",
"time": "Tue, 11 Jun 2019 15:00:21 GMT",
"num_results": "\n 1-48 of over 50,000 results for \"drone\"\n \n \n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n Sort by:\n \n Featured\n \n Price: Low to High\n \n Price: High to Low\n \n Avg. Customer Review\n \n Newest Arrivals\n \n Sort by:Featured\n <span class=\"a-button a-button-base\"><span class=\"a-button-inner\"><input class=\"a-button-input\" type=\"submit\" value=\"Go\"><span class=\"a-button-text\" aria-hidden=\"true\">Go</span></span></span>\n \n\n\n ",
"no_results": false,
"effective_query": "\"drone\"",
"results": [
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_phone_search_atf_aps_sr_pg1_1?ie=UTF8&adId=A07746892PFE2ZAVP8GLM&url=%2FHS120D-Quadcotper-Helicopter-Beginners-Functions%2Fdp%2FB07GTJ31ZM%2Fref%3Dsr_1_1_sspa%3Fkeywords%3Ddrone%26qid%3D1552244521%26s%3Dgateway%26sr%3D8-1-spons%26psc%3D1&qualifier=1552244521&id=7643069347457012&widgetName=sp_phone_search_atf",
"seller": "by DEERC",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_phone_search_atf_aps_sr_pg1_1?ie=UTF8&adId=A07746892PFE2ZAVP8GLM&url=%2FHS120D-Quadcotper-Helicopter-Beginners-Functions%2Fdp%2FB07GTJ31ZM%2Fref%3Dsr_1_1_sspa%3Fkeywords%3Ddrone%26qid%3D1552244521%26s%3Dgateway%26sr%3D8-1-spons%26psc%3D1&qualifier=1552244521&id=7643069347457012&widgetName=sp_phone_search_atf",
"title": "Holy Stone HS120D FPV Drone with Camera for Adults 1080p HD Live Video and GPS Return Home, RC Quadcotper Helicopter for Kids Beginners 16 Min Flight Time Long Range with Follow Me Selfie Functions",
"stars": "4.9 out of 5 stars",
"num_reviews": "20",
"price": "$169.99",
"oldprice": "",
"rank": 1
},
{
"image": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_phone_search_atf_aps_sr_pg1_2?ie=UTF8&adId=A07499583F006NLTR338J&url=%2FHoly-Stone-Quadcopter-Beginners-Intelligent%2Fdp%2FB07B6TZ575%2Fref%3Dsr_1_2_sspa%3Fkeywords%3Ddrone%26qid%3D1552244521%26s%3Dgateway%26sr%3D8-2-spons%26psc%3D1&qualifier=1552244521&id=7643069347457012&widgetName=sp_phone_search_atf",
"seller": "by Holy Stone",
"link": "/gp/slredirect/picassoRedirect.html/ref=pa_sp_phone_search_atf_aps_sr_pg1_2?ie=UTF8&adId=A07499583F006NLTR338J&url=%2FHoly-Stone-Quadcopter-Beginners-Intelligent%2Fdp%2FB07B6TZ575%2Fref%3Dsr_1_2_sspa%3Fkeywords%3Ddrone%26qid%3D1552244521%26s%3Dgateway%26sr%3D8-2-spons%26psc%3D1&qualifier=1552244521&id=7643069347457012&widgetName=sp_phone_search_atf",
"title": "Holy Stone HS100G Drone with 1080p FHD Camera 5G FPV Live Video and GPS Return Home Function RC Quadcopter for Beginners Kids Adults with Follow Me, Altitude Hold, Intelligent Battery",
"stars": "4.4 out of 5 stars",
"num_reviews": "225",
"price": "$249.99",
"oldprice": "",
"rank": 2
},
{
"image": "/Holy-Stone-Predator-Helicopter-Quadcopter/dp/B0157IHJMQ/ref=sr_1_3?keywords=drone&qid=1552244521&s=gateway&sr=8-3",
"seller": "by Holy Stone",
"link": "/Holy-Stone-Predator-Helicopter-Quadcopter/dp/B0157IHJMQ/ref=sr_1_3?keywords=drone&qid=1552244521&s=gateway&sr=8-3",
"title": "Holy Stone HS170 Predator Mini RC Helicopter Drone 2.4Ghz 6-Axis Gyro 4 Channels Quadcopter Good Choice for Drone Training",
"stars": "4.3 out of 5 stars",
"num_reviews": "5,003",
"price": "$35.99",
"oldprice": "",
"rank": 3
},
{
"image": "/TOPE-Wide-Angle-Quadcopter-Beginners-Brushless/dp/B07MTZPWL7/ref=sr_1_4?keywords=drone&qid=1552244521&s=gateway&sr=8-4",
"seller": "by TOPE",
"link": "/TOPE-Wide-Angle-Quadcopter-Beginners-Brushless/dp/B07MTZPWL7/ref=sr_1_4?keywords=drone&qid=1552244521&s=gateway&sr=8-4",
"title": "TOPE GPS FPV RC Drones with 1080P FHD Camera Live Video 150° Wide-Angle 5Ghz WiFi Quadcopter for Beginners Kids Adults with Follow Me,Brushless Motor,GPS Return Home and Foldable Arms, Black",
"stars": "4.0 out of 5 stars",
"num_reviews": "7",
"price": "$198.99",
"oldprice": "",
"rank": 4
},
{
"image": "/SNAPTAIN-Wide-Angle-Quadcopter-Altitude-Compatible/dp/B07GPNZSMY/ref=sr_1_5?keywords=drone&qid=1552244521&s=gateway&sr=8-5",
"seller": "by SNAPTAIN",
"link": "/SNAPTAIN-Wide-Angle-Quadcopter-Altitude-Compatible/dp/B07GPNZSMY/ref=sr_1_5?keywords=drone&qid=1552244521&s=gateway&sr=8-5",
"title": "SNAPTAIN S5C WiFi FPV Drone with 720P HD Camera,Voice Control, Wide-Angle Live Video RC Quadcopter with Altitude Hold, Gravity Sensor Function, RTF One Key Take Off/Landing, Compatible w/VR Headset",
"stars": "4.3 out of 5 stars",
"num_reviews": "462",
"price": "$74.99$159.99",
"oldprice": "$159.99$159.99",
"rank": 5
},
{
"image": "/SIMREX-Foldable-Quadcopter-Headless-Altitude/dp/B07HSQJ387/ref=sr_1_6?keywords=drone&qid=1552244521&s=gateway&sr=8-6",
"seller": "by SIMREX",
"link": "/SIMREX-Foldable-Quadcopter-Headless-Altitude/dp/B07HSQJ387/ref=sr_1_6?keywords=drone&qid=1552244521&s=gateway&sr=8-6",
"title": "SIMREX X300C 8816 Mini Drone with Camera WiFi HD FPV Foldable RC Quadcopter RTF 4CH 2.4Ghz Remote Control Headless [Altitude Hold] Super Easy Fly for Training - White",
"stars": "4.0 out of 5 stars",
"num_reviews": "160",
"price": "$39.99",
"oldprice": "",
"rank": 6
},
{
"image": "/Holy-Stone-Wide-Angle-Quadcopter-Altitude/dp/B078WKT1HL/ref=sr_1_7?keywords=drone&qid=1552244521&s=gateway&sr=8-7",
"seller": "by Holy Stone",
"link": "/Holy-Stone-Wide-Angle-Quadcopter-Altitude/dp/B078WKT1HL/ref=sr_1_7?keywords=drone&qid=1552244521&s=gateway&sr=8-7",
"title": "Holy Stone HS110D FPV RC Drone with 720P HD Camera Live Video 120° Wide-Angle WiFi Quadcopter with Altitude Hold Headless Mode 3D Flips RTF with Modular Battery, Color Black",
"stars": "4.5 out of 5 stars",
"num_reviews": "633",
"price": "$129.99",
"oldprice": "",
"rank": 7
},
{
"image": "/Quadcopter-Drone-Camera-EACHINE-Foldable/dp/B07HMWK4C2/ref=sr_1_8?keywords=drone&qid=1552244521&s=gateway&sr=8-8",
"seller": "by EACHINE",
"link": "/Quadcopter-Drone-Camera-EACHINE-Foldable/dp/B07HMWK4C2/ref=sr_1_8?keywords=drone&qid=1552244521&s=gateway&sr=8-8",
"title": "Quadcopter Drone With Camera Live Video, EACHINE E58 WiFi FPV Quadcopter with 120° FOV 720P HD Camera Foldable Drone RTF - Altitude Hold, One Key Take Off/Landing, 3D Flip, APP Control3Pcs Batteries",
"stars": "4.0 out of 5 stars",
"num_reviews": "72",
"price": "$92.99$100.00",
"oldprice": "$100.00$100.00",
"rank": 8
},
{
"image": "/DROCON-Portable-Quadcopter-Altitude-Beginners/dp/B07FCCGXDL/ref=sr_1_9?keywords=drone&qid=1552244521&s=gateway&sr=8-9",
"seller": "by DROCON",
"link": "/DROCON-Portable-Quadcopter-Altitude-Beginners/dp/B07FCCGXDL/ref=sr_1_9?keywords=drone&qid=1552244521&s=gateway&sr=8-9",
"title": "DROCON Mini RC Drone for Kids, Portable Pocket Quadcopter with Altitude Hold Mode, One-Key Take-Off & Landing, 3D Flips and Headless Mode, Easy to Fly for Beginners, Great Gift",
"stars": "4.3 out of 5 stars",
"num_reviews": "344",
"price": "$32.99$59.99",
"oldprice": "$59.99$59.99",
"rank": 9
}
]
"results": []
}
}
}

View File

@ -1,130 +1,288 @@
{
"cat": {
"1": {
"time": "Mon, 01 Apr 2019 13:18:15 GMT",
"time": "Tue, 11 Jun 2019 14:06:00 GMT",
"no_results": false,
"num_results": "百度为您找到相关结果约31,500,000个",
"effective_query": "",
"num_results": "43.100.000 Ergebnisse",
"results": [
{
"link": "http://www.baidu.com/link?url=avcUt9MXynjmbQR7BYlcLGQYKwEWNT2YnAme4J-nvpSug_6ehqEfL-NOly6gXzXjx7SFBDIrcR-vcyPYKHh5Lq",
"title": "cat_百度百科",
"snippet": "2017年7月30日 - CAT鞋也叫catfootwear。公司成立于1904年,出产工业制造工具和全世界闻名的CAT品牌各类休闲衣服与鞋业。CAT制造...",
"visible_link": "百度百科 - 百度快照",
"link": "https://www.cat.com/de_DE.html",
"title": "Cat | Produkte & Dienstleistungen | Caterpillar",
"snippet": "Diese Website verwendet \"Cookies\" und legt Sie auf Ihrem Computer ab, um die Benutzerfreundlichkeit der Website zu optimieren. Klicken Sie hier, um mehr über diese Cookies zu erfahren und allgemeine Informationen darüber zu erhalten, wie Sie Ihre Cookie-Einstellungen ändern können.",
"visible_link": "https://www.cat.com/de_DE.html",
"rank": 1
},
{
"link": "http://www.baidu.com/link?url=HjCnKN4wOfTizwb3nCOIX6ek-TMNX0giZzRerLJmmuNqVh7xJ7ziVfXx5-sJHuFc",
"title": "Cat | 亚太区 | Caterpillar",
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是卡特彼勒公司旗舰品牌。产品涵盖:卡特挖掘机、卡特推土机...",
"visible_link": "https://www.cat.com/zh_...html  - 百度快照 - 36条评价",
"link": "https://www.cat.com/de_DE/products/new/equipment.html",
"title": "Cat | Maschinen | Caterpillar",
"snippet": "Unsere Produktpalette umfasst über 300 Maschinen, und wir verbessern und aktualisieren unser Produktangebot ständig, um auf Ihre wechselnden Bedürfnisse einzugehen.",
"visible_link": "https://www.cat.com/de_DE/products/new/equipment.html",
"rank": 2
},
{
"link": "http://www.baidu.com/link?url=kr1lCQKntgYzgSWM2FhLL4BUcTj1ISpnsjzxXMWTnKC",
"title": "Cat | global-selector | Caterpillar",
"snippet": "global-selector Caterpillar Worldwide Genuine enabler of sustainable world progress and opportunity, ...",
"visible_link": "https://www.cat.com/  - 百度快照 - 36条评价 - 翻译此页",
"link": "https://www.catphones.com/de-de/",
"title": "Cat phones: Rugged Phones",
"snippet": "Das Cat® S31 ist ein verlässliches Rugged Phone, das für den Einsatz in extremen Umgebungsbedingungen perfekt gerüstet ist: Stürze, Staub, Schmutz …",
"visible_link": "https://www.catphones.com/de-de",
"rank": 3
},
{
"link": "http://www.baidu.com/link?url=6_ipy_cKkyswOXxWARa3kf20yEV2VmXxH3scHlMeLsQ0hVvJjuLCP6IIYx_-gGMQ",
"title": "CAT - 京东",
"snippet": "京东JD.COM是国内专业的网上购物商城,为您提供CAT价格、CAT评论、CAT导购、CAT图片等相关信息",
"visible_link": "京东 - 百度快照",
"link": "https://de.wikipedia.org/wiki/CAT",
"title": "CAT Wikipedia",
"snippet": "Dies ist eine Begriffsklärungsseite zur Unterscheidung mehrerer mit demselben Wort bezeichneter Begriffe.",
"visible_link": "https://de.wikipedia.org/wiki/CAT",
"rank": 4
},
{
"link": "http://www.baidu.com/link?url=9coejP6ciBEc0jK3sM14almpjYzLhO9s0YZcN1VICTgyioKyftrowla7fv21bGN5nd0jerHWHBq66ED0tIAKv_",
"title": "Linux cat命令 | 菜鸟教程",
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
"visible_link": "www.runoob.com/linux/l...  - 百度快照",
"link": "https://www.outdoor-handys.com/marke/cat",
"title": "CAT-Outdoor-Handy: 9 robuste Handys von Caterpillar im ...",
"snippet": "Über CAT. Der Grundstein für die Marke Caterpillar, kurz CAT, wurde 1925 mit Caterpillar Truck Company gelegt. Sie wurde von Benjamin Holt, der 1904 einen raupenartigen Traktor erfand, und Daniel Best, seinem größten Mitbewerber gegründet.",
"visible_link": "https://www.outdoor-handys.com/marke/cat",
"rank": 5
},
{
"link": "http://www.baidu.com/link?url=2vC0vVJSgAVojvV7XlPDZnrg3xRvOoWCx_aw8d1BUN6JSm7XdCyj_NVKf-4zdKMxXDDd4BsZykxal-ZcMs5OCxvBtWeVdSkjmPj4oKgj88K",
"title": "cat 分布式框架 - java零基础的外行人 - CSDN博客",
"snippet": "2017年12月27日 - CAT系统原型和理念来源于eBay的CAL的系统,CAT系统第一代设计者吴其敏在eBay工作长达十几年,对CAL系统有深刻...",
"visible_link": "CSDN博客号 - 百度快照",
"link": "https://www.zeppelin-cat.de/produkte/cat-bagger.html",
"title": "Cat Bagger - zeppelin-cat.de",
"snippet": "Cat Kettenbagger 13 bis 40 t. Die legendären Kettenbagger der Serie 300 mit toller Ausstattung und noch sparsamer. mehr",
"visible_link": "https://www.zeppelin-cat.de/produkte/cat-bagger.html",
"rank": 6
},
}
]
},
"2": {
"time": "Tue, 11 Jun 2019 14:06:02 GMT",
"no_results": false,
"effective_query": "",
"num_results": "7-16 von 43.100.000 Ergebnissen",
"results": [
{
"link": "http://www.baidu.com/link?url=3NzEhiI-CjzXMfiNv-0LSgzlOkmsAzK7v7J9lUNnSp-J9nkA60KdO2oWujlda4NR",
"title": "Cat | 中东 | Caterpillar",
"snippet": "Cat 机器和发动机为我们所服务的行业树立了标准,我们广泛的产品线也体现了我们对客户的成功的日益重视",
"visible_link": "https://www.cat.com/zh_...html  - 百度快照",
"link": "https://de.wikipedia.org/wiki/CAT",
"title": "CAT Wikipedia",
"snippet": "Dies ist eine Begriffsklärungsseite zur Unterscheidung mehrerer mit demselben Wort bezeichneter Begriffe.",
"visible_link": "https://de.wikipedia.org/wiki/CAT",
"rank": 7
},
{
"link": "http://www.baidu.com/link?url=fIHiCWZXnNgU_oFOKHgKZavP-TA1y-CGvtepweW6pc8LqPmPBl3zYzYmInj3QLBPXNFmcOBfL3mypg2bxxoXH_",
"title": "Linux指令之cat - Hubz131的博客 - CSDN博客",
"snippet": "2018年4月5日 - cat命令用于连接文件并打印到标准输出设备上。 语法: cat [-AbeEnstTuv] [--help] [--version] fileName参数: -n或--number:由1开始对所有输出的行数...",
"visible_link": "CSDN博客号 - 百度快照",
"link": "https://www.zeppelin-cat.de/produkte/cat-bagger.html",
"title": "Cat Bagger - zeppelin-cat.de",
"snippet": "Cat Kettenbagger 13 bis 40 t. Die legendären Kettenbagger der Serie 300 mit toller Ausstattung und noch sparsamer. mehr",
"visible_link": "https://www.zeppelin-cat.de/produkte/cat-bagger.html",
"rank": 8
},
{
"link": "https://www.outdoor-handys.com/marke/cat",
"title": "CAT-Outdoor-Handy: 9 robuste Handys von Caterpillar im ...",
"snippet": "Über CAT. Der Grundstein für die Marke Caterpillar, kurz CAT, wurde 1925 mit Caterpillar Truck Company gelegt. Sie wurde von Benjamin Holt, der 1904 einen raupenartigen Traktor erfand, und Daniel Best, seinem größten Mitbewerber gegründet.",
"visible_link": "https://www.outdoor-handys.com/marke/cat",
"rank": 9
},
{
"link": "https://www.cat-europe.com/de/",
"title": "Civil Aviation Training Europe PPL CPL ATPL EIR/CB-IR",
"snippet": "“Hallo liebes CAT Team. Ich erwerbe den ATPL und war letzte Woche bei der Prüfung. Das Ergebnis: Alle Fächer beim ersten Versuch bestanden! Ich möchte Euch danken, da die Vorbereitung echt klasse ist und ich mir das Lernen super einteilen konnte.",
"visible_link": "https://www.cat-europe.com/de",
"rank": 10
},
{
"link": "https://cat.eduroam.de/",
"title": "eduroam Configuration Assistant Tool",
"snippet": "Willkommen zu DFN eduroam CAT Diese Seite anzeigen in Български Català Čeština Deutsch Ελληνικά English(GB) Español Euskara Français Galego Hrvatski Italiano lietuvių Norsk Polski Slovenščina Srpski Suomi Magyar Português Slovenčina",
"visible_link": "https://cat.eduroam.de",
"rank": 11
},
{
"link": "http://www.cat-shop.ch/",
"title": "Startseite - Cat Shop",
"snippet": "Der Name Caterpillar steht nicht nur für die wohl bekanntesten Baumaschinen der Welt, Cat Schuhe und Kleider geniessen den selben Ruf. Wir haben für Sie eine Auswahl der beliebtesten Caterpillar-Artikel zusammengestellt.",
"visible_link": "www.cat-shop.ch",
"rank": 12
},
{
"link": "https://de.wikipedia.org/wiki/Twisted-Pair-Kabel",
"title": "Twisted-Pair-Kabel Wikipedia",
"snippet": "Category 6 augmented (Cat 6 A bzw. Cat 6A) ist ein Standard, der aus dem erhöhten Bandbreitenbedarf von 10-Gigabit-Ethernet (10GBASE-T) resultiert, für Übertragungsfrequenzen bis 500 MHz und Strecken bis 100 m ausgelegt sowie abwärtskompatibel zu bestehenden Kategorien ist.",
"visible_link": "https://de.wikipedia.org/wiki/Twisted-Pair-Kabel",
"rank": 13
},
{
"link": "http://www.cat-meldorf.de/",
"title": "Centrum für Angewandte Technologien :: CAT Meldorf ...",
"snippet": "08. Mai 2019 INFO-VERANSTALTUNG EXISTENZGRÜNDUNG. Die nächste Info-Veranstaltung zum Thema Existenzgründung findet am Mittwoch, den 12. Juni von 09.00 15.00 Uhr im CAT in Meldorf zu verschiedenen Themen der Existenzgründung statt und ist kostenfrei.",
"visible_link": "www.cat-meldorf.de",
"rank": 14
},
{
"link": "https://www.ara.cat/",
"title": "Ara.cat - El diari líder en català amb l'última hora i ...",
"snippet": "Notícies, reportatges, vídeos i articles per informar-vos i formar-vos la vostra opinió",
"visible_link": "https://www.ara.cat",
"rank": 15
},
{
"link": "https://www.vilaweb.cat/",
"title": "vilaweb.cat - Diari digital líder en català. Última ...",
"snippet": "Diari digital independent en català. Notícies nacionals i internacionals, opinió, política, esports, cultura i economia dels Països Catalans. Vídeos, blocs i xarxes socials.",
"visible_link": "https://www.vilaweb.cat",
"rank": 16
}
]
}
},
"mouse": {
"1": {
"time": "Mon, 01 Apr 2019 13:18:15 GMT",
"time": "Tue, 11 Jun 2019 14:06:03 GMT",
"no_results": false,
"num_results": "百度为您找到相关结果约31,500,000个",
"effective_query": "",
"num_results": "134.000.000 Ergebnisse",
"results": [
{
"link": "http://www.baidu.com/link?url=avcUt9MXynjmbQR7BYlcLGQYKwEWNT2YnAme4J-nvpSug_6ehqEfL-NOly6gXzXjx7SFBDIrcR-vcyPYKHh5Lq",
"title": "cat_百度百科",
"snippet": "2017年7月30日 - CAT鞋也叫catfootwear。公司成立于1904年,出产工业制造工具和全世界闻名的CAT品牌各类休闲衣服与鞋业。CAT制造...",
"visible_link": "百度百科 - 百度快照",
"link": "https://www.logitech.com/de-de/mice",
"title": "Logitech Computermäuse für PC und Macintosh ...",
"snippet": "Wireless Mouse M545 Kompaktes Format für großen Komfort EUR 44.99 Vergleichen USB Unifying receiver USB-Empfänger für den Einsatz mit einer Unifying-Maus oder -Tastatur.",
"visible_link": "https://www.logitech.com/de-de/mice",
"rank": 1
},
{
"link": "http://www.baidu.com/link?url=HjCnKN4wOfTizwb3nCOIX6ek-TMNX0giZzRerLJmmuNqVh7xJ7ziVfXx5-sJHuFc",
"title": "Cat | 亚太区 | Caterpillar",
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是卡特彼勒公司旗舰品牌。产品涵盖:卡特挖掘机、卡特推土机...",
"visible_link": "https://www.cat.com/zh_...html  - 百度快照 - 36条评价",
"link": "https://de.wikipedia.org/wiki/Mouse",
"title": "Mouse Wikipedia",
"snippet": "Mouse steht für: die englische Bezeichnung für ein Computereingabegerät, siehe Maus (Computer) Mouse (Manga), eine Manga-Serie; Mouse (Programmiersprache), eine Programmiersprache",
"visible_link": "https://de.wikipedia.org/wiki/Mouse",
"rank": 2
},
{
"link": "http://www.baidu.com/link?url=kr1lCQKntgYzgSWM2FhLL4BUcTj1ISpnsjzxXMWTnKC",
"title": "Cat | global-selector | Caterpillar",
"snippet": "global-selector Caterpillar Worldwide Genuine enabler of sustainable world progress and opportunity, ...",
"visible_link": "https://www.cat.com/  - 百度快照 - 36条评价 - 翻译此页",
"link": "https://www.chip.de/downloads/Move-Mouse_102198245.html",
"title": "Move Mouse - Download - CHIP",
"snippet": "08.01.2017 · \"Move Mouse\" ist ein nützliches kleines Tool, mit dem Sie automatisch Ihre Maus bewegen und dadurch beispielsweise die Aktivierung des Ruhezustands verhindern.",
"visible_link": "https://www.chip.de/downloads/Move-Mouse_102198245.html",
"rank": 3
},
{
"link": "http://www.baidu.com/link?url=6_ipy_cKkyswOXxWARa3kf20yEV2VmXxH3scHlMeLsQ0hVvJjuLCP6IIYx_-gGMQ",
"title": "CAT - 京东",
"snippet": "京东JD.COM是国内专业的网上购物商城,为您提供CAT价格、CAT评论、CAT导购、CAT图片等相关信息",
"visible_link": "京东 - 百度快照",
"link": "https://www.duden.de/rechtschreibung/Mouse",
"title": "Duden | Mouse | Rechtschreibung, Bedeutung, Definition ...",
"snippet": "Definition, Rechtschreibung, Synonyme und Grammatik von 'Mouse' auf Duden online nachschlagen. Wörterbuch der deutschen Sprache.",
"visible_link": "https://www.duden.de/rechtschreibung/Mouse",
"rank": 4
},
{
"link": "http://www.baidu.com/link?url=9coejP6ciBEc0jK3sM14almpjYzLhO9s0YZcN1VICTgyioKyftrowla7fv21bGN5nd0jerHWHBq66ED0tIAKv_",
"title": "Linux cat命令 | 菜鸟教程",
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
"visible_link": "www.runoob.com/linux/l...  - 百度快照",
"link": "https://www.chip.de/downloads/Mouse-Recorder-Premium_77202175.html",
"title": "Mouse Recorder Premium - Download - CHIP",
"snippet": "12.02.2016 · Mouse Recorder Premium wurde zuletzt am 02.12.2016 aktualisiert und steht Ihnen hier in der Version 1.0.51 zum Download zur Verfügung.",
"visible_link": "https://www.chip.de/downloads/Mouse-Recorder-Premium_77202175.html",
"rank": 5
},
{
"link": "http://www.baidu.com/link?url=2vC0vVJSgAVojvV7XlPDZnrg3xRvOoWCx_aw8d1BUN6JSm7XdCyj_NVKf-4zdKMxXDDd4BsZykxal-ZcMs5OCxvBtWeVdSkjmPj4oKgj88K",
"title": "cat 分布式框架 - java零基础的外行人 - CSDN博客",
"snippet": "2017年12月27日 - CAT系统原型和理念来源于eBay的CAL的系统,CAT系统第一代设计者吴其敏在eBay工作长达十几年,对CAL系统有深刻...",
"visible_link": "CSDN博客号 - 百度快照",
"link": "https://www.mouse-sensitivity.com/",
"title": "Mouse Sensitivity | Same Aim - Different Game",
"snippet": "Version 7.7.a (May 30, 2019) - Added a copy button next to the sensitivity calculations so you can copy the entire sensitivity output without any formatting.",
"visible_link": "https://www.mouse-sensitivity.com",
"rank": 6
},
{
"link": "http://www.baidu.com/link?url=3NzEhiI-CjzXMfiNv-0LSgzlOkmsAzK7v7J9lUNnSp-J9nkA60KdO2oWujlda4NR",
"title": "Cat | 中东 | Caterpillar",
"snippet": "Cat 机器和发动机为我们所服务的行业树立了标准,我们广泛的产品线也体现了我们对客户的成功的日益重视",
"visible_link": "https://www.cat.com/zh_...html  - 百度快照",
"link": "https://de.wikipedia.org/wiki/Maus_%28Computer%29",
"title": "Maus (Computer) Wikipedia",
"snippet": "„Das eigentliche Kennzeichen der Lisa ist die Maus. Dieses kleine Handgerät, durch eine dünne Schnur mit dem Computer verbunden, ist Lisas Mensch/Maschine-Schnittstelle.",
"visible_link": "https://de.wikipedia.org/wiki/Maus_(Computer)",
"rank": 7
},
{
"link": "http://www.baidu.com/link?url=fIHiCWZXnNgU_oFOKHgKZavP-TA1y-CGvtepweW6pc8LqPmPBl3zYzYmInj3QLBPXNFmcOBfL3mypg2bxxoXH_",
"title": "Linux指令之cat - Hubz131的博客 - CSDN博客",
"snippet": "2018年4月5日 - cat命令用于连接文件并打印到标准输出设备上。 语法: cat [-AbeEnstTuv] [--help] [--version] fileName参数: -n或--number:由1开始对所有输出的行数...",
"visible_link": "CSDN博客号 - 百度快照",
"link": "https://www.roccat.org/",
"title": "ROCCAT® | Gaming Mice | RGB Keyboards | …",
"snippet": "At ROCCAT we focus on high-end design and development of gaming mice, headsets, keyboards and accessories. Designed in Germany.",
"visible_link": "https://www.roccat.org",
"rank": 8
},
{
"link": "https://www.microsoft.com/en-us/download/details.aspx?id=35460",
"title": "Download Microsoft Garage Mouse without …",
"snippet": "17.01.2018 · Mouse Without Borders is a Microsoft Garage project by Truong Do. Garage projects are side projects that Microsoft employees like Truong build for fun on their nights and weekends.",
"visible_link": "https://www.microsoft.com/en-us/download/details.aspx?id=35460",
"rank": 9
},
{
"link": "https://cookie.riimu.net/speed/",
"title": "Clicking Speed Test - Riimu's Cookie Clicker …",
"snippet": "Clicking Speed Test. Test how fast you can click the virtual virtual cookie. Cookies per click is based on what you've entered in the Optimizer.",
"visible_link": "https://cookie.riimu.net/speed",
"rank": 10
}
]
},
"2": {
"time": "Tue, 11 Jun 2019 14:06:05 GMT",
"no_results": false,
"effective_query": "",
"num_results": "11-20 von 134.000.000 Ergebnissen",
"results": [
{
"link": "https://www.wdrmaus.de/",
"title": "wdrmaus.de - Die Seite mit der Maus - WDR",
"snippet": "Entdecke die Seite der Sendung mit der Maus. Schaue dir Lach- und Sachgeschichten an, spiele spannende Spiele, entdecke Lustiges zum Basteln oder schöne Bilder zum Ausmalen., Die Sendung mit der Maus, WDR, Das Erste",
"visible_link": "https://www.wdrmaus.de",
"rank": 11
},
{
"link": "https://www.chip.de/downloads/Mouse-Recorder-Premium_77202175.html",
"title": "Mouse Recorder Premium - Download - CHIP",
"snippet": "Mouse Recorder Premium wurde zuletzt am 02.12.2016 aktualisiert und steht Ihnen hier in der Version 1.0.51 zum Download zur Verfügung.",
"visible_link": "https://www.chip.de/downloads/Mouse-Recorder-Premium_77202175.html",
"rank": 12
},
{
"link": "https://www.mous.co/",
"title": "Mous | Slim Protective iPhone & Samsung Cases …",
"snippet": "The Only Case You'll Ever Need. Protective iPhone & Samsung Galaxy phone cases with our thoroughly tested AiroShock™ Technology. Our cases come in real materials including Aramid Carbon Fibre, Leather Bamboo, Walnut and Shell. Free shipping within the USA.",
"visible_link": "https://www.mous.co",
"rank": 13
},
{
"link": "https://free-mouse-auto-clicker.de.uptodown.com/windows",
"title": "Free Mouse Auto Clicker 3.8.2 - Download auf Deutsch",
"snippet": "Free Mouse Auto Clicker ist eine Anwendung mit der man ganz einfach einstellen kann, dass nach einem bestimmten Zeitintervall ein Mausklick ausgeführt wird.",
"visible_link": "https://free-mouse-auto-clicker.de.uptodown.com/windows",
"rank": 14
},
{
"link": "https://www.lifewire.com/what-is-a-mouse-2618156",
"title": "What Is a Computer Mouse? - Lifewire",
"snippet": "The mouse is a computer input device used to move a cursor around a screen. The mouse buttons are used to interact with whatever is being pointed at.",
"visible_link": "https://www.lifewire.com/what-is-a-mouse-2618156",
"rank": 15
},
{
"link": "https://www.mouse-sensitivity.com/",
"title": "Mouse Sensitivity | Same Aim - Different Game",
"snippet": "Version 7.7.a (May 30, 2019) - Added a copy button next to the sensitivity calculations so you can copy the entire sensitivity output without any formatting.",
"visible_link": "https://www.mouse-sensitivity.com",
"rank": 16
},
{
"link": "https://www.roccat.org/",
"title": "ROCCAT® | Gaming Mice | RGB Keyboards | …",
"snippet": "At ROCCAT we focus on high-end design and development of gaming mice, headsets, keyboards and accessories. Designed in Germany.",
"visible_link": "https://www.roccat.org",
"rank": 17
},
{
"link": "https://www.microsoft.com/en-us/download/details.aspx?id=35460",
"title": "Download Microsoft Garage Mouse without …",
"snippet": "17.01.2018 · Mouse Without Borders is a Microsoft Garage project by Truong Do. Garage projects are side projects that Microsoft employees like Truong build for fun on their nights and weekends.",
"visible_link": "https://www.microsoft.com/en-us/download/details.aspx?id=35460",
"rank": 18
},
{
"link": "https://evoluent.com/",
"title": "Evoluent VerticalMouse Vertical Mouse …",
"snippet": "I just recently got to use the keyboard and mouse myself. Ive probably recommended the keyboard to about 7 or more clients in the past 2 weeks.",
"visible_link": "https://evoluent.com",
"rank": 19
},
{
"link": "https://cookie.riimu.net/speed/",
"title": "Clicking Speed Test - Riimu's Cookie Clicker …",
"snippet": "Clicking Speed Test. Test how fast you can click the virtual virtual cookie. Cookies per click is based on what you've entered in the Optimizer.",
"visible_link": "https://cookie.riimu.net/speed",
"rank": 20
}
]
}

View File

@ -1,8 +1,8 @@
{
"news": {
"1": {
"time": "Fri, 01 Mar 2019 15:04:34 GMT",
"num_results": "Ungefähr 13.620.000.000 Ergebnisse (0,45 Sekunden) ",
"time": "Tue, 11 Jun 2019 15:43:51 GMT",
"num_results": "Ungefähr 25.270.000.000 Ergebnisse (0,36 Sekunden) ",
"no_results": false,
"effective_query": "",
"results": [
@ -16,19 +16,27 @@
},
{
"link": "https://www.bild.de/news/startseite/news/news-16804530.bild.html",
"title": "News aktuell aus Deutschland und der Welt - Bild.dehttps://www.bild.de/news/startseite/news/news-16804530.bild.html",
"title": "News aktuell: Nachrichten aus Deutschland und der Welt - Bild.dehttps://www.bild.de/news/startseite/news/news-16804530.bild.html",
"snippet": "Aktuelle News aus Deutschland, Europa und der Welt. Alle Informationen, Bilder und Videos zu Skandalen, Krisen und Sensationen bei BILD.de.",
"visible_link": "https://www.bild.de/news/startseite/news/news-16804530.bild.html",
"date": "",
"rank": 2
},
{
"link": "https://news.google.com/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRFZxYUdjU0FtUmxHZ0pFUlNnQVAB?hl=de&gl=DE&ceid=DE%3Ade",
"title": "Schlagzeilen - Neueste - Google Newshttps://news.google.com/.../CAAqJggKIiBDQkFTRWdvSUwyMHZNRFZxYUdjU0FtUm...",
"snippet": "Mit Google News kannst du zum Thema Schlagzeilen vollständige Artikel lesen, Videos ansehen und in Tausenden von Titeln stöbern.",
"visible_link": "https://news.google.com/.../CAAqJggKIiBDQkFTRWdvSUwyMHZNRFZxYUdjU0FtUm...",
"date": "",
"rank": 3
},
{
"link": "https://www.rtl.de/cms/news.html",
"title": "News: Aktuelle Nachrichten, Schlagzeilen und Videos | RTL.dehttps://www.rtl.de/cms/news.html",
"snippet": "Aktuelle Nachrichten aus Deutschland und der Welt auf einen Blick: Bei RTL.de finden Sie die News von heute, spannende Hintergründe und Videos.",
"visible_link": "https://www.rtl.de/cms/news.html",
"date": "",
"rank": 3
"rank": 4
},
{
"link": "https://www.zeit.de/news/index",
@ -36,7 +44,7 @@
"snippet": "Aktuelle News und Schlagzeilen im Newsticker von ZEIT ONLINE. Lesen Sie hier die neuesten Nachrichten.",
"visible_link": "https://www.zeit.de/news/index",
"date": "",
"rank": 4
"rank": 5
},
{
"link": "http://www.news.de/",
@ -44,72 +52,56 @@
"snippet": "Promi News und Aktuelles aus Sport, TV & Web. Jetzt Sportnachrichten von Fußball bis Boxen und das Neueste aus Klatsch und Tratsch per Newsticker, Fotos ...",
"visible_link": "www.news.de/",
"date": "",
"rank": 5
},
{
"link": "https://www.mopo.de/news",
"title": "News - Aktuelle Nachrichten aus Deutschland und der Welt. | MOPO.dehttps://www.mopo.de/news",
"snippet": "News - Aktuelle Nachrichten aus Hamburg, der Welt, zum HSV und der Welt der Promis.",
"visible_link": "https://www.mopo.de/news",
"date": "",
"rank": 6
},
{
"link": "https://www.t-online.de/nachrichten/",
"title": "Politik aktuell: Nachrichten aus Deutschland, Europa und der Welthttps://www.t-online.de/nachrichten/",
"snippet": "Neuigkeiten aus der Welt des Wintersports · Der Bachelor: Alle Informationen zur aktuellen Staffel · GNTM Staffel 14: News zu Germany's Next Topmodel 2019 ...",
"snippet": "Frauen-WM 2019: Ticker, Ergebnisse und News zum Fußball-Event · Let's Dance 2019: Promis, Profis und die ... E-Mails und News unterwegs immer dabei.",
"visible_link": "https://www.t-online.de/nachrichten/",
"date": "",
"rank": 7
},
{
"link": "https://www.n-tv.de/",
"title": "Nachrichten, aktuelle Schlagzeilen und Videos - n-tv.dehttps://www.n-tv.de/",
"snippet": "Nachrichten seriös, schnell und kompetent. Artikel und Videos aus Politik, Wirtschaft, Börse, Sport und News aus aller Welt.",
"visible_link": "https://www.n-tv.de/",
"date": "",
"rank": 8
},
{
"link": "https://www.stern.de/news/",
"title": "News - Sternhttps://www.stern.de/news/Im Cache",
"snippet": "News und aktuelle Schlagzeilen im Nachrichten-Ticker von STERN.de. Alle Informationen, Reportagen und Hintergründe im Überblick.",
"visible_link": "https://www.stern.de/news/",
"date": "",
"rank": 9
"rank": 8
},
{
"link": "https://www.stern.de/panorama/weltgeschehen/news--russland-sagt-venezuela-massive-hilfslieferungen-zu-8601942.html",
"title": "News: Russland sagt Venezuela massive Hilfslieferungen zu | STERN ...https://www.stern.de Panorama WeltgeschehenIm Cache",
"snippet": "vor 1 Stunde - News des TagesPutin will Venezuela massiv unter die Arme greifen. Maserninfektionen steigen weltweit an +++ 20 Jahre Haft für Magier Jan ...",
"visible_link": "https://www.stern.de Panorama Weltgeschehen",
"date": "vor 1 Stunde - ",
"rank": 10
"link": "https://www.mopo.de/news",
"title": "News - Aktuelle Nachrichten aus Deutschland und der Welt. | MOPO.dehttps://www.mopo.de/news",
"snippet": "News - Aktuelle Nachrichten aus Hamburg, der Welt, zum HSV und der Welt der Promis.",
"visible_link": "https://www.mopo.de/news",
"date": "",
"rank": 9
}
]
}
},
"se-scraper": {
"1": {
"time": "Fri, 01 Mar 2019 15:04:34 GMT",
"num_results": "Ungefähr 17.500.000 Ergebnisse (0,36 Sekunden) ",
"time": "Tue, 11 Jun 2019 15:43:52 GMT",
"num_results": "Ungefähr 19.300.000 Ergebnisse (0,32 Sekunden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://www.npmjs.com/package/se-scraper",
"title": "se-scraper - npmhttps://www.npmjs.com/package/se-scraperIm CacheDiese Seite übersetzen",
"snippet": "vor 1 Tag - se-scraper will create one browser instance per proxy. So the maximal amount of concurrency is equivalent to the number of proxies plus one ...",
"visible_link": "https://www.npmjs.com/package/se-scraper",
"date": "vor 1 Tag - ",
"rank": 1
},
{
"link": "https://github.com/NikolaiT/se-scraper",
"title": "GitHub - NikolaiT/se-scraper: Javascript scraping module based on ...https://github.com/NikolaiT/se-scraperIm CacheDiese Seite übersetzen",
"title": "NikolaiT/se-scraper: Javascript scraping module based on ... - GitHubhttps://github.com/NikolaiT/se-scraperIm CacheDiese Seite übersetzen",
"snippet": "Javascript scraping module based on puppeteer for many different search engines... - NikolaiT/se-scraper.",
"visible_link": "https://github.com/NikolaiT/se-scraper",
"date": "",
"rank": 1
},
{
"link": "https://github.com/NikolaiT/se-scraper/issues/5",
"title": "NikolaiT/se-scraper - GitHubhttps://github.com/NikolaiT/se-scraper/issues/5Im CacheDiese Seite übersetzen",
"snippet": "24.01.2019 - I'm trying to scrape by using multiple search engines successively. e.g. var searchEnginesList = ['google','bing'] for (let index = 0; index ...",
"visible_link": "https://github.com/NikolaiT/se-scraper/issues/5",
"date": "24.01.2019 - ",
"rank": 2
},
{
@ -121,44 +113,60 @@
"rank": 3
},
{
"link": "https://swedishicescraper.se/",
"title": "Swedish Ice Scraper: Onlinehttps://swedishicescraper.se/Im CacheDiese Seite übersetzen",
"snippet": "The original Swedish Ice Scraper - best in test. ... solid Acrylic Glass and use diamond polishing to sharpen the scraping edges. ... info@swedishicescraper.se.",
"visible_link": "https://swedishicescraper.se/",
"date": "",
"link": "https://www.quora.com/How-do-I-collect-Information-from-Google-SERP-into-my-own-web-app-scrape-or-API",
"title": "How to collect Information from Google SERP into my own web app ...https://www.quora.com/How-do-I-collect-Information-from-Goo...Diese Seite übersetzen",
"snippet": "01.11.2018 - I'd like to recommend you checking out Netpeak Checker which got a new feature called SE (Search Engines) Scraper in the latest 3.0 update.",
"visible_link": "https://www.quora.com/How-do-I-collect-Information-from-Goo...",
"date": "01.11.2018 - ",
"rank": 4
},
{
"link": "https://www.blackhatworld.com/seo/any-yandex-scrapers-available-or-universal-se-scraper.243421/",
"title": "Any yandex scrapers available? Or universal SE scraper ...https://www.blackhatworld.com ... Black Hat SEO ToolsIm CacheDiese Seite übersetzen",
"snippet": "10.10.2010 - Mostly blogs & stuff like that. Is Hrefer for yandex only or there are other SEs? How much is it? Advertise on BHW ...",
"visible_link": "https://www.blackhatworld.com ... Black Hat SEO Tools",
"date": "10.10.2010 - ",
"link": "https://netpeaksoftware.com/blog/netpeak-checker-3-0-serp-scraping",
"title": "Netpeak Checker 3.0: SERP Scraping Netpeak Software Bloghttps://netpeaksoftware.com/.../netpeak-checker-3-0-serp-scrapin...Im CacheDiese Seite übersetzen",
"snippet": "19.09.2018 - With a new tool under an 'SE Scraper' nickname you can get Google, Bing, Yahoo, and Yandex search results in a structured table with a lot of ...",
"visible_link": "https://netpeaksoftware.com/.../netpeak-checker-3-0-serp-scrapin...",
"date": "19.09.2018 - ",
"rank": 5
},
{
"link": "https://www.amazon.de/Calli-Edelstahl-Schokolade-Flugzeug-Werkzeug/dp/B01JJ96DJE",
"title": "Calli Edelstahl Käse Scraper Schokolade reiben Messer Flugzeug ...https://www.amazon.de/Calli-Edelstahl-Schokolade-Flugzeug.../dp/B01JJ96DJEIm Cache",
"snippet": "Amazon.de: Küchen- und Haushaltsartikel online - Calli Edelstahl Käse Scraper Schokolade reiben Messer Flugzeug Cutter. Beschreibung: Edelstahl Käse ...",
"visible_link": "https://www.amazon.de/Calli-Edelstahl-Schokolade-Flugzeug.../dp/B01JJ96DJE",
"date": "",
"rank": 6
},
{
"link": "https://www.friatec.de/content/friatec/en/Technical-Plastics/FRIATOOLS-Technical-Equipment/Mechanical-tools/index.html",
"title": "FRIATOOLS Scraper tools and mechanical tooling - Friatec AGhttps://www.friatec.de/content/friatec/en/...tools/index.htmlIm CacheDiese Seite übersetzen",
"snippet": "FRIATOOLS Scraper tools and mechanical tooling. ... FWSG SE 63 - 315, 613562 - 613574, saddle area, pipe ends, d 63 - d 315, SDR 11 - SDR 33. FWSG 710 ...",
"visible_link": "https://www.friatec.de/content/friatec/en/...tools/index.html",
"date": "",
"rank": 7
},
{
"link": "https://books.google.de/books?id=IAjyQdFwh4UC&pg=PA1024&lpg=PA1024&dq=se-scraper&source=bl&ots=_3OxiKnP4G&sig=ACfU3U1kZgZPdNlnGGWHRWjU0gG2OVHY1g&hl=de&sa=X&ved=2ahUKEwjU5evLm-HgAhWw1uAKHTEUB1IQ6AEwDnoECAMQAQ",
"link": "https://books.google.de/books?id=IAjyQdFwh4UC&pg=PA1024&lpg=PA1024&dq=se-scraper&source=bl&ots=_3PwgLsR0E&sig=ACfU3U3VITLhYJ1dIBedlAQdTWiqnFKYqA&hl=de&sa=X&ved=2ahUKEwis0uXO4uHiAhWDaFAKHeUIBGsQ6AEwBXoECAYQAQ",
"title": "A Dictionary of Slang and Unconventional Englishhttps://books.google.de/books?isbn=1134963653Diese Seite übersetzen",
"snippet": "1861 (OED); 1873, Rhoda Broughton, 'Happiness thinly spread over their whole lives, like bread and scrape!' Ex S.E. scrape, a thin layer.-——4. Hence, bread ...",
"visible_link": "https://books.google.de/books?isbn=1134963653",
"date": "",
"rank": 6
},
{
"link": "https://swedishicescraper.se/",
"title": "Swedish Ice Scraper: Onlinehttps://swedishicescraper.se/Im CacheDiese Seite übersetzen",
"snippet": "The original Swedish Ice Scraper - best in test. We laser cut our scrapers from thick, solid Acrylic Glass and use diamond polishing to sharpen the scraping ...",
"visible_link": "https://swedishicescraper.se/",
"date": "",
"rank": 7
},
{
"link": "https://books.google.de/books?id=HJxNXRr1NigC&pg=PA4616&lpg=PA4616&dq=se-scraper&source=bl&ots=JULi7G_1ix&sig=ACfU3U2cuZ55ETubLcLE6LF-xuZighUCbg&hl=de&sa=X&ved=2ahUKEwis0uXO4uHiAhWDaFAKHeUIBGsQ6AEwB3oECAkQAQ",
"title": "The Routledge Dictionary of Historical Slanghttps://books.google.de/books?isbn=0710077610Diese Seite übersetzen",
"snippet": "scrape . A shave: jocular coll (1859). cf. v. and SCRAPER. 2. Cheap butter: 1859, H., 1st ed. 3. See SCRAPE, BREAD AND. 4. ... Ex S.E. scrape, a thin layer. 2.",
"visible_link": "https://books.google.de/books?isbn=0710077610",
"date": "",
"rank": 8
},
{
"link": "https://books.google.de/books?id=U3oTAAAAYAAJ&pg=RA17-PA6&lpg=RA17-PA6&dq=se-scraper&source=bl&ots=B4I9G7YzO9&sig=ACfU3U2fIrgc8ADJ4Ff3rzQxprZqe_UplA&hl=de&sa=X&ved=2ahUKEwis0uXO4uHiAhWDaFAKHeUIBGsQ6AEwCHoECAgQAQ",
"title": "Latin Dictionary: Morell's Abridgmenthttps://books.google.de/books?id=U3oTAAAAYAAJDiese Seite übersetzen",
"snippet": "To scrape, or make an aukward bow, Poplitem inepte, vel inconcinne, inflectêre. To scrape acquaintance, Se in alicujus familiaritatem insinuare. A srrape-penny ...",
"visible_link": "https://books.google.de/books?id=U3oTAAAAYAAJ",
"date": "",
"rank": 9
},
{
"link": "https://books.google.de/books?id=XP4ana3LqK4C&pg=RA1-PA696&lpg=RA1-PA696&dq=se-scraper&source=bl&ots=mfE7zI1I7u&sig=ACfU3U2T4aVHnyTNdsc3XVcWl2YiM7ZgXw&hl=de&sa=X&ved=2ahUKEwis0uXO4uHiAhWDaFAKHeUIBGsQ6AEwCXoECAcQAQ",
"title": "Dictionnaire Anglais-français - Seite 696 - Google Books-Ergebnisseitehttps://books.google.de/books?isbn=087779166XDiese Seite übersetzen",
"snippet": "2 : griffer (se dit d'un chat) scratch2 n I SCRAPE t eraflure / egratignure / grafignure / Can 2 SCRATCHING : grattement m scratchy I'sknci | i ] ad scratchier; -est c ...",
"visible_link": "https://books.google.de/books?isbn=087779166X",
"date": "",
"rank": 10
}
]
}

File diff suppressed because it is too large Load Diff

View File

@ -1,88 +1,88 @@
{
"news": {
"1": {
"time": "Mon, 06 May 2019 19:39:17 GMT",
"num_results": "Ungefähr 25.270.000.000 Ergebnisse (0,31 Sekunden) ",
"time": "Tue, 11 Jun 2019 15:35:19 GMT",
"num_results": "Ongeveer 25.270.000.000 resultaten (0,38 seconden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://news.google.de/",
"title": "Google Newshttps://news.google.de/Ähnliche Seiten",
"snippet": "Ausführliche und aktuelle Beiträge - von Google News aus verschiedenen Nachrichtenquellen aus aller Welt zusammengetragen.",
"visible_link": "https://news.google.de/",
"link": "https://news.google.nl/",
"title": "Google Nieuwshttps://news.google.nl/In cacheVergelijkbaar",
"snippet": "Uitgebreide up-to-date berichtgeving, verzameld uit bronnen vanuit de hele wereld door Google Nieuws.",
"visible_link": "https://news.google.nl/",
"date": "",
"rank": 1
},
{
"link": "https://www.bild.de/news/startseite/news/news-16804530.bild.html",
"title": "News aktuell: Nachrichten aus Deutschland und der Welt - Bild.dehttps://www.bild.de/news/startseite/news/news-16804530.bild.html",
"snippet": "Aktuelle News aus Deutschland, Europa und der Welt. Alle Informationen, Bilder und Videos zu Skandalen, Krisen und Sensationen bei BILD.de.",
"visible_link": "https://www.bild.de/news/startseite/news/news-16804530.bild.html",
"link": "https://news.google.com/",
"title": "Google Newshttps://news.google.com/In cacheVergelijkbaarVertaal deze pagina",
"snippet": "Comprehensive up-to-date news coverage, aggregated from sources all over the world by Google News.",
"visible_link": "https://news.google.com/",
"date": "",
"rank": 2
},
{
"link": "https://www.bild.de/news/ausland/news-ausland/unfall-bei-stierkampf-stier-spiesst-torera-im-gesicht-auf-61711792.bild.html",
"title": "Unfall bei Stierkampf: Stier spießt Torera im Gesicht auf - News ...https://www.bild.de/news/.../news.../unfall-bei-stierkampf-stier-spiesst-torera-im-gesicht-...",
"snippet": "vor 45 Minuten - Hilda Tenorio kämpfte am Sonntag gegen einen Stier, der sie in einem Moment der Unaufmerksamkeit mitten im Gesicht mit einem seiner ...",
"visible_link": "https://www.bild.de/news/.../news.../unfall-bei-stierkampf-stier-spiesst-torera-im-gesicht-...",
"date": "vor 45 Minuten - ",
"link": "https://www.bbc.com/news/world",
"title": "World - BBC Newshttps://www.bbc.com/news/world",
"snippet": "Amnesty International says it has evidence that Sudanese government forces have continued to commit war crimes in the Darfur region. The rights group says at ...",
"visible_link": "https://www.bbc.com/news/world",
"date": "",
"rank": 3
},
{
"link": "https://www.zeit.de/news/index",
"title": "Schlagzeilen, News und Newsticker | ZEIT ONLINE - Die Zeithttps://www.zeit.de/news/index",
"snippet": "Aktuelle News und Schlagzeilen im Newsticker von ZEIT ONLINE. Lesen Sie hier die neuesten Nachrichten.",
"visible_link": "https://www.zeit.de/news/index",
"link": "https://www.foxnews.com/",
"title": "Fox News - Breaking News Updates | Latest News Headlines | Photos ...https://www.foxnews.com/Vertaal deze pagina",
"snippet": "Breaking News, Latest News and Current News from FOXNews.com. Breaking news and video. Latest Current News: U.S., World, Entertainment, Health, ...",
"visible_link": "https://www.foxnews.com/",
"date": "",
"rank": 4
},
{
"link": "https://news.google.com/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRFZxYUdjU0FtUmxHZ0pFUlNnQVAB?hl=de&gl=DE&ceid=DE%3Ade",
"title": "Google News - Schlagzeilen - Neuestehttps://news.google.com/.../CAAqJggKIiBDQkFTRWdvSUwyMHZNRFZxYUdjU0FtUm...",
"snippet": "Mit Google News kannst du zum Thema Schlagzeilen vollständige Artikel lesen, Videos ansehen und in Tausenden von Titeln stöbern.",
"visible_link": "https://news.google.com/.../CAAqJggKIiBDQkFTRWdvSUwyMHZNRFZxYUdjU0FtUm...",
"link": "https://metro.co.uk/news/",
"title": "News - Latest breaking news and top headlines | Metro UKhttps://metro.co.uk/news/In cacheVertaal deze pagina",
"snippet": "We finally know the 10 MPs in the official Tory leadership race · thumbnail for post ID 9896909 · Ten MPs will go through to the first round of voting by MPs.",
"visible_link": "https://metro.co.uk/news/",
"date": "",
"rank": 5
},
{
"link": "http://www.news.de/",
"title": "news.de - mehr als Nachrichten und News, die Sie bewegenwww.news.de/Ähnliche Seiten",
"snippet": "Promi News und Aktuelles aus Sport, TV & Web. Jetzt Sportnachrichten von Fußball bis Boxen und das Neueste aus Klatsch und Tratsch per Newsticker, Fotos ...",
"visible_link": "www.news.de/",
"link": "https://www.msn.com/en-us/news",
"title": "Breaking News Stories from US and Around the World | MSN Newshttps://www.msn.com/en-us/newsIn cacheVergelijkbaarVertaal deze pagina",
"snippet": "Get the latest news and follow the coverage of breaking news events, local news, weird news, national and global politics, and more from the world's top trusted ...",
"visible_link": "https://www.msn.com/en-us/news",
"date": "",
"rank": 6
},
{
"link": "https://www.t-online.de/nachrichten/",
"title": "Politik aktuell: Nachrichten aus Deutschland, Europa und der Welthttps://www.t-online.de/nachrichten/",
"snippet": "Europawahl 2019: Alle Infos und News zur Wahl des Europaparlaments · Game of Thrones: Alle Neuigkeiten zur Kult-Serie · GNTM Staffel 14: News zu ...",
"visible_link": "https://www.t-online.de/nachrichten/",
"link": "https://www.nu.nl/breaking-news.html",
"title": "Breaking News | NU - Het laatste nieuws het eerst op NU.nlhttps://www.nu.nl/breaking-news.htmlIn cache",
"snippet": "Wil je als eerste op de hoogte zijn van Breaking News? Meld je dan aan voor de Breaking News SMS Service van NU.nl. Vul je 06-nummer in en ontvang direct ...",
"visible_link": "https://www.nu.nl/breaking-news.html",
"date": "",
"rank": 7
},
{
"link": "https://www.rtl.de/cms/news.html",
"title": "News: Aktuelle Nachrichten, Schlagzeilen und Videos | RTL.dehttps://www.rtl.de/cms/news.html",
"snippet": "Aktuelle Nachrichten aus Deutschland und der Welt auf einen Blick: Bei RTL.de finden Sie die News von heute, spannende Hintergründe und Videos.",
"visible_link": "https://www.rtl.de/cms/news.html",
"link": "https://www.independent.ie/news/",
"title": "News - Latest Breaking News & Headlines - Independent.iehttps://www.independent.ie/news/In cacheVertaal deze pagina",
"snippet": "News, video, photos and commentary from your Irish Independent newspaper including Breaking, National, World, Sport and ... Irish News ... Irish News ...",
"visible_link": "https://www.independent.ie/news/",
"date": "",
"rank": 8
},
{
"link": "https://www.mopo.de/news",
"title": "News - Aktuelle Nachrichten aus Deutschland und der Welt. | MOPO.dehttps://www.mopo.de/news",
"snippet": "News - Aktuelle Nachrichten aus Hamburg, der Welt, zum HSV und der Welt der Promis.",
"visible_link": "https://www.mopo.de/news",
"link": "https://news.sky.com/world",
"title": "World News - Breaking international news and headlines | Sky Newshttps://news.sky.com/worldIn cacheVertaal deze pagina",
"snippet": "The latest international news from Sky, featuring top stories from around the world and breaking news, as it happens.",
"visible_link": "https://news.sky.com/world",
"date": "",
"rank": 9
},
{
"link": "https://www.n-tv.de/",
"title": "Nachrichten, aktuelle Schlagzeilen und Videos - n-tv.dehttps://www.n-tv.de/",
"snippet": "Nachrichten seriös, schnell und kompetent. Artikel und Videos aus Politik, Wirtschaft, Börse, Sport und News aus aller Welt.",
"visible_link": "https://www.n-tv.de/",
"link": "https://www.cnn.com/us",
"title": "US News Top national stories and latest headlines - CNN - CNN.comhttps://www.cnn.com/us",
"snippet": "View the latest US news, top stories, photos and videos from around the nation. To get the day's top headlines delivered to your inbox every morning, sign up for ...",
"visible_link": "https://www.cnn.com/us",
"date": "",
"rank": 10
}
@ -91,14 +91,14 @@
},
"i work too much": {
"1": {
"time": "Mon, 06 May 2019 19:39:19 GMT",
"num_results": "Ungefähr 4.280.000.000 Ergebnisse (0,44 Sekunden) ",
"time": "Tue, 11 Jun 2019 15:35:20 GMT",
"num_results": "Ongeveer 4.980.000.000 resultaten (0,34 seconden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://www.themuse.com/advice/3-reasons-you-work-too-muchand-how-to-overcome-each-one",
"title": "3 Reasons You Work Too Much and How to Stop- The Musehttps://www.themuse.com/.../3-reasons-you-work-too-muchand-h...Im CacheÄhnliche SeitenDiese Seite übersetzen",
"title": "3 Reasons You Work Too Much and How to Stop- The Musehttps://www.themuse.com/.../3-reasons-you-work-too-muchand-h...In cacheVergelijkbaarVertaal deze pagina",
"snippet": "There are three main reasons people work too much. Here's how to fight back against each one and attain better work-life balance.",
"visible_link": "https://www.themuse.com/.../3-reasons-you-work-too-muchand-h...",
"date": "",
@ -106,66 +106,66 @@
},
{
"link": "https://www.themuse.com/advice/6-signs-youre-giving-way-too-much-of-yourself-to-your-job",
"title": "6 Signs You're Working Too Hard - The Musehttps://www.themuse.com/.../6-signs-youre-giving-way-too-much...Im CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "6 Signs You're Giving Way Too Much of Yourself to Your Job ... And, as soon as you prove you're willing to scoop up that extra work and run with it, you're going ...",
"visible_link": "https://www.themuse.com/.../6-signs-youre-giving-way-too-much...",
"title": "Work-Life Balance 6 Signs You're Giving Way Too Much of ... - The Musehttps://www.themuse.com/.../6-signs-youre-giving-way-too-much-...In cacheVergelijkbaarVertaal deze pagina",
"snippet": "Here are six symptoms that your job is consuming your entire life and it's probably good for you to take a step back, relax, and reevaluate.",
"visible_link": "https://www.themuse.com/.../6-signs-youre-giving-way-too-much-...",
"date": "",
"rank": 2
},
{
"link": "https://www.lifehack.org/articles/lifestyle/ask-the-entrepreneurs-15-signs-youre-working-too-much-and-burning-out.html",
"title": "15 Signs You're Working Too Much and Burning Out - Lifehackhttps://www.lifehack.org/.../ask-the-entrepreneurs-15-signs-youre...Im CacheDiese Seite übersetzen",
"snippet": "If you're having trouble focusing because you've taken on too many different things, your work will suffer. You'll notice a lack of enthusiasm, a lack of interest, ...",
"visible_link": "https://www.lifehack.org/.../ask-the-entrepreneurs-15-signs-youre...",
"title": "15 Signs You're Working Too Much and Burning Out - Lifehackhttps://www.lifehack.org/.../ask-the-entrepreneurs-15-signs-youre-...In cacheVertaal deze pagina",
"snippet": "If you're not able to deliver what your client expects, you're probably taking on too much. Focus on what you can and should be doing, and find a way to cut out ...",
"visible_link": "https://www.lifehack.org/.../ask-the-entrepreneurs-15-signs-youre-...",
"date": "",
"rank": 3
},
{
"link": "https://context.reverso.net/%C3%BCbersetzung/englisch-deutsch/I+work+too+much",
"title": "I work too much - Deutsch Übersetzung - Englisch Beispiele | Reverso ...https://context.reverso.net/übersetzung/englisch-deutsch/I+work+too+muchIm Cache",
"snippet": "Übersetzung im Kontext von „I work too much“ in Englisch-Deutsch von Reverso Context: My wife says I work too much.",
"visible_link": "https://context.reverso.net/übersetzung/englisch-deutsch/I+work+too+much",
"link": "https://www.theodysseyonline.com/16-signs-you-work-too-much",
"title": "16 Signs You Work Too Much - Odysseyhttps://www.theodysseyonline.com/16-signs-you-work-too-much",
"snippet": "You try to get coverage but because you're one of the few people at work who works too much, no one really wants to come in any more than their normal 8-15 ...",
"visible_link": "https://www.theodysseyonline.com/16-signs-you-work-too-much",
"date": "",
"rank": 4
},
{
"link": "https://www.bustle.com/p/am-i-working-too-much-7-signs-its-time-to-slow-down-76583",
"title": "Am I Working Too Much? 7 Signs It's Time To Slow Down - Bustlehttps://www.bustle.com/.../am-i-working-too-much-7-signs-its-ti...Im CacheDiese Seite übersetzen",
"snippet": "28.08.2017 - Our society prides hard work so much, it can seem like there's no such thing as working too much. But there absolutely is. An overly demanding ...",
"visible_link": "https://www.bustle.com/.../am-i-working-too-much-7-signs-its-ti...",
"date": "28.08.2017 - ",
"link": "https://www.healthline.com/health/working-too-much-health-effects",
"title": "7 Health Effects of Working Too Much - Healthlinehttps://www.healthline.com/health/working-too-much-health-effectsIn cacheVertaal deze pagina",
"snippet": "3 mei 2017 - From increased risk of heart disease to poor sleep, working too much can take a toll on your health. Here are some of the side effects, along ...",
"visible_link": "https://www.healthline.com/health/working-too-much-health-effects",
"date": "3 mei 2017 - ",
"rank": 5
},
{
"link": "https://www.thealternativedaily.com/how-too-much-work-ruins-health/",
"title": "How Much Work Is Too Much For Your Mental And Physical Health?https://www.thealternativedaily.com/how-too-much-work-ruins-h...Im CacheDiese Seite übersetzen",
"title": "How Much Work Is Too Much For Your Mental And Physical Health?https://www.thealternativedaily.com/how-too-much-work-ruins-he...In cacheVertaal deze pagina",
"snippet": "Full time workers in the U.S. will typically clock up 47 hours per week of work — and that only includes paid work. Meanwhile, Aussies at the Australian National ...",
"visible_link": "https://www.thealternativedaily.com/how-too-much-work-ruins-h...",
"visible_link": "https://www.thealternativedaily.com/how-too-much-work-ruins-he...",
"date": "",
"rank": 6
},
{
"link": "https://www.huffpost.com/entry/24-things-only-people-who-work-entirely-too-much-will-understand_b_5510723",
"title": "24 Things Only People Who Work Entirely Too Much Will Understand ...https://www.huffpost.com/.../24-things-only-people-who-work-e...Im CacheDiese Seite übersetzen",
"snippet": "20.06.2014 - To all the people who are on a first-name basis with the office cleaning crew, are unfazed by empty parking lots on dark nights and can't go ...",
"visible_link": "https://www.huffpost.com/.../24-things-only-people-who-work-e...",
"date": "20.06.2014 - ",
"title": "24 Things Only People Who Work Entirely Too Much Will Understand ...https://www.huffpost.com/.../24-things-only-people-who-work-ent...In cacheVertaal deze pagina",
"snippet": "20 jun. 2014 - To all the people who are on a first-name basis with the office cleaning crew, are unfazed by empty parking lots on dark nights and can't go ...",
"visible_link": "https://www.huffpost.com/.../24-things-only-people-who-work-ent...",
"date": "20 jun. 2014 - ",
"rank": 7
},
{
"link": "https://www.rd.com/advice/work-career/workaholic-signs/",
"title": "Workaholic Signs: Are You Working Too Much? | Reader's Digesthttps://www.rd.com/advice/work-career/workaholic-signs/Im CacheDiese Seite übersetzen",
"title": "Workaholic Signs: Are You Working Too Much? | Reader's Digesthttps://www.rd.com/advice/work-career/workaholic-signs/In cacheVertaal deze pagina",
"snippet": "Enjoying your job is one thing, but here are some undeniable warning signs of workaholism that you may be taking your work a little too far.",
"visible_link": "https://www.rd.com/advice/work-career/workaholic-signs/",
"date": "",
"rank": 8
},
{
"link": "https://medium.com/an-idea-for-you/how-i-stopped-working-so-much-and-what-i-learned-from-it-16e7c76a0519",
"title": "How I Stopped Working So Much And What I Learned From Doing Sohttps://medium.com/.../how-i-stopped-working-so-much-and-wh...Im CacheDiese Seite übersetzen",
"snippet": "20.10.2017 - We know when we work too much—we just don't know what to do about it. Whether we love or hate our job, it's easy to become consumed by it ...",
"visible_link": "https://medium.com/.../how-i-stopped-working-so-much-and-wh...",
"date": "20.10.2017 - ",
"link": "https://www.bustle.com/p/am-i-working-too-much-7-signs-its-time-to-slow-down-76583",
"title": "Am I Working Too Much? 7 Signs It's Time To Slow Down - Bustlehttps://www.bustle.com/.../am-i-working-too-much-7-signs-its-tim...In cacheVertaal deze pagina",
"snippet": "28 aug. 2017 - Our society prides hard work so much, it can seem like there's no such thing as working too much. But there absolutely is. An overly demanding ...",
"visible_link": "https://www.bustle.com/.../am-i-working-too-much-7-signs-its-tim...",
"date": "28 aug. 2017 - ",
"rank": 9
}
]
@ -173,14 +173,14 @@
},
"scrapeulous.com": {
"1": {
"time": "Mon, 06 May 2019 19:39:16 GMT",
"num_results": "Ongeveer 224 resultaten (0,19 seconden) ",
"time": "Tue, 11 Jun 2019 15:35:19 GMT",
"num_results": "Ungefähr 256 Ergebnisse (0,24 Sekunden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://scrapeulous.com/",
"title": "Scrapeuloushttps://scrapeulous.com/In cacheVertaal deze paginaAboutTerms of ServiceContactPrivacy PolicyNews Api for the MSCI World ...Email FinderAdvanced Scraping ServicesScraping search engines with ...",
"title": "Scrapeuloushttps://scrapeulous.com/Im CacheDiese Seite übersetzenContactScraping search engines with ...AboutNews Api for the MSCI World ...",
"snippet": "Scraping search engines like Google, Bing and Duckduckgo in large quantities from many geographical regions with real browsers.",
"visible_link": "https://scrapeulous.com/",
"date": "",
@ -188,248 +188,140 @@
},
{
"link": "https://www.crunchbase.com/organization/scrapeulous",
"title": "Scrapeulous | Crunchbasehttps://www.crunchbase.com/organization/scrapeulousIn cacheVertaal deze pagina",
"title": "Scrapeulous | Crunchbasehttps://www.crunchbase.com/organization/scrapeulousIm CacheDiese Seite übersetzen",
"snippet": "Scrapeulous.com allows you to scrape various search engines automatically and in large quantities. Whether you need to analyze your competitors' market ...",
"visible_link": "https://www.crunchbase.com/organization/scrapeulous",
"date": "",
"rank": 2
},
{
"link": "https://www.youtube.com/watch?v=uyV0eChCe1c",
"title": "Scrapeulous.com Howto - YouTubehttps://www.youtube.com/watch?v=uyV0eChCe1cVertaal deze pagina",
"snippet": "You can inspect the Scrape Job i am talking about in the video here: https://scrapeulous.com/status ...",
"visible_link": "https://www.youtube.com/watch?v=uyV0eChCe1c",
"link": "https://incolumitas.com/",
"title": "Coding, Learning and Business Ideashttps://incolumitas.com/Im CacheDiese Seite übersetzen",
"snippet": "About · Contact · GoogleScraper · Lichess Autoplay-Bot · Projects · Scrapeulous.com · Site Notice · SVGCaptcha · Home Archives Categories Tags Atom ...",
"visible_link": "https://incolumitas.com/",
"date": "",
"rank": 3
},
{
"link": "https://twitter.com/scrapeulous",
"title": "Scrapeulous.com (@scrapeulous) | Twitterhttps://twitter.com/scrapeulousIn cacheVertaal deze pagina",
"title": "Scrapeulous.com (@scrapeulous) | Twitterhttps://twitter.com/scrapeulousDiese Seite übersetzen",
"snippet": "The latest Tweets from Scrapeulous.com (@scrapeulous): \"Creating software to realize the best scraping service at https://t.co/R5NUqSSrB5\"",
"visible_link": "https://twitter.com/scrapeulous",
"date": "",
"rank": 4
},
{
"link": "https://incolumitas.com/",
"title": "Coding, Learning and Business Ideashttps://incolumitas.com/In cacheVertaal deze pagina",
"snippet": "About · Contact · GoogleScraper · Lichess Autoplay-Bot · Projects · Scrapeulous.com · Site Notice · SVGCaptcha · Home Archives Categories Tags Atom ...",
"visible_link": "https://incolumitas.com/",
"link": "https://de.linkedin.com/in/nikolai-tschacher-71b237181",
"title": "Nikolai Tschacher Freelance Software Developer scrapeulous ...https://de.linkedin.com/in/nikolai-tschacher-71b237181",
"snippet": "Sehen Sie sich das Profil von Nikolai Tschacher auf LinkedIn an, dem weltweit größten beruflichen Netzwerk. 2 Jobs sind im Profil von Nikolai Tschacher ...",
"visible_link": "https://de.linkedin.com/in/nikolai-tschacher-71b237181",
"date": "",
"rank": 5
},
{
"link": "https://incolumitas.com/pages/scrapeulous/",
"title": "Coding, Learning and Business Ideas Scrapeulous.comhttps://incolumitas.com/pages/scrapeulous/In cacheVertaal deze pagina",
"snippet": "In autumn 2018, I created a scraping service called scrapeulous.com. There you can purchase scrape jobs that allow you to upload a keyword file which in turn ...",
"visible_link": "https://incolumitas.com/pages/scrapeulous/",
"link": "https://www.youtube.com/watch?v=uyV0eChCe1c",
"title": "Scrapeulous.com Howto - YouTubehttps://www.youtube.com/watch?v=uyV0eChCe1cDiese Seite übersetzen",
"snippet": "You can inspect the Scrape Job i am talking about in the video here: https://scrapeulous.com/status ...",
"visible_link": "https://www.youtube.com/watch?v=uyV0eChCe1c",
"date": "",
"rank": 6
},
{
"link": "https://github.com/NikolaiT/se-scraper",
"title": "NikolaiT/se-scraper: Javascript scraping module based on ... - GitHubhttps://github.com/NikolaiT/se-scraperIm CacheDiese Seite übersetzen",
"snippet": "const se_scraper = require('se-scraper'); let config = { search_engine: 'google', debug: false, verbose: false, keywords: ['news', 'scraping scrapeulous.com'], ...",
"visible_link": "https://github.com/NikolaiT/se-scraper",
"date": "",
"rank": 7
},
{
"link": "https://www.reddit.com/domain/scrapeulous.com/",
"title": "scrapeulous.com on reddit.comhttps://www.reddit.com/domain/scrapeulous.com/Im CacheDiese Seite übersetzen",
"snippet": "0. 0. Scraping 260 search queries in Bing in a matter of seconds using asyncio and aiohttp. (scrapeulous.com). submitted 4 years ago by incolumitas to r/Python.",
"visible_link": "https://www.reddit.com/domain/scrapeulous.com/",
"date": "",
"rank": 8
}
]
}
},
"what to do?": {
"1": {
"time": "Mon, 06 May 2019 19:39:19 GMT",
"num_results": "Ongeveer 25.270.000.000 resultaten (0,46 seconden) ",
"time": "Tue, 11 Jun 2019 15:35:21 GMT",
"num_results": "Ungefähr 20.190.000.000 Ergebnisse (0,58 Sekunden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://www.mydomaine.com/things-to-do-when-bored",
"title": "96 Things to Do When You're Bored - MyDomainehttps://www.mydomaine.com/things-to-do-when-bored",
"snippet": "",
"visible_link": "https://www.mydomaine.com/things-to-do-when-bored",
"link": "https://www.thecrazytourist.com/25-best-things-frankfurt-germany/",
"title": "25 Best Things to Do in Frankfurt (Germany) - The Crazy Touristhttps://www.thecrazytourist.com Travel Guides GermanyIm CacheDiese Seite übersetzen",
"snippet": "Germany's big financial centre is a city of many sides. The central business district, Bankenviertel, captures your attention right away and has all ten of the tallest ...",
"visible_link": "https://www.thecrazytourist.com Travel Guides Germany",
"date": "",
"rank": 1
},
{
"link": "https://www.mydomaine.com/things-to-do-when-bored",
"title": "96 Things to Do When You're Bored - MyDomainehttps://www.mydomaine.com/things-to-do-when-bored",
"snippet": "",
"visible_link": "https://www.mydomaine.com/things-to-do-when-bored",
"link": "https://www.tripadvisor.com/Attractions-g187337-Activities-Frankfurt_Hesse.html",
"title": "THE 15 BEST Things to Do in Frankfurt - 2019 (with Photos ...https://www.tripadvisor.com/Attractions-g187337-Activities-Frankfurt_Hesse.htmlÄhnliche Seiten",
"snippet": "Book your tickets online for the top things to do in Frankfurt, Germany on TripAdvisor: See 49136 traveler reviews and photos of Frankfurt tourist attractions.",
"visible_link": "https://www.tripadvisor.com/Attractions-g187337-Activities-Frankfurt_Hesse.html",
"date": "",
"rank": 2
},
{
"link": "https://www.mydomaine.com/things-to-do-when-bored",
"title": "96 Things to Do When You're Bored - MyDomainehttps://www.mydomaine.com Wellness Self-CareIn cacheVertaal deze pagina",
"snippet": "29 apr. 2019 - Go on a walk. Challenge yourself to leave your cell phone in your purse or pocket. Order a small set of hand weights from Amazon. Organize something. Do your laundry. Speaking of the gym, go! Visit Unroll.me and unsubscribe from all those emails you never read nor want to get. Paint your nails. Do sit-ups.",
"visible_link": "https://www.mydomaine.com Wellness Self-Care",
"date": "29 apr. 2019 - ",
"link": "https://www.likealocalguide.com/frankfurt/things-to-do",
"title": "Top 29 Things To Do in Frankfurt 2019 - Best Activities in Frankfurthttps://www.likealocalguide.com/frankfurt/things-to-doIm CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "Frankfurt city guide featuring 29 best local sights, things to do & tours recommended by Frankfurt locals. Skip the tourist traps & explore Frankfurt like a local.",
"visible_link": "https://www.likealocalguide.com/frankfurt/things-to-do",
"date": "",
"rank": 3
},
{
"link": "https://www.lonelyplanet.com/germany/frankfurt-am-main/top-things-to-do/a/poi/1003203",
"title": "Top things to do in Frankfurt am Main, Germany - Lonely Planethttps://www.lonelyplanet.com/germany/...things-to-do/.../100320...Im CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "Discover the best top things to do in Frankfurt am Main including Städel Museum, Kaiserdom, Senckenberg Museum.",
"visible_link": "https://www.lonelyplanet.com/germany/...things-to-do/.../100320...",
"date": "",
"rank": 4
},
{
"link": "https://www.mydomaine.com/things-to-do-when-bored",
"title": "96 Things to Do When You're Bored - MyDomainehttps://www.mydomaine.com Wellness Self-CareIm CacheDiese Seite übersetzen",
"snippet": "16.03.2016 - This book changed my life in many ways, but one of my key takeaways has to do with boredom. I am never bored. In fact, the word bored ...",
"visible_link": "https://www.mydomaine.com Wellness Self-Care",
"date": "16.03.2016 - ",
"rank": 5
},
{
"link": "https://www.timeout.com/frankfurt/things-to-do/best-things-to-do-in-frankfurt",
"title": "10 Best Things to do in Frankfurt for Locals and Tourists - Time Outhttps://www.timeout.com/.../things-to-do/best-things-to-do-in-fra...Im CacheDiese Seite übersetzen",
"snippet": "09.07.2018 - Looking for the best things to do in Frankfurt? Check out our guide to local-approved restaurants, tours and more can't-miss activities in the ...",
"visible_link": "https://www.timeout.com/.../things-to-do/best-things-to-do-in-fra...",
"date": "09.07.2018 - ",
"rank": 6
},
{
"link": "https://www.atlasobscura.com/things-to-do/frankfurt-germany",
"title": "9 Cool and Unusual Things to Do in Frankfurt - Atlas Obscurahttps://www.atlasobscura.com/things-to-do/frankfurt-germanyIm CacheDiese Seite übersetzen",
"snippet": "Discover 9 hidden attractions, cool sights, and unusual things to do in Frankfurt, Germany from Pinkelbaum (Peeing Tree) to Henninger Turm.",
"visible_link": "https://www.atlasobscura.com/things-to-do/frankfurt-germany",
"date": "",
"rank": 7
},
{
"link": "https://theculturetrip.com/europe/germany/articles/7-cool-and-unusual-things-to-do-in-frankfurt/",
"title": "7 Cool and Unusual Things to Do in Frankfurt - Culture Triphttps://theculturetrip.com Germany See & DoIm CacheDiese Seite übersetzen",
"snippet": "27.06.2018 - Frankfurt is the busiest airport in Germany, though unfortunately, not everyone realises it's worth stopping to spend time in the city. They don't ...",
"visible_link": "https://theculturetrip.com Germany See & Do",
"date": "27.06.2018 - ",
"rank": 8
},
{
"link": "https://lifehacks.io/what-to-do-when-your-bored/",
"title": "23 [REALLY] Fun Things To Do When You Are Bored - Life Hackshttps://lifehacks.io/what-to-do-when-your-bored/In cacheVertaal deze pagina",
"title": "23 [REALLY] Fun Things To Do When You Are Bored - Life Hackshttps://lifehacks.io/what-to-do-when-your-bored/Im CacheDiese Seite übersetzen",
"snippet": "What to Do When Bored? ― These twenty-three things are sure to pass your time, as well as make you happy at the end of the day.",
"visible_link": "https://lifehacks.io/what-to-do-when-your-bored/",
"date": "",
"rank": 4
},
{
"link": "https://guidetoiceland.is/nature-info/what-to-do-in-iceland",
"title": "What to Do & Where to Go | Top 10 Places to See in Icelandhttps://guidetoiceland.is Explore Iceland Nature In IcelandVergelijkbaar",
"snippet": "What are the most exciting and unique things to do in Iceland? Where can you find the country's most beautiful locations? Read on to discover our suggestions ...",
"visible_link": "https://guidetoiceland.is Explore Iceland Nature In Iceland",
"date": "",
"rank": 5
},
{
"link": "https://www.tripadvisor.co.uk/Attractions",
"title": "Top Things To Do Near Me - TripAdvisorhttps://www.tripadvisor.co.uk/AttractionsIn cacheVergelijkbaarVertaal deze pagina",
"snippet": "Find things to do near you. Explore the top-rated attractions, tours, and activities nearby and read reviews from TripAdvisor travellers.",
"visible_link": "https://www.tripadvisor.co.uk/Attractions",
"date": "",
"rank": 6
},
{
"link": "https://www.nytimes.com/2019/03/29/smarter-living/what-to-do-when-youre-bored-with-your-routines.html",
"title": "What to Do When You're Bored With Your Routines - The New York ...https://www.nytimes.com/.../what-to-do-when-youre-bored-with-y...Vertaal deze pagina",
"snippet": "29 mrt. 2019 - Blame hedonic adaptation: the tendency for us to get used to things over time.",
"visible_link": "https://www.nytimes.com/.../what-to-do-when-youre-bored-with-y...",
"date": "29 mrt. 2019 - ",
"rank": 7
},
{
"link": "https://www.timeout.com/london/things-to-do",
"title": "Things to Do in London - Events, Attractions and Activities - Time Out ...https://www.timeout.com/london/things-to-doIn cacheVergelijkbaarVertaal deze pagina",
"snippet": "The ultimate guide to things to do in London. All the events, festivals and activites happening in London.",
"visible_link": "https://www.timeout.com/london/things-to-do",
"date": "",
"rank": 8
},
{
"link": "https://www.visitflanders.com/en/destinations/mechelen/what-to-do/",
"title": "What to do in Mechelen | VISITFLANDERShttps://www.visitflanders.com/en/destinations/mechelen/what-to-do/In cache",
"snippet": "Don't know what to do in Mechelen? Check out our database filled with information! From events to points of interest, from pubs to restaurants. Find it here.",
"visible_link": "https://www.visitflanders.com/en/destinations/mechelen/what-to-do/",
"date": "",
"rank": 9
},
{
"link": "https://www.visitphilly.com/articles/philadelphia/most-essential-things-to-do-in-philadelphia/",
"title": "The 10 Most Essential Things to Do on Your (First) Visit to Philly ...https://www.visitphilly.com/.../most-essential-things-to-do-in-phila...In cacheVertaal deze pagina",
"snippet": "There's never a shortage of awesome things to do in Philadelphia — but there are some things you just can't miss while you're here. Whether it's running like ...",
"visible_link": "https://www.visitphilly.com/.../most-essential-things-to-do-in-phila...",
"date": "",
"rank": 10
}
]
}
},
"incolumitas.com": {
"1": {
"time": "Mon, 06 May 2019 19:39:17 GMT",
"num_results": "Ungefähr 3.450.000 Ergebnisse (0,26 Sekunden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://incolumitas.com/",
"title": "Coding, Learning and Business Ideashttps://incolumitas.com/Im CacheDiese Seite übersetzenContactwpa_supplicantSite NoticeCategoriesScrapeulous.comIntroductionGoogleScraperMachine LearningAboutTags",
"snippet": "Tutorial that teaches how scrape amazon reviews. Continue reading · Older Posts. © Nikolai Tschacher - incolumitas.com 2018. Powered by Pelican - Flex ...",
"visible_link": "https://incolumitas.com/",
"date": "",
"rank": 1
},
{
"link": "https://de-de.facebook.com/pages/category/Transportation-Service/incolumitas/posts/",
"title": "Incolumitas - Beiträge | Facebookhttps://de-de.facebook.com/pages/category/Transportation-Service/incolumitas/posts/",
"snippet": "Incolumitas, Paterna. Gefällt 133 Mal. Incolumitas Consejeros. Consultoría mercancías peligrosas.",
"visible_link": "https://de-de.facebook.com/pages/category/Transportation-Service/incolumitas/posts/",
"date": "",
"rank": 2
},
{
"link": "https://www.frag-caesar.de/lateinwoerterbuch/incolumitas-uebersetzung.html",
"title": "incolumitas-Übersetzung im Latein Wörterbuch - Frag Caesarhttps://www.frag-caesar.de/lateinwoerterbuch/incolumitas-uebersetzung.htmlIm Cache",
"snippet": "Übersetzung und Formen zu incolumitas im Latein Wörterbuch.",
"visible_link": "https://www.frag-caesar.de/lateinwoerterbuch/incolumitas-uebersetzung.html",
"date": "",
"rank": 3
}
]
}
},
"javascript is hard": {
"1": {
"time": "Mon, 06 May 2019 19:39:19 GMT",
"num_results": "Ungefähr 1.260.000.000 Ergebnisse (0,38 Sekunden) ",
"no_results": false,
"effective_query": "",
"results": [
{
"link": "https://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"title": "How Hard Is JavaScript to Learn? HTML Comparison - ThoughtCohttps://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"snippet": "",
"visible_link": "https://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"date": "",
"rank": 1
},
{
"link": "https://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"title": "How Hard Is JavaScript to Learn? HTML Comparison - ThoughtCohttps://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"snippet": "",
"visible_link": "https://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"date": "",
"rank": 2
},
{
"link": "https://skillcrush.com/2018/06/27/how-hard-is-it-to-learn-javascript/",
"title": "How Hard Is it to Learn JavaScript? The Pros Weigh In - Skillcrushhttps://skillcrush.com/2018/06/.../how-hard-is-it-to-learn-javascri...Im CacheDiese Seite übersetzen",
"snippet": "27.06.2018 - Are you thinking about learning JavaScript but concerned about how hard of a task that might be? Allow these developers with JavaScript ...",
"visible_link": "https://skillcrush.com/2018/06/.../how-hard-is-it-to-learn-javascri...",
"date": "27.06.2018 - ",
"rank": 3
},
{
"link": "https://www.quora.com/Is-JavaScript-hard-to-learn",
"title": "Is JavaScript hard to learn? - Quorahttps://www.quora.com/Is-JavaScript-hard-to-learnÄhnliche SeitenDiese Seite übersetzen",
"snippet": "16.12.2015 - That is partially because programming is hard, but also because JavaScript itself has unique concerns that do not affect other languages. As with any ...",
"visible_link": "https://www.quora.com/Is-JavaScript-hard-to-learn",
"date": "16.12.2015 - ",
"rank": 4
},
{
"link": "https://www.thoughtco.com/how-hard-is-javascript-to-learn-2037676",
"title": "How Hard Is JavaScript to Learn? HTML Comparison - ThoughtCohttps://www.thoughtco.com ... JavaScript ProgrammingIm CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "28.01.2019 - JavaScript, however, is not a markup language; rather, it is a programming language. That by itself is enough to make learning JavaScript a lot more difficult than HTML. ... It will, however, take you a lot longer to learn everything that can be done with JavaScript compared to HTML.",
"visible_link": "https://www.thoughtco.com ... JavaScript Programming",
"date": "28.01.2019 - ",
"rank": 5
},
{
"link": "http://blog.thefirehoseproject.com/posts/why-is-javascript-so-hard-to-learn/",
"title": "Why is JavaScript So Hard To Learn? - Firehose Projectblog.thefirehoseproject.com/.../why-is-javascript-so-hard-to-learn...Im CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "29.08.2016 - We'll get into the 7 reasons why JavaScript is so hard to learn and why it's a useful programming language for modern programmers.",
"visible_link": "blog.thefirehoseproject.com/.../why-is-javascript-so-hard-to-learn...",
"date": "29.08.2016 - ",
"rank": 6
},
{
"link": "https://develoger.com/why-is-javascript-so-hard-bd3648db51a5",
"title": "Why is JavaScript so hard? Develogerhttps://develoger.com/why-is-javascript-so-hard-bd3648db51a5Im CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "03.10.2016 - If you feel comfortable working with html but find it hard to experience ... Looking at the JavaScript and programming trough the eyes of CSS.",
"visible_link": "https://develoger.com/why-is-javascript-so-hard-bd3648db51a5",
"date": "03.10.2016 - ",
"rank": 7
},
{
"link": "https://www.reddit.com/r/webdev/comments/80zcx1/javascript_is_hard/",
"title": "Javascript IS hard. : webdev - Reddithttps://www.reddit.com/r/webdev/comments/.../javascript_is_hard...Im CacheDiese Seite übersetzen",
"snippet": "28.02.2018 - I'm sure some of you may have seen the disaster of the thread stating that Javascript isn't hard. I'm here to tell you the opposite. Javascript is...",
"visible_link": "https://www.reddit.com/r/webdev/comments/.../javascript_is_hard...",
"date": "28.02.2018 - ",
"rank": 8
},
{
"link": "https://teamtreehouse.com/community/is-learning-javascript-supposed-to-be-this-difficult-or-am-i-not-cut-out-for-this",
"title": "Is learning JavaScript supposed to be this difficult or am I not cut out ...https://teamtreehouse.com/.../is-learning-javascript-supposed-to-b...Im CacheÄhnliche SeitenDiese Seite übersetzen",
"snippet": "03.12.2015 - I haven't been able to complete any of Dave McFarland's \"programming challenges\" like building quizzes etc. I have to just watch his solution ...",
"visible_link": "https://teamtreehouse.com/.../is-learning-javascript-supposed-to-b...",
"date": "03.12.2015 - ",
"rank": 9
}
]

31
examples/reusing.js Normal file
View File

@ -0,0 +1,31 @@
const se_scraper = require('./../src/node_scraper.js');
// Example: create a single ScrapeManager, start it once, and reuse it for two
// consecutive scrape jobs that target different search engines.
(async () => {
    // Settings handed to the ScrapeManager constructor.
    const browser_config = {
        debug_level: 1,
        output_file: 'examples/results/data.json',
    };

    // First job: two keywords on google, one result page each.
    const scrape_job = {
        search_engine: 'google',
        keywords: ['news', 'se-scraper'],
        num_pages: 1,
    };

    // Second job: two keywords on bing, run with the same manager instance.
    const scrape_job2 = {
        search_engine: 'bing',
        keywords: ['test', 'what a wonderful world'],
        num_pages: 1,
    };

    const scraper = new se_scraper.ScrapeManager(browser_config);
    await scraper.start();

    try {
        const results = await scraper.scrape(scrape_job);
        console.dir(results, {depth: null, colors: true});

        const results2 = await scraper.scrape(scrape_job2);
        console.dir(results2, {depth: null, colors: true});
    } finally {
        // Always shut the started scraper down, even when one of the jobs
        // rejects; otherwise quit() would never be reached.
        await scraper.quit();
    }
})().catch((err) => {
    // Report the failure instead of leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
});

105
index.js
View File

@ -1,100 +1,19 @@
const { Cluster } = require('./src/puppeteer-cluster/dist/index.js');
const handler = require('./src/node_scraper.js');
var fs = require('fs');
var os = require("os");
const se_scraper = require('./src/node_scraper.js');
exports.scrape = async function(user_config, callback) {
async function scrape(user_config, scrape_config) {
// options for scraping
let config = {
// the user agent to scrape with
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
// if random_user_agent is set to True, a random user agent is chosen
random_user_agent: false,
// whether to select manual settings in visible mode
set_manual_settings: false,
// log ip address data
log_ip_address: false,
// log http headers
log_http_headers: false,
// how long to sleep between requests. a random sleep interval within the range [a,b]
// is drawn before every request. empty string for no sleeping.
sleep_range: '',
// which search engine to scrape
search_engine: 'google',
compress: false, // compress
debug: false,
verbose: true,
keywords: ['search engine scraping scrapeulous.com'],
// whether to start the browser in headless mode
headless: true,
// specify flags passed to chrome here
chrome_flags: [],
// the number of pages to scrape for each keyword
num_pages: 1,
// path to output file, data will be stored in JSON
output_file: '',
// whether to prevent images, css, fonts and media from being loaded
// will speed up scraping a great deal
block_assets: true,
// path to js module that extends functionality
// this module should export the functions:
// get_browser, handle_metadata, close_browser
//custom_func: resolve('examples/pluggable.js'),
custom_func: '',
// path to a proxy file, one proxy per line. Example:
// socks5://78.94.172.42:1080
// http://118.174.233.10:48400
proxy_file: '',
proxies: [],
// check if headless chrome escapes common detection techniques
// this is a quick test and should be used for debugging
test_evasion: false,
apply_evasion_techniques: true,
// settings for puppeteer-cluster
puppeteer_cluster_config: {
timeout: 30 * 60 * 1000, // max timeout set to 30 minutes
monitor: false,
concurrency: Cluster.CONCURRENCY_BROWSER,
maxConcurrency: 1,
}
};
var scraper = new se_scraper.ScrapeManager(user_config);
// overwrite default config
for (var key in user_config) {
config[key] = user_config[key];
}
await scraper.start();
if (fs.existsSync(config.keyword_file)) {
config.keywords = read_keywords_from_file(config.keyword_file);
}
var results = await scraper.scrape(scrape_config);
if (fs.existsSync(config.proxy_file)) {
config.proxies = read_keywords_from_file(config.proxy_file);
if (config.verbose) {
console.log(`${config.proxies.length} proxies loaded.`);
}
}
await scraper.quit();
if (!callback) {
// called when results are ready
callback = function (err, response) {
if (err) {
console.error(err)
}
console.dir(response.results, {depth: null, colors: true});
}
}
await handler.handler(config, undefined, callback );
};
function read_keywords_from_file(fname) {
let kws = fs.readFileSync(fname).toString().split(os.EOL);
// clean keywords
kws = kws.filter((kw) => {
return kw.trim().length > 0;
});
return kws;
return results;
}
module.exports = {
scrape: scrape,
ScrapeManager: se_scraper.ScrapeManager,
};

View File

@ -0,0 +1,155 @@
{
"news": {
"1": {
"time": "Tue, 11 Jun 2019 15:48:30 GMT",
"no_results": false,
"effective_query": "",
"num_results": "195.000.000 Ergebnisse",
"results": [
{
"link": "https://www.bz-berlin.de/thema/berlin-news",
"title": "Berlin News B.Z. Berlin",
"snippet": "Berlin Top-News: 24 Stunden Nachrichten aus Berlin mit Meldungen aus Politik, Wirtschaft, Tatort und Polizei sowie Sport-News und Liveticker",
"visible_link": "https://www.bz-berlin.de/thema/berlin-news",
"rank": 1
},
{
"link": "https://news.google.com/?hl=de&gl=DE&ceid=DE:de",
"title": "Google News",
"snippet": "Ausführliche und aktuelle Beiträge - von Google News aus verschiedenen Nachrichtenquellen aus aller Welt zusammengetragen",
"visible_link": "https://news.google.com/?hl=de&gl=DE&ceid=DE:de",
"rank": 2
},
{
"link": "https://www.bild.de/news/startseite/news/news-16804530.bild.html",
"title": "News aktuell: Nachrichten aus Deutschland und der Welt ...",
"snippet": "Aktuelle News aus Deutschland, Europa und der Welt. Alle Informationen, Bilder und Videos zu Skandalen, Krisen und Sensationen bei BILD.de.",
"visible_link": "https://www.bild.de/news",
"rank": 3
},
{
"link": "http://www.news.de/",
"title": "news.de - mehr als Nachrichten und News, die Sie bewegen",
"snippet": "Promi News und Aktuelles aus Sport, TV & Web. Jetzt Sportnachrichten von Fußball bis Boxen und das Neueste aus Klatsch und Tratsch per Newsticker, Fotos & Videos verfolgen! Neben den aktuellen Nachrichten aus Politik und Wirtschaft unterhalten wir Sie mit Promi-News, live News zum Bundesliga-Endspurt und aktuellen TV-Events.",
"visible_link": "www.news.de",
"rank": 4
},
{
"link": "https://www.mopo.de/news",
"title": "News - Aktuelle Nachrichten aus Deutschland und der Welt ...",
"snippet": "News - Aktuelle Nachrichten aus Hamburg, der Welt, zum HSV und der Welt der Promis.",
"visible_link": "https://www.mopo.de/news",
"rank": 5
},
{
"link": "https://www.n-tv.de/",
"title": "Nachrichten, aktuelle Schlagzeilen und Videos - n-tv.de",
"snippet": "Nachrichten seriös, schnell und kompetent. Artikel und Videos aus Politik, Wirtschaft, Börse, Sport und News aus aller Welt.",
"visible_link": "https://www.n-tv.de",
"rank": 6
},
{
"link": "https://www.gala.de/stars/news/",
"title": "Alle News der Stars und exklusive VIP-News | GALA.de",
"snippet": "News zu Stars und VIPs: Ob Hollywood-Schauspieler, TV-Liebling, C-Promi oder Supermodel - auf GALA.de verpassen Sie keine News zu ihrem Star.",
"visible_link": "https://www.gala.de/stars/news",
"rank": 7
},
{
"link": "https://www.ka-news.de/",
"title": "Karlsruhe News - KSC, Sport, Veranstaltungen, Karlsruhe ...",
"snippet": "News für Karlsruhe. Mit Nachrichten aus Karlsruhe und der Region Karlsruhe, Infos über den KSC, Veranstaltungen und Ausgeh-Tipp für die Region Karlsruhe",
"visible_link": "https://www.ka-news.de",
"rank": 8
},
{
"link": "http://www.pi-news.net/",
"title": "PI-NEWS | Politically Incorrect",
"snippet": "Von ACHILL PATRAS | Das erste Patrioten-Camp ist dieses Jahr in Mallorca erfolgreich über die Bühne gegangen. Ein guter Zeitpunkt, für mehr patriotische Reiseanbieter zu werben.",
"visible_link": "www.pi-news.net",
"rank": 9
},
{
"link": "https://www.bbc.com/news",
"title": "Home - BBC News",
"snippet": "Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also ...",
"visible_link": "https://www.bbc.com/news",
"rank": 10
}
]
}
},
"se-scraper": {
"1": {
"time": "Tue, 11 Jun 2019 15:48:33 GMT",
"no_results": false,
"effective_query": "",
"num_results": "48.300 Ergebnisse",
"results": [
{
"link": "http://konjugator.reverso.net/konjugation-franzosisch-verb-ne%20pas%20se%20scraper.html",
"title": "Konjugation ne pas se scraper | Konjugieren verb …",
"snippet": "Reverso-Konjugation: Konjugation des französischen Verbs ne pas se scraper, Konjugator für französische Verben, unregelmäßige Verben, Übersetzung,Grammatik",
"visible_link": "konjugator.reverso.net/konjugation-franzosisch-verb-ne pas se scraper.html",
"rank": 1
},
{
"link": "https://www.amazon.de/ADAALEN-Silikon-Frischk%C3%83%C2%A4se-Scraper-Anr%C3%83%C2%BChrspatel/dp/B01KVXVB6C",
"title": "ADAALEN Silikon Frischkäse Butter Scraper Butter Batter ...",
"snippet": "Amazon.de: Küchen- und Haushaltsartikel online - ADAALEN Silikon Frischkäse Butter Scraper Butter Batter Anrührspatel. Beschreibung: Die Silikon Sahnebutter Schaber aus reinem Silikon. Mit einem.",
"visible_link": "https://www.amazon.de/ADAALEN-Silikon-Frischkäse-Scraper-Anrührspatel/dp/B01KVXVB6C",
"rank": 2
},
{
"link": "https://www.amazon.de/Moppi-Silikon-Frischk%C3%83%C2%A4se-Scraper-Anr%C3%83%C2%BChrspatel/dp/B01K8JT38C",
"title": "Moppi Silikon Frischkäse Butter Scraper Butter Batter ...",
"snippet": "Amazon.de: Küchen- und Haushaltsartikel online - Moppi Silikon Frischkäse Butter Scraper Butter Batter Anrührspatel. Beschreibung: Die Silikon Sahnebutter Schaber aus reinem Silikon. Mit einem.",
"visible_link": "https://www.amazon.de/Moppi-Silikon-Frischkäse-Scraper-Anrührspatel/dp/B01K8JT38C",
"rank": 3
},
{
"link": "https://github.com/NikolaiT/se-scraper",
"title": "GitHub - NikolaiT/se-scraper: Javascript scraping …",
"snippet": "Search Engine Scraper - se-scraper. This node module allows you to scrape search engines concurrently with different proxies. If you don't have much technical experience or don't want to purchase proxies, you can use my scraping service.",
"visible_link": "https://github.com/NikolaiT/se-scraper",
"rank": 4
},
{
"link": "https://libraries.io/npm/se-scraper/1.0.1",
"title": "se-scraper 1.0.1 on npm - Libraries.io",
"snippet": "Search Engine Scraper - se-scraper. This node module allows you to scrape search engines concurrently with different proxies. If you don't have much technical experience or don't want to purchase proxies, you can use my scraping service.",
"visible_link": "https://libraries.io/npm/se-scraper/1.0.1",
"rank": 5
},
{
"link": "https://www.idealo.at/preisvergleich/OffersOfProduct/3071147_-multi-purpose-scraper-toko.html",
"title": "Toko Multi-Purpose Scraper ab € 4,48 | Preisvergleich bei ...",
"snippet": "Bereits ab € 4,48 Große Shopvielfalt Testberichte & Meinungen | Jetzt Toko Multi-Purpose Scraper Ski-Zubehör günstig kaufen bei idealo.at",
"visible_link": "https://www.idealo.at/preisvergleich/OffersOfProduct/3071147_-multi-purpose-scraper...",
"rank": 6
},
{
"link": "https://woerterbuch.reverso.net/franzosisch-definitionen/se+scraper",
"title": "se scraper Definition | Französisch Definition Wörterbuch ...",
"snippet": "Definition se scraper Franzosisch, Synonym und Antonym, Siehe auch 'scrapeur',scrap',scrapie',scalper'",
"visible_link": "https://woerterbuch.reverso.net/franzosisch-definitionen/se+scraper",
"rank": 7
},
{
"link": "https://www.idealo.de/preisvergleich/OffersOfProduct/3071147_-multi-purpose-scraper-toko.html",
"title": "Toko Multi-Purpose Scraper ab 3,99 € | Preisvergleich bei ...",
"snippet": "Ver­sand in­ner­halb von 3 Werk­ta­gen nach Zah­lungs­ein­gang.",
"visible_link": "https://www.idealo.de/preisvergleich/OffersOfProduct/3071147_-multi-purpose-scraper...",
"rank": 8
},
{
"link": "https://www.sonic-equipment.com/se/scraper-10233.html",
"title": "Scraper - sonic-equipment.com",
"snippet": "Universal scraper for removing sealants, filler, gaskets etc.",
"visible_link": "https://www.sonic-equipment.com/se/scraper-10233.html",
"rank": 9
}
]
}
}
}

View File

@ -1,7 +1,7 @@
{
"name": "se-scraper",
"version": "1.2.16",
"description": "A simple module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
"version": "1.3.0",
"description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
"homepage": "https://scrapeulous.com/",
"main": "index.js",
"scripts": {

48
run.js
View File

@ -8,22 +8,14 @@ let config = {
// how long to sleep between requests. a random sleep interval within the range [a,b]
// is drawn before every request. empty string for no sleeping.
sleep_range: '',
// which search engine to scrape
search_engine: 'baidu',
// whether debug information should be printed
// debug info is useful for developers when debugging
debug: true,
// whether verbose program output should be printed
// this output is informational
verbose: true,
// an array of keywords to scrape
keywords: ['cat', 'mouse'],
// alternatively you can specify a keyword_file. this overwrites the keywords array
keyword_file: '',
// the number of pages to scrape for each keyword
num_pages: 1,
// whether to start the browser in headless mode
headless: false,
headless: true,
// how much debug information should be printed
// level 0: print nothing
// level 1: print the most important information
// ...
// level 4: print everything, including very verbose details
debug_level: 1,
// specify flags passed to chrome here
chrome_flags: [],
// path to output file, data will be stored in JSON
@ -61,17 +53,19 @@ let config = {
}
};
function callback(err, response) {
if (err) { console.error(err) }
(async () => {
let scrape_config = {
// which search engine to scrape
search_engine: 'bing',
// an array of keywords to scrape
keywords: ['cat', 'mouse'],
// alternatively you can specify a keyword_file. this overwrites the keywords array
keyword_file: '',
// the number of pages to scrape for each keyword
num_pages: 2,
};
/* response object has the following properties:
let results = await se_scraper.scrape(config, scrape_config);
console.dir(results, {depth: null, colors: true});
})();
response.results - json object with the scraping results
response.metadata - json object with metadata information
response.statusCode - status code of the scraping process
*/
console.dir(response.results, {depth: null, colors: true});
}
se_scraper.scrape(config, callback);

View File

@ -3,168 +3,168 @@ const Scraper = require('./se_scraper');
class BingScraper extends Scraper {
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
// perform queries
const results = [];
$('#b_content #b_results .b_algo').each((i, link) => {
results.push({
link: $(link).find('h2 a').attr('href'),
title: $(link).find('h2').text(),
snippet: $(link).find('.b_caption p').text(),
visible_link: $(link).find('cite').text(),
})
});
// perform queries
const results = [];
$('#b_content #b_results .b_algo').each((i, link) => {
results.push({
link: $(link).find('h2 a').attr('href'),
title: $(link).find('h2').text(),
snippet: $(link).find('.b_caption p').text(),
visible_link: $(link).find('cite').text(),
})
});
// 'Including results for', 'Einschließlich Ergebnisse'
let no_results = this.no_results(
['There are no results', 'Es gibt keine Ergebnisse'],
$('#b_results').text()
);
// 'Including results for', 'Einschließlich Ergebnisse'
let no_results = this.no_results(
['There are no results', 'Es gibt keine Ergebnisse'],
$('#b_results').text()
);
let effective_query = $('#sp_requery a').first().text() || '';
let effective_query = $('#sp_requery a').first().text() || '';
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
return {
time: (new Date()).toUTCString(),
no_results: no_results,
effective_query: effective_query,
num_results: $('#b_content .sb_count').text(),
results: cleaned,
}
}
return {
time: (new Date()).toUTCString(),
no_results: no_results,
effective_query: effective_query,
num_results: $('#b_content .sb_count').text(),
results: cleaned,
}
}
async load_start_page() {
let startUrl = this.build_start_url('https://www.bing.com/search?') || 'https://www.bing.com/';
async load_start_page() {
let startUrl = this.build_start_url('https://www.bing.com/search?') || 'https://www.bing.com/';
try {
await this.page.goto(startUrl);
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
}
return true;
}
try {
await this.page.goto(startUrl);
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async next_page() {
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
await this.page.waitForNavigation();
async next_page() {
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
await this.page.waitForNavigation();
return true;
}
return true;
}
async wait_for_results() {
await this.page.waitForSelector('#b_content', { timeout: 5000 });
await this.sleep(750);
}
async wait_for_results() {
await this.page.waitForSelector('#b_content', { timeout: 5000 });
await this.sleep(750);
}
async detected() {
// TODO: I was actually never detected by bing. those are good boys.
}
async detected() {
// TODO: I was actually never detected by bing. those are good boys.
}
}
class BingNewsScraper extends Scraper {
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
// perform queries
const results = [];
$('#algocore .newsitem').each((i, link) => {
results.push({
link: $(link).attr('url'),
title: $(link).find('a.title').text(),
snippet: $(link).find('.snippet').text(),
date: $(link).find('.source span').last().text(),
})
});
// perform queries
const results = [];
$('#algocore .newsitem').each((i, link) => {
results.push({
link: $(link).attr('url'),
title: $(link).find('a.title').text(),
snippet: $(link).find('.snippet').text(),
date: $(link).find('.source span').last().text(),
})
});
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
return {
time: (new Date()).toUTCString(),
results: cleaned,
}
}
return {
time: (new Date()).toUTCString(),
results: cleaned,
}
}
async load_start_page() {
async load_start_page() {
let startUrl = 'https://www.bing.com/news/search?';
try {
await this.page.goto(startUrl);
if (this.config.set_manual_settings === true) {
console.log('Sleeping 30 seconds. Set your settings now.');
await this.sleep(30000);
}
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
}
try {
await this.page.goto(startUrl);
if (this.config.set_manual_settings === true) {
console.log('Sleeping 30 seconds. Set your settings now.');
await this.sleep(30000);
}
await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
} catch (e) {
return false;
}
return true;
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async next_page() {
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
await this.page.waitForNavigation();
async next_page() {
let next_page_link = await this.page.$('.sb_pagN', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
await this.page.waitForNavigation();
return true;
}
return true;
}
async wait_for_results() {
await this.page.waitForSelector('#news', { timeout: 5000 });
await this.sleep(2000);
}
async wait_for_results() {
await this.page.waitForSelector('#news', { timeout: 5000 });
await this.sleep(2000);
}
async detected() {
// TODO: I was actually never detected by bing news.
}
async detected() {
// TODO: I was actually never detected by bing news.
}
}
module.exports = {
BingNewsScraper: BingNewsScraper,
BingScraper: BingScraper,
BingNewsScraper: BingNewsScraper,
BingScraper: BingScraper,
};

21
src/modules/common.js Normal file
View File

@ -0,0 +1,21 @@
/**
 * Print a diagnostic message, or run a callback, when `loglevel` is enabled.
 *
 * Nothing happens unless `loglevel <= config.debug_level`. When a message is
 * given, objects are dumped with `console.dir` and every other value is
 * printed through `console.log` with an `[i] ` prefix. When no message is
 * given, `cb` (if any) is invoked instead.
 *
 * @param {object} config - configuration object; only `debug_level` is read.
 * @param {number} loglevel - level of this message.
 * @param {*} [msg=null] - message to print; null/undefined means "no message".
 * @param {Function} [cb=null] - invoked when no message was given.
 * @throws {Error} when `loglevel` is not of type number.
 */
function log(config, loglevel, msg = null, cb = null) {
    if (typeof loglevel !== 'number') {
        throw new Error('loglevel must be numeric.');
    }

    // Negated `<=` (rather than `>`) so that a missing or NaN debug_level
    // keeps suppressing all output.
    if (!(loglevel <= config.debug_level)) {
        return;
    }

    // Explicit null/undefined check: falsy but meaningful messages such as
    // 0, '' or false must still be printed rather than silently dropped.
    if (msg !== null && msg !== undefined) {
        if (typeof msg === 'object') {
            console.dir(msg, {depth: null, colors: true});
        } else {
            console.log(`[i] ${msg}`);
        }
    } else if (cb) {
        cb();
    }
}
module.exports = {
log: log,
};

View File

@ -1,5 +1,7 @@
const cheerio = require('cheerio');
const Scraper = require('./se_scraper');
const common = require('./common.js');
var log = common.log;
class GoogleScraper extends Scraper {
@ -7,53 +9,53 @@ class GoogleScraper extends Scraper {
super(...args);
}
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
// perform queries
const results = [];
$('#center_col .g').each((i, link) => {
results.push({
link: $(link).find('.r a').attr('href'),
title: $(link).find('.r a').text(),
snippet: $(link).find('span.st').text(),
visible_link: $(link).find('.r cite').text(),
date: $(link).find('span.f').text() || '',
})
});
// perform queries
const results = [];
$('#center_col .g').each((i, link) => {
results.push({
link: $(link).find('.r a').attr('href'),
title: $(link).find('.r a').text(),
snippet: $(link).find('span.st').text(),
visible_link: $(link).find('.r cite').text(),
date: $(link).find('span.f').text() || '',
})
});
// 'Ergebnisse für', 'Showing results for'
let no_results = this.no_results(
['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
'No results found for'],
$('#main').text()
);
// 'Ergebnisse für', 'Showing results for'
let no_results = this.no_results(
['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
'No results found for'],
$('#main').text()
);
let effective_query = $('#fprsl').text() || '';
if (!effective_query) {
effective_query = $('#fprs a').text()
}
let effective_query = $('#fprsl').text() || '';
if (!effective_query) {
effective_query = $('#fprs a').text()
}
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.rank = this.result_rank++;
cleaned.push(res);
}
}
return {
time: (new Date()).toUTCString(),
num_results: $('#resultStats').text(),
no_results: no_results,
effective_query: effective_query,
results: cleaned
}
}
return {
time: (new Date()).toUTCString(),
num_results: $('#resultStats').text(),
no_results: no_results,
effective_query: effective_query,
results: cleaned
}
}
async load_start_page() {
async load_start_page() {
let startUrl = 'https://www.google.com';
if (this.config.google_settings) {
@ -71,357 +73,355 @@ class GoogleScraper extends Scraper {
}
}
if (this.config.verbose) {
console.log('Using startUrl: ' + startUrl);
log(this.config, 1, 'Using startUrl: ' + startUrl);
await this.page.goto(startUrl);
try {
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
} catch (e) {
return false;
}
await this.page.goto(startUrl);
return true;
}
try {
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
} catch (e) {
return false;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
return true;
}
async next_page() {
let next_page_link = await this.page.$('#pnnext', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
return true;
}
async next_page() {
let next_page_link = await this.page.$('#pnnext', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
async wait_for_results() {
await this.page.waitForSelector('#center_col', { timeout: this.STANDARD_TIMEOUT });
await this.sleep(500);
}
return true;
}
async wait_for_results() {
await this.page.waitForSelector('#center_col', { timeout: this.STANDARD_TIMEOUT });
await this.sleep(500);
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
/**
 * Scraper for the news vertical of the regular Google search
 * (`https://www.google.com/search?...&tbm=nws`).
 */
class GoogleNewsOldScraper extends Scraper {

    /**
     * Extract the news results from the HTML of a result page.
     *
     * @param {string} html - page source to parse.
     * @returns {{time: string, results: Object[], no_results: *, effective_query: string}}
     *     `results` holds only entries with a non-blank link, each given a
     *     running `rank` taken from `this.result_rank`.
     */
    parse(html) {
        const $ = cheerio.load(html);

        // perform queries
        const results = [];
        $('.g').each((i, result) => {
            results.push({
                link: $(result).find('h3 a').attr('href'),
                title: $(result).find('h3 a').text(),
                snippet: $(result).find('.st').text(),
                date: $(result).find('.nsa').text(),
            });
        });

        const no_results = this.no_results(
            ['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
                'No results found for', 'did not match any news results'],
            $('#main').text()
        );

        let effective_query = $('#fprsl').text() || '';
        if (!effective_query) {
            effective_query = $('#fprs a').text();
        }

        // drop entries without a usable link and number the remaining ones
        const cleaned = [];
        for (const res of results) {
            if (res.link && res.link.trim()) {
                res.rank = this.result_rank++;
                cleaned.push(res);
            }
        }

        return {
            time: (new Date()).toUTCString(),
            results: cleaned,
            no_results: no_results,
            effective_query: effective_query,
        };
    }

    /**
     * Open the news search start page and wait for the `q` input.
     * A fixed URL is used when `build_start_url` returns a falsy value.
     *
     * @returns {Promise<boolean>} false when navigation or the wait fails, true otherwise.
     */
    async load_start_page() {
        const startUrl = this.build_start_url('https://www.google.com/search?source=lnms&tbm=nws&')
            || 'https://www.google.com/search?source=lnms&tbm=nws';

        try {
            await this.page.goto(startUrl);
            await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
        } catch (e) {
            return false;
        }
        return true;
    }

    /**
     * Navigate directly to the news result URL for the keyword.
     *
     * @param {string} keyword - search term; it is URL-encoded so that
     *     characters such as `&`, `#` or spaces cannot alter the query string.
     */
    async search_keyword(keyword) {
        const query = encodeURIComponent(keyword);
        const url = this.build_start_url(`https://www.google.com/search?q=${query}&source=lnms&tbm=nws&`) ||
            `https://www.google.com/search?q=${query}&hl=en&source=lnms&tbm=nws`;

        await this.page.goto(url, {
            referer: 'https://www.google.com/'
        });

        await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
    }

    /**
     * Click the `#pnnext` pagination link if the current page has one.
     *
     * @returns {Promise<boolean>} false when no `#pnnext` element exists, true after clicking it.
     */
    async next_page() {
        const next_page_link = await this.page.$('#pnnext');
        if (!next_page_link) {
            return false;
        }
        await next_page_link.click();
        return true;
    }

    /**
     * Wait until `#main` is present, then pause for another 500ms.
     */
    async wait_for_results() {
        await this.page.waitForSelector('#main', { timeout: this.STANDARD_TIMEOUT });
        await this.sleep(500);
    }

    /**
     * Report whether the current page looks like a block/captcha page.
     *
     * @returns {Promise<boolean>} true when the page HTML contains
     *     'detected unusual traffic' or the page title contains '/sorry/'.
     */
    async detected() {
        const title = await this.page.title();
        const html = await this.page.content();
        return html.includes('detected unusual traffic') || title.includes('/sorry/');
    }
}
/**
 * Scraper for Google image search (`https://www.google.com/imghp?tbm=isch`).
 */
class GoogleImageScraper extends Scraper {

    /**
     * Extract the image results from the HTML of a result page.
     *
     * @param {string} html - page source to parse.
     * @returns {{time: string, no_results: *, results: Object[], effective_query: string}}
     *     `results` holds only entries whose trimmed link is longer than
     *     10 characters, each given a running `rank` from `this.result_rank`.
     */
    parse(html) {
        // load the page source into cheerio
        const $ = cheerio.load(html);

        // perform queries
        const results = [];
        $('.rg_bx').each((i, link) => {
            const link_element = $(link).find('a.rg_l').attr('href');
            results.push({
                link: link_element,
                // image URL taken from the `imgurl` parameter of the result link
                clean_link: clean_image_url(link_element),
                snippet: $(link).find('.a-no-hover-decoration').text(),
            });
        });

        const no_results = this.no_results(
            ['stimmt mit keinem Bildergebnis', 'Keine Ergebnisse für', 'not match any image results', 'No results found for',],
            $('#main').text()
        );

        let effective_query = $('#fprsl').text() || '';
        if (!effective_query) {
            effective_query = $('#fprs a').text();
        }

        // drop entries without a plausible link, normalise and number the rest
        const cleaned = [];
        for (const res of results) {
            if (res.link && res.link.trim() && res.link.trim().length > 10) {
                res.link = res.link.trim();
                res.rank = this.result_rank++;
                cleaned.push(res);
            }
        }

        return {
            time: (new Date()).toUTCString(),
            no_results: no_results,
            results: cleaned,
            effective_query: effective_query
        };
    }

    /**
     * Open the image search start page and wait for the `q` input.
     *
     * @returns {Promise<boolean>} false when navigation or the wait fails, true otherwise.
     */
    async load_start_page() {
        try {
            await this.page.goto(`https://www.google.com/imghp?tbm=isch`, {
                referer: 'https://www.google.com/'
            });
            await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
        } catch (e) {
            return false;
        }
        return true;
    }

    /**
     * Put the keyword into the `q` search input and submit it with Enter.
     *
     * @param {string} keyword - search term to submit.
     */
    async search_keyword(keyword) {
        const input = await this.page.$('input[name="q"]');
        await this.set_input_value(`input[name="q"]`, keyword);
        await this.sleep(50);
        await input.focus();
        await this.page.keyboard.press("Enter");
    }

    /**
     * Pagination is not implemented for image search.
     *
     * @returns {Promise<boolean>} always false.
     */
    async next_page() {
        return false;
    }

    /**
     * Wait until `#main` is present, then pause for another 500ms.
     */
    async wait_for_results() {
        await this.page.waitForSelector('#main', { timeout: this.STANDARD_TIMEOUT });
        await this.sleep(500);
    }

    /**
     * Report whether the current page looks like a block/captcha page.
     *
     * @returns {Promise<boolean>} true when the page HTML contains
     *     'detected unusual traffic' or the page title contains '/sorry/'.
     */
    async detected() {
        const title = await this.page.title();
        const html = await this.page.content();
        return html.includes('detected unusual traffic') || title.includes('/sorry/');
    }
}
/**
 * Scraper for the Google News app (`https://news.google.com`).
 */
class GoogleNewsScraper extends Scraper {

    /**
     * Extract the headlines from the HTML of a Google News page.
     *
     * Titles are remembered in `this.all_results`; a headline whose title
     * was already seen is not reported again.
     *
     * @param {string} html - page source to parse.
     * @returns {{time: string, results: Object[], no_results: *, effective_query: string}}
     *     `results` holds only entries with a non-blank title, each given a
     *     running `rank` taken from `this.result_rank`.
     */
    parse(html) {
        const $ = cheerio.load(html);

        // `all_results` is normally created by load_start_page(); make sure
        // it exists when parse() is called first.
        if (!this.all_results) {
            this.all_results = new Set();
        }

        // perform queries
        const results = [];
        $('article h3').each((i, headline) => {
            const title = $(headline).find('a span').text();

            let snippet;
            let link;
            let date;
            let ts;
            try {
                const time_element = $(headline).parent().parent().parent().find('time');
                snippet = $(headline).parent().find('p').text();
                link = $(headline).find('a').attr('href');
                date = time_element.text();
                ts = time_element.attr('datetime');
            } catch (e) {
                // best effort: fields that could not be read stay undefined
            }

            if (!this.all_results.has(title)) {
                results.push({
                    rank: i + 1, // provisional, re-assigned from result_rank below
                    title: title,
                    snippet: snippet,
                    link: link,
                    date: date,
                    ts: ts,
                });
            }
            this.all_results.add(title);
        });

        const no_results = this.no_results(
            ['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
                'No results found for', 'did not match any news results'],
            $('body').text()
        );

        const effective_query = $('#fprsl').text() || '';

        // drop entries without a title and number the remaining ones
        const cleaned = [];
        for (const res of results) {
            if (res.title && res.title.trim()) {
                res.rank = this.result_rank++;
                cleaned.push(res);
            }
        }

        return {
            time: (new Date()).toUTCString(),
            results: cleaned,
            no_results: no_results,
            effective_query: effective_query,
        };
    }

    /**
     * Open the Google News front page, wait for the search input and store
     * the parsed front page under `this.results['frontpage']`.
     * Resets `this.all_results` first and `this.result_rank` to 1 afterwards.
     *
     * @returns {Promise<boolean>} false when any step throws, true otherwise.
     */
    async load_start_page() {
        try {
            this.all_results = new Set();
            await this.page.goto(`https://news.google.com/?hl=en-US&gl=US&ceid=US:en`, {
                referer: 'https://news.google.com'
            });

            await this.page.waitForSelector('div input:nth-child(2)', {timeout: this.STANDARD_TIMEOUT});
            await this.sleep(1000);

            // parse here front page results
            const html = await this.page.content();
            this.results['frontpage'] = this.parse(html);
            this.result_rank = 1;
        } catch(e) {
            return false;
        }
        return true;
    }

    /**
     * Type the keyword into the search input and submit it with Enter.
     *
     * @param {string} keyword - search term to submit.
     */
    async search_keyword(keyword) {
        await this.page.waitForSelector('div input:nth-child(2)', { timeout: this.STANDARD_TIMEOUT });
        const input = await this.page.$('div input:nth-child(2)');
        // overwrites last text in input
        await input.click({ clickCount: 3 });
        await input.type(keyword);
        await this.sleep(50);
        await input.focus();
        await this.page.keyboard.press("Enter");
    }

    /**
     * @returns {Promise<boolean>} always false.
     */
    async next_page() {
        // google news app does not have next pages
        return false;
    }

    /**
     * Wait for an element whose `data-n-q` attribute equals `this.keyword`,
     * then pause for another 2000ms.
     */
    async wait_for_results() {
        await this.page.waitForSelector(`[data-n-q="${this.keyword}"]`, { timeout: this.STANDARD_TIMEOUT });
        await this.sleep(2000);
    }

    /**
     * Report whether the current page looks like a block/captcha page.
     *
     * @returns {Promise<boolean>} true when the page HTML contains
     *     'detected unusual traffic' or the page title contains '/sorry/'.
     */
    async detected() {
        const title = await this.page.title();
        const html = await this.page.content();
        return html.includes('detected unusual traffic') || title.includes('/sorry/');
    }
}
/**
 * Extract the decoded `imgurl` parameter from a Google image result link.
 *
 * Example input:
 * https://www.google.com/imgres?imgurl=https%3A%2F%2Fupload.wikimedia.org%2F...jpg&imgrefurl=...
 *
 * @param {string} url - result link, typically an `/imgres?...` URL.
 * @returns {string|undefined} the decoded image URL; the raw (still encoded)
 *     value when it contains malformed percent-escapes; undefined when `url`
 *     is not a string or has no `imgurl=` parameter.
 */
function clean_image_url(url) {
    if (typeof url !== 'string') {
        return undefined;
    }
    // `[^&]*` also matches when `imgurl` is the last parameter of the URL.
    const match = /imgurl=([^&]*)/.exec(url);
    if (match === null) {
        return undefined;
    }
    try {
        return decodeURIComponent(match[1]);
    } catch (e) {
        // malformed escape sequence: hand back the undecoded value
        return match[1];
    }
}
/**
 * Unwrap a Google redirect link of the form `/url?q=<target>&...`.
 *
 * Examples:
 * /url?q=https://www.zeit.de/thema/donald-trump&sa=U&ved=...  -> https://www.zeit.de/thema/donald-trump
 * /search?q=trump&hl=de&gbv=2&...                             -> returned unchanged
 *
 * @param {string} url - link taken from a result page.
 * @returns {string} the decoded target of the `url?q=` parameter (the raw
 *     value when it contains malformed percent-escapes), or `url` itself
 *     when it is not such a redirect link.
 */
function clean_google_url(url) {
    if (typeof url !== 'string') {
        return url;
    }
    // `[^&]*` also matches when `q` is the last parameter of the URL.
    const match = /url\?q=([^&]*)/.exec(url);
    if (match === null) {
        return url;
    }
    try {
        return decodeURIComponent(match[1]);
    } catch (e) {
        // malformed escape sequence: hand back the undecoded value
        return match[1];
    }
}
module.exports = {
GoogleNewsOldScraper: GoogleNewsOldScraper,
GoogleScraper: GoogleScraper,
GoogleImageScraper: GoogleImageScraper,
GoogleNewsScraper: GoogleNewsScraper,
GoogleNewsOldScraper: GoogleNewsOldScraper,
GoogleScraper: GoogleScraper,
GoogleImageScraper: GoogleImageScraper,
GoogleNewsScraper: GoogleNewsScraper,
};

View File

@ -1,31 +1,31 @@
const cheerio = require('cheerio');
module.exports = {
get_ip_data: get_ip_data,
get_http_headers: get_http_headers,
get_ip_data: get_ip_data,
get_http_headers: get_http_headers,
};
/**
 * Load https://ipinfo.io/json in the given page and return the JSON it shows.
 *
 * The JSON text is read from the `<pre>` element of the rendered page.
 *
 * @param {Object} page - browser page offering `goto()` and `content()`.
 * @returns {Promise<Object>} the parsed ipinfo.io response.
 * @throws {SyntaxError} when the `<pre>` text is not valid JSON.
 */
async function get_ip_data(page) {
    // NOTE(review): the option objects are passed through unchanged; confirm
    // that the page implementation in use actually honours them.
    await page.goto('https://ipinfo.io/json', {
        waitLoad: true,
        waitNetworkIdle: true
    });
    const json = await page.content({
        timeout: 20000
    });
    const $ = cheerio.load(json);
    const ipinfo_text = $('pre').text();
    return JSON.parse(ipinfo_text);
}
/**
 * Load https://httpbin.org/get in the given page and return the JSON it shows.
 *
 * The JSON text is read from the `<pre>` element of the rendered page.
 *
 * @param {Object} page - browser page offering `goto()` and `content()`.
 * @returns {Promise<Object>} the parsed httpbin.org response.
 * @throws {SyntaxError} when the `<pre>` text is not valid JSON.
 */
async function get_http_headers(page) {
    // NOTE(review): the options are passed through unchanged; confirm that
    // the page implementation in use actually honours them.
    await page.goto('https://httpbin.org/get', {
        waitLoad: true,
        waitNetworkIdle: true
    });
    const headers = await page.content();

    const $ = cheerio.load(headers);
    const headers_text = $('pre').text();
    return JSON.parse(headers_text);
}

View File

@ -1,4 +1,6 @@
const meta = require('./metadata.js');
const common = require('./common.js');
var log = common.log;
/*
Get useful JS knowledge and get awesome...
@ -118,13 +120,15 @@ module.exports = class Scraper {
// check that our proxy is working by confirming
// that ipinfo.io sees the proxy IP address
if (this.proxy && this.config.log_ip_address === true) {
console.log(`${this.metadata.ipinfo.ip} vs ${this.proxy}`);
log(this.config, 3, `${this.metadata.ipinfo.ip} vs ${this.proxy}`);
try {
// if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
if (!this.proxy.includes(this.metadata.ipinfo.ip)) {
console.error('Proxy not working properly.');
return false;
} else {
log(this.config, 1, `Using valid Proxy: ${this.proxy}`);
}
} catch (exception) {
}
@ -152,7 +156,7 @@ module.exports = class Scraper {
this.results[keyword] = {};
this.result_rank = 1;
if (this.pluggable.before_keyword_scraped) {
if (this.pluggable && this.pluggable.before_keyword_scraped) {
await this.pluggable.before_keyword_scraped({
results: this.results,
num_keywords: this.num_keywords,
@ -175,9 +179,7 @@ module.exports = class Scraper {
do {
if (this.config.verbose === true) {
console.log(`${this.config.search_engine} scrapes keyword "${keyword}" on page ${page_num}`);
}
log(this.config, 1, `${this.config.search_engine} scrapes keyword "${keyword}" on page ${page_num}`);
await this.wait_for_results();
@ -247,9 +249,7 @@ module.exports = class Scraper {
baseUrl += `${key}=${settings[key]}&`
}
if (this.config.verbose) {
console.log('Using startUrl: ' + baseUrl);
}
log(this.config, 1, 'Using startUrl: ' + baseUrl);
return baseUrl;
}
@ -266,9 +266,7 @@ module.exports = class Scraper {
async random_sleep() {
const [min, max] = this.config.sleep_range;
let rand = Math.floor(Math.random() * (max - min + 1) + min); //Generate Random number
if (this.config.verbose === true) {
console.log(`Sleeping for ${rand}s`);
}
log(this.config, 1, `Sleeping for ${rand}s`);
await this.sleep(rand * 1000);
}
@ -282,9 +280,7 @@ module.exports = class Scraper {
no_results(needles, html) {
for (let needle of needles) {
if (html.includes(needle)) {
if (this.config.debug) {
console.log(`HTML contains needle ${needle}. no_results=true`);
}
console.log(this.config, 2, `HTML contains needle ${needle}. no_results=true`);
return true;
}
}

View File

@ -1,81 +1,81 @@
module.exports = {
random_user_agent: random_user_agent,
random_user_agent: random_user_agent,
};
/**
 * Pick one entry of the module-level `user_agents` list at random.
 *
 * @returns {string} a user agent string (undefined only if the list is empty).
 */
function random_user_agent() {
    const index = Math.floor(Math.random() * user_agents.length);
    return user_agents[index];
}
// Desktop/tablet browser user agent strings that random_user_agent() picks from.
// Each string is listed exactly once.
// updated: 29 Jan 2019
const user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
    'Mozilla/5.0 (iPad; CPU OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 YaBrowser/18.11.1.805 Yowser/2.5 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.106',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.80 Chrome/71.0.3578.80 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
    'Mozilla/5.0 (Windows NT 10.0; rv:64.0) Gecko/20100101 Firefox/64.0',
    'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
    'Mozilla/5.0 (X11; CrOS x86_64 11151.59.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.94 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
];

View File

@ -3,103 +3,103 @@ const Scraper = require('./se_scraper');
class YoutubeScraper extends Scraper {
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
// perform queries
const results = [];
$('#contents ytd-video-renderer,#contents ytd-grid-video-renderer').each((i, link) => {
results.push({
link: $(link).find('#video-title').attr('href'),
title: $(link).find('#video-title').text(),
snippet: $(link).find('#description-text').text(),
channel: $(link).find('#byline a').text(),
channel_link: $(link).find('#byline a').attr('href'),
num_views: $(link).find('#metadata-line span:nth-child(1)').text(),
release_date: $(link).find('#metadata-line span:nth-child(2)').text(),
})
});
// perform queries
const results = [];
$('#contents ytd-video-renderer,#contents ytd-grid-video-renderer').each((i, link) => {
results.push({
link: $(link).find('#video-title').attr('href'),
title: $(link).find('#video-title').text(),
snippet: $(link).find('#description-text').text(),
channel: $(link).find('#byline a').text(),
channel_link: $(link).find('#byline a').attr('href'),
num_views: $(link).find('#metadata-line span:nth-child(1)').text(),
release_date: $(link).find('#metadata-line span:nth-child(2)').text(),
})
});
let no_results = this.no_results(
['No results found', 'Keine Ergebnisse', 'Es werden Ergebnisse angezeigt', 'Showing results for' ],
$('yt-showing-results-for-renderer').text()
);
let no_results = this.no_results(
['No results found', 'Keine Ergebnisse', 'Es werden Ergebnisse angezeigt', 'Showing results for' ],
$('yt-showing-results-for-renderer').text()
);
let effective_query = $('#corrected-link').text() || '';
let effective_query = $('#corrected-link').text() || '';
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.title = res.title.trim();
res.snippet = res.snippet.trim();
res.rank = this.result_rank++;
const cleaned = [];
for (var i=0; i < results.length; i++) {
let res = results[i];
if (res.link && res.link.trim() && res.title && res.title.trim()) {
res.title = res.title.trim();
res.snippet = res.snippet.trim();
res.rank = this.result_rank++;
// check if this result has been used before
if (this.all_videos.has(res.title) === false) {
cleaned.push(res);
}
this.all_videos.add(res.title);
}
}
// check if this result has been used before
if (this.all_videos.has(res.title) === false) {
cleaned.push(res);
}
this.all_videos.add(res.title);
}
}
return {
time: (new Date()).toUTCString(),
no_results: no_results,
effective_query: effective_query,
num_results: '',
results: cleaned,
}
}
return {
time: (new Date()).toUTCString(),
no_results: no_results,
effective_query: effective_query,
num_results: '',
results: cleaned,
}
}
async load_start_page() {
try {
this.all_videos = new Set();
await this.page.goto('https://www.youtube.com', {
referer: 'https://google.com'
});
await this.page.waitForSelector('input[id="search"]', { timeout: 5000 });
// before we do anything, parse the results of the front page of youtube
await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 10000 });
await this.sleep(500);
let html = await this.page.content();
this.results['frontpage'] = this.parse(html);
this.result_rank = 1;
} catch(e) {
return false;
}
return true;
}
async load_start_page() {
try {
this.all_videos = new Set();
await this.page.goto('https://www.youtube.com', {
referer: 'https://google.com'
});
await this.page.waitForSelector('input[id="search"]', { timeout: 5000 });
// before we do anything, parse the results of the front page of youtube
await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 10000 });
await this.sleep(500);
let html = await this.page.content();
this.results['frontpage'] = this.parse(html);
this.result_rank = 1;
} catch(e) {
return false;
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[id="search"]');
// overwrites last text in input
await input.click({ clickCount: 3 });
await input.type(keyword);
await input.focus();
await this.page.keyboard.press("Enter");
}
async search_keyword(keyword) {
const input = await this.page.$('input[id="search"]');
// overwrites last text in input
await input.click({ clickCount: 3 });
await input.type(keyword);
await input.focus();
await this.page.keyboard.press("Enter");
}
async next_page() {
// youtube needs scrolling
// TODO: implement scrolling, no priority right now
return false;
}
async next_page() {
// youtube needs scrolling
// TODO: implement scrolling, no priority right now
return false;
}
async wait_for_results() {
await this.page.waitForFunction(`document.title.indexOf('${this.keyword}') !== -1`, { timeout: 5000 });
await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 5000 });
await this.sleep(500);
}
async wait_for_results() {
await this.page.waitForFunction(`document.title.indexOf('${this.keyword}') !== -1`, { timeout: 5000 });
await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 5000 });
await this.sleep(500);
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
module.exports = {
YoutubeScraper: YoutubeScraper,
YoutubeScraper: YoutubeScraper,
};

View File

@ -1,5 +1,6 @@
const zlib = require('zlib');
var fs = require('fs');
var os = require("os");
const google = require('./modules/google.js');
const amazon = require('./modules/amazon.js');
const bing = require('./modules/bing.js');
@ -9,6 +10,9 @@ const youtube = require('./modules/youtube.js');
const ua = require('./modules/user_agents.js');
const duckduckgo = require('./modules/duckduckgo.js');
const tickersearch = require('./modules/ticker_search.js');
const { Cluster } = require('./puppeteer-cluster/dist/index.js');
const common = require('./modules/common.js');
var log = common.log;
function write_results(fname, data) {
fs.writeFileSync(fname, data, (err) => {
@ -17,6 +21,15 @@ function write_results(fname, data) {
});
}
/**
 * Reads a text file and returns its non-blank lines.
 *
 * Used for keyword files as well as proxy files (one entry per line).
 * Lines that are empty or contain only whitespace are dropped; the
 * remaining lines are returned in file order without further trimming.
 *
 * @param {string} fname - path of the file to read
 * @returns {string[]} the non-blank lines of the file
 * @throws if the file cannot be read (error from fs.readFileSync)
 */
function read_keywords_from_file(fname) {
    // Split on LF or CRLF instead of os.EOL: the file may have been written
    // on a different platform than the one we run on. With os.EOL a CRLF file
    // read on Linux keeps a trailing "\r" on every entry, and an LF file read
    // on Windows is not split at all.
    const lines = fs.readFileSync(fname).toString().split(/\r?\n/);
    return lines.filter((line) => line.trim().length > 0);
}
function getScraper(searchEngine, args) {
return new {
google: google.GoogleScraper,
@ -39,31 +52,103 @@ function getScraper(searchEngine, args) {
}[searchEngine](args);
}
module.exports.handler = async function handler (event, context, callback) {
let config = event;
let pluggable = {};
if (config.custom_func) {
if (fs.existsSync(config.custom_func)) {
try {
Pluggable = require(config.custom_func);
pluggable = new Pluggable({config: config});
} catch (exception) {
console.error(exception);
class ScrapeManager {
constructor(config = {}) {
this.config = {
// the user agent to scrape with
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
// if random_user_agent is set to True, a random user agent is chosen
random_user_agent: false,
// whether to select manual settings in visible mode
set_manual_settings: false,
// log ip address data
log_ip_address: false,
// log http headers
log_http_headers: false,
// how long to sleep between requests. a random sleep interval within the range [a,b]
// is drawn before every request. empty string for no sleeping.
sleep_range: '',
// which search engine to scrape
search_engine: 'google',
compress: false, // compress
debug_level: 1, // 0 logs nothing, 1 logs most important stuff, ...., 4 logs everything
keywords: ['nodejs rocks',],
// whether to start the browser in headless mode
headless: true,
// specify flags passed to chrome here
chrome_flags: [],
// the number of pages to scrape for each keyword
num_pages: 1,
// path to output file, data will be stored in JSON
output_file: '',
// whether to prevent images, css, fonts and media from being loaded
// will speed up scraping a great deal
block_assets: true,
// path to js module that extends functionality
// this module should export the functions:
// get_browser, handle_metadata, close_browser
//custom_func: resolve('examples/pluggable.js'),
custom_func: '',
// path to a proxy file, one proxy per line. Example:
// socks5://78.94.172.42:1080
// http://118.174.233.10:48400
proxy_file: '',
proxies: [],
// check if headless chrome escapes common detection techniques
// this is a quick test and should be used for debugging
test_evasion: false,
apply_evasion_techniques: true,
// settings for puppeteer-cluster
puppeteer_cluster_config: {
timeout: 30 * 60 * 1000, // max timeout set to 30 minutes
monitor: false,
concurrency: Cluster.CONCURRENCY_BROWSER,
maxConcurrency: 1,
}
} else {
console.error(`File "${config.custom_func}" does not exist...`);
};
// overwrite default config
for (var key in config) {
this.config[key] = config[key];
}
this.config = parseEventData(this.config);
if (fs.existsSync(this.config.keyword_file)) {
this.config.keywords = read_keywords_from_file(this.config.keyword_file);
}
if (fs.existsSync(this.config.proxy_file)) {
this.config.proxies = read_keywords_from_file(this.config.proxy_file);
log(this.config, 1, `${this.config.proxies.length} proxies read from file.`);
}
log(this.config, 2, this.config);
this.cluster = null;
this.pluggable = null;
this.scraper = null;
}
try {
const startTime = Date.now();
config = parseEventData(config);
if (config.debug === true) {
console.log(config);
}
/*
* Launches the puppeteer cluster or browser.
*/
async start() {
if (config.keywords && config.search_engine) {
console.log(`[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${config.search_engine} with ${config.keywords.length} keywords on ${config.num_pages} pages each.`);
if (this.config.custom_func) {
if (fs.existsSync(this.config.custom_func)) {
try {
const PluggableClass = require(this.config.custom_func);
this.pluggable = new PluggableClass({config: this.config});
} catch (exception) {
console.error(exception);
}
} else {
console.error(`File "${this.config.custom_func}" does not exist!`);
}
}
// See here: https://peter.sh/experiments/chromium-command-line-switches/
@ -82,17 +167,17 @@ module.exports.handler = async function handler (event, context, callback) {
'--disable-notifications',
];
if (Array.isArray(config.chrome_flags) && config.chrome_flags.length) {
chrome_flags = config.chrome_flags;
if (Array.isArray(this.config.chrome_flags) && this.config.chrome_flags.length) {
chrome_flags = this.config.chrome_flags;
}
var user_agent = null;
if (config.user_agent) {
user_agent = config.user_agent;
if (this.config.user_agent) {
user_agent = this.config.user_agent;
}
if (config.random_user_agent === true) {
if (this.config.random_user_agent === true) {
user_agent = ua.random_user_agent();
}
@ -102,101 +187,106 @@ module.exports.handler = async function handler (event, context, callback) {
)
}
if (config.proxy) {
if (this.config.proxy) {
chrome_flags.push(
'--proxy-server=' + config.proxy,
'--proxy-server=' + this.config.proxy,
)
}
let launch_args = {
args: chrome_flags,
headless: config.headless,
headless: this.config.headless,
ignoreHTTPSErrors: true,
};
if (config.debug === true) {
console.log('Using the following puppeteer configuration: ');
console.dir(launch_args);
}
var results = {};
var num_requests = 0;
var metadata = {};
if (pluggable.start_browser) {
launch_args.config = config;
let browser = await pluggable.start_browser(launch_args);
const page = await browser.newPage();
if (config.do_work && pluggable.do_work) {
let res = await pluggable.do_work(page);
results = res.results;
num_requests = res.num_requests;
} else {
let obj = getScraper(config.search_engine, {
config: config,
context: context,
pluggable: pluggable,
page: page,
});
results = await obj.run({});
num_requests = obj.num_requests;
metadata = obj.metadata;
}
if (pluggable.close_browser) {
await pluggable.close_browser();
} else {
await browser.close();
}
log(this.config, 2, `Using the following puppeteer configuration: ${launch_args}`);
if (this.pluggable) {
launch_args.config = this.config;
this.browser = await this.pluggable.start_browser(launch_args);
} else {
// if no custom start_browser functionality was given
// use puppeteer-cluster for scraping
const { Cluster } = require('./puppeteer-cluster/dist/index.js');
var numClusters = config.puppeteer_cluster_config.maxConcurrency;
this.numClusters = this.config.puppeteer_cluster_config.maxConcurrency;
var perBrowserOptions = [];
// if we have at least one proxy, always use CONCURRENCY_BROWSER
// and set maxConcurrency to config.proxies.length + 1
// else use whatever configuration was passed
if (config.proxies.length > 0) {
config.puppeteer_cluster_config.concurrency = Cluster.CONCURRENCY_BROWSER;
// and set maxConcurrency to this.config.proxies.length + 1
// else use whatever configuration was passed
if (this.config.proxies.length > 0) {
this.config.puppeteer_cluster_config.concurrency = Cluster.CONCURRENCY_BROWSER;
// because we use real browsers, we ran out of memory on normal laptops
// when using more than maybe 5 or 6 browsers.
// therefore hardcode a limit here
numClusters = Math.min(config.proxies.length + 1, 5);
config.puppeteer_cluster_config.maxConcurrency = numClusters;
this.numClusters = Math.min(this.config.proxies.length + 1, 5);
this.config.puppeteer_cluster_config.maxConcurrency = this.numClusters;
// the first browser config with home IP
// the first browser config uses the home IP
perBrowserOptions = [launch_args, ];
for (var proxy of config.proxies) {
for (var proxy of this.config.proxies) {
perBrowserOptions.push({
headless: config.headless,
headless: this.config.headless,
ignoreHTTPSErrors: true,
args: chrome_flags.concat(`--proxy-server=${proxy}`)
})
}
}
var cluster = await Cluster.launch({
monitor: config.puppeteer_cluster_config.monitor,
timeout: config.puppeteer_cluster_config.timeout, // max timeout set to 30 minutes
concurrency: config.puppeteer_cluster_config.concurrency,
maxConcurrency: config.puppeteer_cluster_config.maxConcurrency,
this.cluster = await Cluster.launch({
monitor: this.config.puppeteer_cluster_config.monitor,
timeout: this.config.puppeteer_cluster_config.timeout, // max timeout set to 30 minutes
concurrency: this.config.puppeteer_cluster_config.concurrency,
maxConcurrency: this.config.puppeteer_cluster_config.maxConcurrency,
puppeteerOptions: launch_args,
perBrowserOptions: perBrowserOptions,
});
cluster.on('taskerror', (err, data) => {
this.cluster.on('taskerror', (err, data) => {
console.log(`Error while scraping ${data}: ${err.message}`);
console.log(err)
});
}
}
/*
* Scrapes the keywords specified by the config.
*/
async scrape(scrape_config = {}) {
this.config.keywords = scrape_config.keywords;
this.config.num_pages = scrape_config.num_pages;
this.config.search_engine = scrape_config.search_engine;
var results = {};
var num_requests = 0;
var metadata = {};
var startTime = Date.now();
if (this.config.keywords && this.config.search_engine) {
log(this.config, 1, `[se-scraper] started at [${(new Date()).toUTCString()}] and scrapes ${this.config.search_engine} with ${this.config.keywords.length} keywords on ${this.config.num_pages} pages each.`)
}
if (this.config.do_work && this.pluggable) {
let res = await this.pluggable.do_work(page);
results = res.results;
num_requests = res.num_requests;
} else {
// const page = await this.browser.newPage();
// this.scraper = getScraper(this.config.search_engine, {
// config: this.config,
// context: context,
// pluggable: pluggable,
// page: page,
// });
// results = await this.scraper.run({});
// num_requests = this.scraper.num_requests;
// metadata = this.scraper.metadata;
// }
// Each browser will get N/(K+1) keywords and will issue N/(K+1) * M total requests to the search engine.
// https://github.com/GoogleChrome/puppeteer/issues/678
// The question is: Is it possible to set proxies per Page? Per Browser?
@ -205,29 +295,29 @@ module.exports.handler = async function handler (event, context, callback) {
// https://www.npmjs.com/package/proxy-chain
// this answer looks nice: https://github.com/GoogleChrome/puppeteer/issues/678#issuecomment-389096077
let chunks = [];
for (var n = 0; n < numClusters; n++) {
for (var n = 0; n < this.numClusters; n++) {
chunks.push([]);
}
for (var k = 0; k < config.keywords.length; k++) {
chunks[k%numClusters].push(config.keywords[k]);
for (var k = 0; k < this.config.keywords.length; k++) {
chunks[k % this.numClusters].push(this.config.keywords[k]);
}
let execPromises = [];
let scraperInstances = [];
for (var c = 0; c < chunks.length; c++) {
config.keywords = chunks[c];
// the first scraping config uses the home IP
this.config.keywords = chunks[c];
// the first scraping config uses the home IP
if (c > 0) {
config.proxy = config.proxies[c-1];
this.config.proxy = this.config.proxies[c - 1];
}
var obj = getScraper(config.search_engine, {
config: config,
context: context,
pluggable: pluggable,
var obj = getScraper(this.config.search_engine, {
config: this.config,
context: {},
pluggable: this.pluggable,
});
var boundMethod = obj.run.bind(obj);
execPromises.push(cluster.execute({}, boundMethod));
execPromises.push(this.cluster.execute({}, boundMethod));
scraperInstances.push(obj);
}
@ -239,9 +329,6 @@ module.exports.handler = async function handler (event, context, callback) {
}
}
await cluster.idle();
await cluster.close();
// count total requests among all scraper instances
for (var o of scraperInstances) {
num_requests += o.num_requests;
@ -251,28 +338,26 @@ module.exports.handler = async function handler (event, context, callback) {
let timeDelta = Date.now() - startTime;
let ms_per_request = timeDelta/num_requests;
if (config.verbose === true) {
console.log(`Scraper took ${timeDelta}ms to perform ${num_requests} requests.`);
console.log(`On average ms/request: ${ms_per_request}ms/request`);
}
log(this.config, 1, `Scraper took ${timeDelta}ms to perform ${num_requests} requests.`);
log(this.config, 1, `On average ms/request: ${ms_per_request}ms/request`);
if (config.compress === true) {
if (this.config.compress === true) {
results = JSON.stringify(results);
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
results = zlib.deflateSync(results).toString('base64');
}
if (pluggable.handle_results) {
await pluggable.handle_results({
config: config,
if (this.pluggable && this.pluggable.handle_results) {
await this.pluggable.handle_results({
config: this.config,
results: results,
});
}
if (config.chunk_lines) {
metadata.chunk_lines = config.chunk_lines;
if (config.job_name) {
metadata.id = `${config.job_name} ${config.chunk_lines}`;
if (this.config.chunk_lines) {
metadata.chunk_lines = this.config.chunk_lines;
if (this.config.job_name) {
metadata.id = `${this.config.job_name} ${this.config.chunk_lines}`;
}
}
@ -280,33 +365,39 @@ module.exports.handler = async function handler (event, context, callback) {
metadata.ms_per_keyword = ms_per_request.toString();
metadata.num_requests = num_requests;
if (config.verbose === true) {
console.log(metadata);
log(this.config, 2, metadata);
if (this.pluggable && this.pluggable.handle_metadata) {
await this.pluggable.handle_metadata({metadata: metadata, config: this.config});
}
if (pluggable.handle_metadata) {
await pluggable.handle_metadata({metadata: metadata, config: config});
if (this.config.output_file) {
write_results(this.config.output_file, JSON.stringify(results, null, 4));
}
if (config.output_file) {
write_results(config.output_file, JSON.stringify(results, null, 4));
}
let response = {
headers: {
'Content-Type': 'text/json',
},
results: results,
metadata: metadata || {},
statusCode: 200
return {
headers: {
'Content-Type': 'text/json',
},
results: results,
metadata: metadata || {},
statusCode: 200
};
callback(null, response);
} catch (e) {
callback(e, null);
}
};
/*
* Quits the puppeteer cluster/browser.
*/
async quit() {
if (this.pluggable && this.pluggable.close_browser) {
await this.pluggable.close_browser();
} else {
await this.cluster.idle();
await this.cluster.close();
}
}
}
function parseEventData(config) {
@ -319,7 +410,7 @@ function parseEventData(config) {
}
}
const booleans = ['debug', 'verbose', 'upload_to_s3', 'log_ip_address', 'log_http_headers', 'random_user_agent',
const booleans = ['upload_to_s3', 'log_ip_address', 'log_http_headers', 'random_user_agent',
'compress', 'is_local', 'max_results', 'set_manual_settings', 'block_assets', 'test_evasion', 'do_work', 'apply_evasion_techniques'];
for (b of booleans) {
@ -336,4 +427,10 @@ function parseEventData(config) {
}
return config;
}
}
// Public API of this module: the ScrapeManager class.
module.exports = { ScrapeManager };

View File

@ -10,83 +10,79 @@ const normal_search_keywords = ['iphone', 'clock'];
async function normal_search_test() {
let config = {
search_engine: 'amazon',
compress: false,
debug: false,
verbose: false,
keywords: normal_search_keywords,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
random_user_agent: false,
};
let scrape_config = {
search_engine: 'amazon',
num_pages: 1,
keywords: normal_search_keywords,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
normal_search_test_case( await se_scraper.scrape(config, scrape_config) );
}
// we check the response object returned by scrape()
function normal_search_test_case(err, response) {
function normal_search_test_case(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 2);
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 2);
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
for (let res of obj.results) {
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'image', 'seller', 'stars', 'num_reviews', 'price', 'oldprice'], 'not all keys are in the SERP object');
assert.containsAllKeys(res, ['link', 'title', 'rank', 'image', 'seller', 'stars', 'num_reviews', 'price', 'oldprice'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.seller, 'seller must be ok');
assert.typeOf(res.seller, 'string', 'seller must be string');
assert.isAtLeast(res.seller.length, 5, 'seller must have at least 10 chars');
assert.isOk(res.seller, 'seller must be ok');
assert.typeOf(res.seller, 'string', 'seller must be string');
assert.isAtLeast(res.seller.length, 5, 'seller must have at least 10 chars');
assert.isOk(res.stars, 'stars be ok');
assert.typeOf(res.stars, 'string', 'stars must be string');
assert.isAtLeast(res.stars.length, 5, 'stars must have at least 6 chars');
assert.include(res.stars, ' out of ', 'stars must include " out of "');
assert.isOk(res.stars, 'stars be ok');
assert.typeOf(res.stars, 'string', 'stars must be string');
assert.isAtLeast(res.stars.length, 5, 'stars must have at least 6 chars');
assert.include(res.stars, ' out of ', 'stars must include " out of "');
assert.isOk(res.num_reviews, 'num_reviews be ok');
assert.typeOf(res.num_reviews, 'string', 'num_reviews must be string');
assert.isAtLeast(res.num_reviews.length, 1, 'num_reviews must have at least 1 chars');
assert.isOk(res.num_reviews, 'num_reviews be ok');
assert.typeOf(res.num_reviews, 'string', 'num_reviews must be string');
assert.isAtLeast(res.num_reviews.length, 1, 'num_reviews must have at least 1 chars');
assert.isOk(res.price, 'price be ok');
assert.typeOf(res.price, 'string', 'price must be string');
assert.isAtLeast(res.price.length, 5, 'price must have at least 5 chars');
assert.isOk(res.price, 'price be ok');
assert.typeOf(res.price, 'string', 'price must be string');
assert.isAtLeast(res.price.length, 5, 'price must have at least 5 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}
@ -96,49 +92,48 @@ const keywords_no_results = ['2342kljp;fj9834u40abJ54634344023safkl34a44dsflkjaQ
async function no_results_test() {
let config = {
search_engine: 'amazon',
compress: false,
debug: false,
verbose: false,
keywords: keywords_no_results,
debug_level: 1,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
random_user_agent: false,
};
let scrape_config = {
search_engine: 'amazon',
num_pages: 1,
keywords: keywords_no_results,
};
console.log('no_results_test()');
await se_scraper.scrape(config, test_case_no_results);
test_case_no_results( await se_scraper.scrape(config, scrape_config) );
}
// we check the response object returned by scrape()
function test_case_no_results(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
function test_case_no_results(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
results = response.results;
for (let query in response.results) {
results = response.results;
for (let query in response.results) {
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}

View File

@ -10,76 +10,73 @@ const normal_search_keywords = ['mouse', 'cat'];
async function normal_search_test() {
let config = {
search_engine: 'baidu',
compress: false,
debug: false,
verbose: false,
keywords: normal_search_keywords,
debug_level: 1,
keyword_file: '',
num_pages: 2,
headless: true,
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
};
let scrape_config = {
search_engine: 'baidu',
keywords: normal_search_keywords,
num_pages: 2,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
normal_search_test_case( await se_scraper.scrape(config, scrape_config) );
}
// we check the response object returned by scrape()
function normal_search_test_case(err, response) {
function normal_search_test_case(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 4);
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 4);
for (let query in response.results) {
let total_rank = 1;
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'num_results', 'no_results'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'num_results', 'no_results'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.equal(obj.no_results, false, 'no results should be false');
assert.equal(obj.no_results, false, 'no results should be false');
for (let res of obj.results) {
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}

View File

@ -12,8 +12,7 @@ async function normal_search_test() {
let config = {
search_engine: 'bing',
compress: false,
debug: false,
verbose: false,
debug_level: 1,
keywords: normal_search_keywords,
keyword_file: '',
num_pages: 3,
@ -24,61 +23,62 @@ async function normal_search_test() {
random_user_agent: false,
};
let scrape_config = {
search_engine: 'bing',
keywords: normal_search_keywords,
num_pages: 3,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
normal_search_test_case( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function normal_search_test_case(err, response) {
function normal_search_test_case(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 6);
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 6);
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
for (let res of obj.results) {
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object');
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}
@ -90,8 +90,7 @@ async function no_results_test() {
let config = {
search_engine: 'bing',
compress: false,
debug: false,
verbose: false,
debug_level: 1,
keywords: keywords_no_results,
keyword_file: '',
num_pages: 1,
@ -101,37 +100,40 @@ async function no_results_test() {
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
let scrape_config = {
search_engine: 'bing',
keywords: keywords_no_results,
num_pages: 1,
};
console.log('no_results_test()');
await se_scraper.scrape(config, test_case_no_results);
test_case_no_results( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function test_case_no_results(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
function test_case_no_results(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
results = response.results;
for (let query in response.results) {
results = response.results;
for (let query in response.results) {
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}
@ -140,58 +142,55 @@ const effective_query_keywords = ['mount everrest'];
async function effective_query_test() {
let config = {
search_engine: 'bing',
compress: false,
debug: false,
verbose: false,
keywords: effective_query_keywords,
debug_level: 1,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
let scrape_config = {
search_engine: 'bing',
keywords: effective_query_keywords,
num_pages: 1,
};
console.log('effective_query_test()');
await se_scraper.scrape(config, test_case_effective_query);
test_case_effective_query( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function test_case_effective_query(err, response) {
function test_case_effective_query(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
if (err) {
console.error(err);
} else {
results = response.results;
for (let query in response.results) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
results = response.results;
for (let query in response.results) {
for (let page_number in response.results[query]) {
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
for (let page_number in response.results[query]) {
let obj = response.results[query][page_number];
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
let obj = response.results[query][page_number];
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}

View File

@ -10,13 +10,9 @@ const normal_search_keywords = ['apple tree', 'weather tomorrow'];
async function normal_search_test() {
let config = {
search_engine: 'duckduckgo',
compress: false,
debug: false,
verbose: false,
keywords: normal_search_keywords,
debug_level: 1,
keyword_file: '',
num_pages: 2,
headless: true,
output_file: '',
block_assets: true,
@ -24,59 +20,60 @@ async function normal_search_test() {
random_user_agent: false,
};
let scrape_config = {
search_engine: 'duckduckgo',
keywords: normal_search_keywords,
num_pages: 2,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
normal_search_test_case( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function normal_search_test_case(err, response) {
function normal_search_test_case(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 2);
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 4);
for (let query in response.results) {
let total_rank = 1;
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
for (let res of obj.results) {
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}
@ -86,55 +83,52 @@ const effective_query_keywords = ['mount everrest'];
async function effective_query_test() {
let config = {
search_engine: 'duckduckgo',
compress: false,
debug: false,
verbose: false,
keywords: effective_query_keywords,
debug_level: 1,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
console.log('effective_query_test()');
await se_scraper.scrape(config, test_case_effective_query);
let scrape_config = {
search_engine: 'duckduckgo',
keywords: effective_query_keywords,
num_pages: 1,
};
console.log('test_case_effective_query()');
test_case_effective_query( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function test_case_effective_query(err, response) {
function test_case_effective_query(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
if (err) {
console.error(err);
} else {
results = response.results;
for (let query in response.results) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
results = response.results;
for (let query in response.results) {
for (let page_number in response.results[query]) {
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
for (let page_number in response.results[query]) {
let obj = response.results[query][page_number];
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
let obj = response.results[query][page_number];
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}

View File

@ -10,13 +10,9 @@ const normal_search_keywords = ['apple tree', 'weather tomorrow'];
async function normal_search_test() {
let config = {
search_engine: 'google',
compress: false,
debug: false,
verbose: false,
keywords: normal_search_keywords,
debug_level: 1,
keyword_file: '',
num_pages: 3,
headless: true,
output_file: '',
block_assets: true,
@ -24,62 +20,63 @@ async function normal_search_test() {
random_user_agent: false,
};
let scrape_config = {
search_engine: 'google',
keywords: normal_search_keywords,
num_pages: 3,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
normal_search_test_case( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function normal_search_test_case(err, response) {
function normal_search_test_case(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 6);
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 6);
for (let query in response.results) {
let total_rank = 1;
for (let query in response.results) {
let total_rank = 1;
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 8 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
for (let res of obj.results) {
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link'], 'not all keys are in the SERP object');
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}
@ -89,50 +86,49 @@ const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',];
async function no_results_test() {
let config = {
search_engine: 'google',
compress: false,
debug: false,
verbose: false,
keywords: keywords_no_results,
debug_level: 1,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
let scrape_config = {
search_engine: 'google',
keywords: keywords_no_results,
num_pages: 1,
};
console.log('no_results_test()');
await se_scraper.scrape(config, test_case_no_results);
test_case_no_results( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function test_case_no_results(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
function test_case_no_results(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
results = response.results;
for (let query in response.results) {
results = response.results;
for (let query in response.results) {
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}
@ -141,58 +137,55 @@ const effective_query_keywords = ['mount evverrest'];
async function effective_query_test() {
let config = {
search_engine: 'google',
compress: false,
debug: false,
verbose: false,
keywords: effective_query_keywords,
debug_level: 1,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
let scrape_config = {
search_engine: 'google',
keywords: effective_query_keywords,
num_pages: 1,
};
console.log('effective_query_test()');
await se_scraper.scrape(config, test_case_effective_query);
test_case_effective_query( await se_scraper.scrape(config, scrape_config) );
}
// we test with a callback function to our handler
function test_case_effective_query(err, response) {
function test_case_effective_query(response) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
if (err) {
console.error(err);
} else {
results = response.results;
for (let query in response.results) {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
assert.equal(response.metadata.num_requests, 1);
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
results = response.results;
for (let query in response.results) {
for (let page_number in response.results[query]) {
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
for (let page_number in response.results[query]) {
let obj = response.results[query][page_number];
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
let obj = response.results[query][page_number];
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
assert.isAtLeast(obj.results.length, 7, 'results must have at least 8 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}