test: remove legacy tests

This commit is contained in:
HugoPoi 2020-01-07 09:53:59 +01:00
parent 3ab8e46126
commit f192e4ebb4
18 changed files with 1 additions and 2878 deletions

View File

@ -5,7 +5,7 @@
"homepage": "https://scrapeulous.com/",
"main": "index.js",
"scripts": {
"test": "mocha test/static_tests/"
"test": "mocha test test/modules"
},
"keywords": [
"scraping",

View File

@ -1,66 +0,0 @@
/**
Test server with:
curl -XPOST http://0.0.0.0:3000 -H 'Content-Type: application/json' \
-d '{
"browser_config": {
"random_user_agent": true
},
"scrape_config": {
"search_engine": "google",
"keywords": ["test"],
"num_pages": 1
}
}'
*/
'use strict';

const se_scraper = require('../index.js');
const express = require('express');

// Constants
const PORT = process.env.PORT || 3000;
const HOST = process.env.HOST || '0.0.0.0';

// App
const app = express();
app.use(express.json());

// Default browser configuration. It is never modified: every request merges
// its own overrides into a fresh copy, so one request cannot affect the next.
const browser_config = {
    random_user_agent: true,
    headless : true,
    debug_level: 1,
    sleep_range: '',
    puppeteer_cluster_config: {
        timeout: 30 * 60 * 1000, // max timeout set to 30 minutes
        monitor: false,
        concurrency: 1, // one scraper per tab
        maxConcurrency: 1, // NOTE(review): this comment used to say "5 tabs"; the value is 1
    }
};

/**
 * POST / expects a JSON body with `browser_config` and `scrape_config`.
 * Starts a ScrapeManager, runs the scrape, quits the manager and replies
 * with the scrape results. Replies with HTTP 400 when either key is missing
 * and with HTTP 500 when scraping throws.
 */
app.post('/', async (req, res) => {
    if (!req.body.browser_config || !req.body.scrape_config) {
        // keep the JSON body as before, but also report the error on the HTTP level
        res.status(400).json({
            'status': 400,
            'msg': 'please specify browser_config and scrape_config'
        });
        return;
    }

    try {
        // overwrite standard browser config (shallow merge into a copy)
        const request_browser_config = Object.assign({}, browser_config, req.body.browser_config);
        const scraper = new se_scraper.ScrapeManager(request_browser_config);

        let results;
        try {
            await scraper.start();
            results = await scraper.scrape(req.body.scrape_config);
        } finally {
            // shut the scraper down even when start/scrape failed
            await scraper.quit();
        }
        res.send(results);
    } catch (err) {
        // without this, a rejected promise would leave the request without a response
        console.error(err);
        res.status(500).json({
            'status': 500,
            'msg': 'scraping failed: ' + (err && err.message ? err.message : String(err))
        });
    }
});

app.listen(PORT, HOST);
console.log(`Running on http://${HOST}:${PORT}`);

View File

@ -1,15 +0,0 @@
## Test with static HTML
Dynamic testing of se-scraper takes too much time.
Save some html and initialize se-scraper by loading the search from disk.
### Disadvantage
static html gets outdated after some time
### Advantages
1. Lets us test corner cases that are easily missed
2. Dynamic testing is not reliable, since search engines do not always return the same results for the same query; static HTML gives reproducible results
3. As said, much faster

View File

@ -1,222 +0,0 @@
const se_scraper = require('./../../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
const path = require('path');
/**
 * Mocha test body: scrapes four saved Bing result pages and validates each response.
 *
 * Every fixture under ./html/ is loaded through `scrape_from_file` together with
 * the keyword it belongs to, and the response is handed to its own checker.
 * The scraper is always shut down, even when a checker throws.
 */
async function bing_ads() {
    const config = {
        compress: false,
        debug_level: 1,
        headless: true,
    };

    const scrape_config = {
        search_engine: 'bing',
        keywords: ['kaffeemaschine kaufen'],
        num_pages: 1,
        scrape_from_file: 'file://' + path.join(__dirname, './html/bing.html'),
    };

    // [fixture file, keyword, checker] in the order they are scraped
    const cases = [
        ['bing.html', 'kaffeemaschine kaufen', bing_search_with_ads],
        ['bing2.html', 'best cloud services', bing_search_with_ads2],
        ['bing3.html', 'car tires cheap', bing_search_with_ads3],
        ['bing4.html', 'service auto garage', bing_search_with_ads4],
    ];

    const scraper = new se_scraper.ScrapeManager(config);
    await scraper.start();
    try {
        for (const [fixture, keyword, check] of cases) {
            scrape_config.keywords = [keyword];
            scrape_config.scrape_from_file = 'file://' + path.join(__dirname, './html/' + fixture);
            check(await scraper.scrape(scrape_config));
        }
    } finally {
        // a failed assertion must not leave the scraper running
        await scraper.quit();
    }
}
// Checkers for the responses produced by bing_ads(); each one is called with
// the object returned by scraper.scrape().

/**
 * Shared checks for every page of a Bing response.
 *
 * @param {object} response - scrape response with `metadata` and `results[query][page]`
 * @param {object} expected
 * @param {string} expected.num_results - substring that `num_results` must contain
 * @param {number} expected.min_results - minimum number of entries in `results`
 * @param {number} expected.min_ads - minimum number of entries in `ads`
 * @param {number} [expected.min_right_side_ads] - minimum number of `right_side_ads`;
 *        the check is skipped when omitted
 */
function assert_bing_pages(response, expected) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query of Object.keys(response.results)) {
        for (const page_number of Object.keys(response.results[query])) {
            // typeof NaN is 'number', so the NaN case has to be rejected explicitly
            assert.isFalse(Number.isNaN(parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.include(obj.num_results, expected.num_results, 'num results not included');
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query', 'ads'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, expected.min_results, 'results must have at least ' + expected.min_results + ' SERP objects');
            assert.isAtLeast(obj.ads.length, expected.min_ads, 'there are ' + expected.min_ads + ' ads');
            if (expected.min_right_side_ads !== undefined) {
                assert.isAtLeast(obj.right_side_ads.length, expected.min_right_side_ads, 'there are ' + expected.min_right_side_ads + ' ads');
            }

            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse returns NaN for unparseable input, which is still a number
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            confirm_results_ok(obj);
        }
    }
}

/** Checks the response scraped for 'kaffeemaschine kaufen' (bing.html). */
function bing_search_with_ads(response) {
    assert_bing_pages(response, {
        num_results: '1100000',
        min_results: 6,
        min_ads: 7,
        min_right_side_ads: 5,
    });
}

/** Checks the response scraped for 'best cloud services' (bing2.html). */
function bing_search_with_ads2(response) {
    assert_bing_pages(response, {
        num_results: '44300000',
        min_results: 6,
        min_ads: 7,
        min_right_side_ads: 5,
    });
}

/** Checks the response scraped for 'car tires cheap' (bing3.html). */
function bing_search_with_ads3(response) {
    assert_bing_pages(response, {
        num_results: '65.500.000 Results',
        min_results: 10,
        min_ads: 3,
    });
}

/** Checks the response scraped for 'service auto garage' (bing4.html). */
function bing_search_with_ads4(response) {
    assert_bing_pages(response, {
        num_results: '4.200.000 Ergebnisse',
        min_results: 9,
        min_ads: 3,
    });
}
/**
 * Asserts that `entry[field]` is a non-empty string of at least `min_length`
 * characters. Failure messages name the field that was actually checked.
 */
function assert_bing_text_field(entry, field, min_length) {
    assert.isOk(entry[field], field + ' must be ok');
    assert.typeOf(entry[field], 'string', field + ' must be string');
    assert.isAtLeast(entry[field].length, min_length, field + ' must have at least ' + min_length + ' chars');
}

/**
 * Validates the entries of one parsed Bing page.
 *
 * @param {object} obj - page object with the arrays `results`, `ads` and `right_side_ads`
 */
function confirm_results_ok(obj) {
    for (const res of obj.results) {
        assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');

        assert_bing_text_field(res, 'link', 5);
        assert_bing_text_field(res, 'visible_link', 5);
        assert_bing_text_field(res, 'title', 8);
        assert_bing_text_field(res, 'snippet', 10);

        assert.isNumber(res.rank, 'rank must be integer');
    }

    // both ad lists carry the same fields, so they share one set of checks
    for (const ad_list of [obj.ads, obj.right_side_ads]) {
        for (const ad of ad_list) {
            assert_bing_text_field(ad, 'tracking_link', 5);
            assert_bing_text_field(ad, 'visible_link', 5);
            assert_bing_text_field(ad, 'title', 8);
            assert_bing_text_field(ad, 'snippet', 10);
        }
    }
}
// Mocha suite for the saved Bing pages. A regular function (not an arrow) is
// required so that `this` is the suite context whose timeout is raised.
describe('Bing', function () {
    const suite_timeout_ms = 15000;
    this.timeout(suite_timeout_ms);

    it('static bing searches with ads', bing_ads);
});

View File

@ -1,173 +0,0 @@
const se_scraper = require('./../../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
const path = require('path');
const cheerio = require('cheerio');
/**
 * Mocha test body: scrapes saved Bing and Google pages with `html_output`
 * enabled, once with the cleaning flags on and once with them off, and hands
 * each pair of responses to test(). The scraper is always shut down.
 */
async function test_html_output() {
    const config = {
        debug_level: 1,
        headless: true,
        html_output: true,
        // whether to strip JS and CSS from the html_output
        // has only an effect if `html_output` is true
        clean_html_output: true,
        // remove all data images from the html
        clean_data_images: true,
        // test compression
        compress: false,
    };

    const scrape_config = {
        search_engine: 'bing',
        keywords: ['kaffeemaschine kaufen'],
        num_pages: 1,
        scrape_from_file: 'file://' + path.join(__dirname, './html/bing.html'),
    };

    const scraper = new se_scraper.ScrapeManager(config);

    // NOTE(review): the cleaning flags are first given in the manager config and
    // afterwards toggled on scrape_config - confirm the scraper honours both.
    const scrape_with_cleaning = (enabled) => {
        scrape_config.clean_html_output = enabled;
        scrape_config.clean_data_images = enabled;
        return scraper.scrape(scrape_config);
    };

    await scraper.start();
    try {
        // bing: the first scrape relies on the flags from the manager config
        const bing_cleaned = await scraper.scrape(scrape_config);
        const bing_raw = await scrape_with_cleaning(false);
        test(bing_cleaned, bing_raw, 'bing');

        scrape_config.search_engine = 'google';
        scrape_config.keywords = ['rückspiegel schwarz'];
        scrape_config.scrape_from_file = 'file://' + path.join(__dirname, './html/google.html');
        const google_cleaned = await scrape_with_cleaning(true);
        const google_raw = await scrape_with_cleaning(false);
        test(google_cleaned, google_raw, 'google');

        scrape_config.keywords = ['cloud services'];
        scrape_config.scrape_from_file = 'file://' + path.join(__dirname, './html/googleLarge.html');
        const google_large_cleaned = await scrape_with_cleaning(true);
        const google_large_raw = await scrape_with_cleaning(false);
        test(google_large_cleaned, google_large_raw, 'google');
    } finally {
        // a failed assertion must not leave the scraper running
        await scraper.quit();
    }
}
/**
 * Compares a response scraped with html cleaning against one scraped without.
 *
 * For every query/page it checks that both html strings exist, that the cleaned
 * html is shorter, and that re-parsing either html with cheerio gives the same
 * entries as the scraper's own `results` (ignoring `rank`).
 *
 * @param {object} response - response scraped with cleaning enabled
 * @param {object} response_no_cleaned - response scraped with cleaning disabled
 * @param {string} [se='google'] - search engine name understood by parseResults()
 */
function test(response, response_no_cleaned, se='google') {
    // Copies each row with `rank` blanked. The previous `map((el) => el.rank = undefined)`
    // returned the assignment value, turning every row into `undefined`, so the
    // deepEqual checks below could never fail.
    const without_rank = (rows) => rows.map((row) => Object.assign({}, row, {rank: undefined}));

    for (let query in response.results) {
        for (let page_number in response.results[query]) {
            const obj = response.results[query][page_number];
            const obj_no_cleaned = response_no_cleaned.results[query][page_number];

            console.log('html length of no cleaned SERP: ' + obj_no_cleaned.html.length);
            console.log('html length of cleaned SERP: ' + obj.html.length);

            assert.isOk(obj.html, 'Html must be ok!');
            assert.isAtLeast(obj.html.length, 100, 'html must be a length string');

            assert.isOk(obj_no_cleaned.html, 'Html must be ok!');
            assert.isAtLeast(obj_no_cleaned.html.length, 100, 'html must be a length string');

            assert.isBelow(obj.html.length, obj_no_cleaned.html.length, 'cleaned html must be smaller');

            // test that we can parse the html of both the cleaned and no cleaned versions
            // with cheerio and that serp results are roughly the same
            const cleaned$ = cheerio.load(obj.html);
            const no_cleaned$ = cheerio.load(obj_no_cleaned.html);

            const resCleaned = parseResults(cleaned$, se);
            const resNoCleaned = parseResults(no_cleaned$, se);

            assert.equal(resCleaned.length, resNoCleaned.length);
            assert.equal(resCleaned.length, obj.results.length);
            assert.equal(resNoCleaned.length, obj.results.length);

            // unset the rank on copies; the scraper response itself is left untouched
            const cleaned_rows = without_rank(resCleaned);
            const no_cleaned_rows = without_rank(resNoCleaned);
            const scraper_rows = without_rank(obj.results);

            assert.deepEqual(cleaned_rows, no_cleaned_rows, 'parsed results should be equal, even if html is cleaned');
            assert.deepEqual(cleaned_rows, scraper_rows, 'parsed results from cleaned html should be equal to se-scraper results');
            assert.deepEqual(no_cleaned_rows, scraper_rows, 'parsed results from non-cleaned html should be equal to se-scraper results');
        }
    }
}
/**
 * Extracts organic results from a cheerio-loaded result page.
 *
 * @param {Function} s$ - cheerio instance returned by `cheerio.load(html)`
 * @param {string} se - 'google' or 'bing'
 * @returns {Array<object>} entries that passed clean_results() for
 *          'title', 'link' and 'snippet'
 * @throws {Error} when `se` is neither 'google' nor 'bing'
 */
function parseResults(s$, se) {
    const results = [];

    if (se === 'google') {
        s$('#center_col .g').each((i, link) => {
            const node = s$(link);
            results.push({
                link: node.find('.r a').attr('href'),
                title: node.find('.r a').text(),
                snippet: node.find('span.st').text(),
                visible_link: node.find('.r cite').text(),
                date: node.find('span.f').text() || '',
            });
        });
    } else if (se === 'bing') {
        s$('#b_content #b_results .b_algo').each((i, link) => {
            const node = s$(link);
            results.push({
                link: node.find('h2 a').attr('href'),
                title: node.find('h2').text(),
                snippet: node.find('.b_caption p').text(),
                visible_link: node.find('cite').text(),
            });
        });
    } else {
        // throw a real Error so that a stack trace is available
        throw new Error('no such search engine: ' + se);
    }

    return clean_results(results, ['title', 'link', 'snippet']);
}
/**
 * Keeps the entries whose listed attributes are all non-blank and numbers them.
 *
 * An entry is dropped as soon as one of `attributes` is missing, empty or
 * whitespace-only. Kept entries get `rank` set in place, counting from 1 in
 * input order, and are returned in a new array.
 *
 * @param {Array<object>} results - candidate entries
 * @param {Array<string>} attributes - property names that must hold text
 * @returns {Array<object>} the kept entries (same object references)
 */
function clean_results(results, attributes) {
    const kept = [];

    for (const entry of results) {
        const complete = attributes.every(
            (name) => Boolean(entry[name]) && Boolean(entry[name].trim())
        );
        if (!complete) {
            continue;
        }
        entry.rank = kept.length + 1;
        kept.push(entry);
    }

    return kept;
}
// Mocha suite for the html_output checks. A regular function (not an arrow) is
// required so that `this` is the suite context whose timeout is raised.
describe('html output', function () {
    const suite_timeout_ms = 15000;
    this.timeout(suite_timeout_ms);

    it('static html output test', test_html_output);
});

View File

@ -1,24 +0,0 @@
'use strict';
const zlib = require('zlib');
const fs = require('fs');
const path = require('path');
// Prints, for each saved html fixture, its size in thousands of bytes: raw,
// gzip-compressed, deflate-compressed, and the base64 text length of both
// compressed forms.
const fixture_names = ['google.html', 'google2.html', 'google3.html', 'bing.html', 'bing2.html'];

fixture_names.forEach((file) => {
    const html = fs.readFileSync(path.resolve(__dirname, './html/' + file));

    const gzipped = zlib.gzipSync(html);
    const deflated = zlib.deflateSync(html);
    const gzipped_base64 = gzipped.toString('base64');
    const deflated_base64 = deflated.toString('base64');

    // label / measured object pairs, in the order they are printed
    const report = [
        ['Normal length: ', html],
        ['GZIP Compressed length: ', gzipped],
        ['Deflate Compressed length: ', deflated],
        ['Encoded GZIP Compressed length: ', gzipped_base64],
        ['Encoded Deflate Compressed length: ', deflated_base64],
    ];

    console.log(file);
    for (const [label, measured] of report) {
        console.log(label + measured.length / 1000);
    }
    console.log('------\n');
});

View File

@ -1,99 +0,0 @@
const se_scraper = require('./../../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
const path = require('path');
/**
 * Mocha test body: scrapes the saved DuckDuckGo page ./html/duckduckgo1.html
 * and validates the response with duckduckgo_normal(). The scraper is always
 * shut down, even when the checker throws.
 */
async function duckduckgo() {
    const config = {
        compress: false,
        debug_level: 1,
        headless: true,
    };

    const scrape_config = {
        search_engine: 'duckduckgo',
        keywords: ['cloud service'],
        num_pages: 1,
        scrape_from_file: 'file://' + path.join(__dirname, './html/duckduckgo1.html'),
    };

    const scraper = new se_scraper.ScrapeManager(config);
    await scraper.start();
    try {
        duckduckgo_normal(await scraper.scrape(scrape_config));
    } finally {
        // a failed assertion must not leave the scraper running
        await scraper.quit();
    }
}
/**
 * Checks every page of a DuckDuckGo response: exactly one request was made,
 * each page has `results`, `time` and `ads`, at least 10 results and 2 ads,
 * a parseable `time`, and entries accepted by confirm_results_ok().
 *
 * @param {object} response - scrape response with `metadata` and `results[query][page]`
 */
function duckduckgo_normal(response) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query of Object.keys(response.results)) {
        for (const page_number of Object.keys(response.results[query])) {
            // typeof NaN is 'number', so the NaN case has to be rejected explicitly
            assert.isFalse(Number.isNaN(parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'ads',], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 10, 'results must have at least 10 SERP objects');
            assert.isAtLeast(obj.ads.length, 2, 'ads must have at least 2 SERP objects');
            // Date.parse returns NaN for unparseable input, which is still a number
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            confirm_results_ok(obj);
        }
    }
}
/**
 * Asserts that `entry[field]` is a non-empty string of at least `min_length`
 * characters. Failure messages name the field that was actually checked.
 */
function assert_ddg_text_field(entry, field, min_length) {
    assert.isOk(entry[field], field + ' must be ok');
    assert.typeOf(entry[field], 'string', field + ' must be string');
    assert.isAtLeast(entry[field].length, min_length, field + ' must have at least ' + min_length + ' chars');
}

/**
 * Validates the entries of one parsed DuckDuckGo page.
 *
 * @param {object} obj - page object with the arrays `results` and `ads`
 */
function confirm_results_ok(obj) {
    for (const res of obj.results) {
        assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');

        assert_ddg_text_field(res, 'link', 5);
        assert_ddg_text_field(res, 'visible_link', 5);
        assert_ddg_text_field(res, 'title', 10);
        assert_ddg_text_field(res, 'snippet', 10);

        assert.isNumber(res.rank, 'rank must be integer');
    }

    for (const ad of obj.ads) {
        assert_ddg_text_field(ad, 'tracking_link', 5);
        assert_ddg_text_field(ad, 'visible_link', 5);
        assert_ddg_text_field(ad, 'title', 10);
        assert_ddg_text_field(ad, 'snippet', 10);
    }
}
// Mocha suite for the saved DuckDuckGo page. A regular function (not an arrow)
// is required so that `this` is the suite context whose timeout is raised.
describe('Duckduckgo', function(){
    this.timeout(10000);
    // test title typo fixed: "sarch" -> "search"
    it('static duckduckgo search', duckduckgo);
});

View File

@ -1,410 +0,0 @@
const se_scraper = require('./../../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
const path = require('path');
/**
 * Mocha test body: scrapes seven saved Google result pages and validates each response.
 *
 * Every fixture under ./html/ is loaded through `scrape_from_file` together with
 * the keyword it belongs to, and the response is handed to its own checker.
 * The scraper is always shut down, even when a checker throws.
 */
async function normal_search_test() {
    const config = {
        compress: false,
        debug_level: 1,
        headless: true,
    };

    const scrape_config = {
        search_engine: 'google',
        keywords: ['rückspiegel schwarz'],
        num_pages: 1,
        scrape_from_file: 'file://' + path.join(__dirname, './html/google.html'),
    };

    // [fixture file, keyword, checker] in the order they are scraped
    const cases = [
        ['google.html', 'rückspiegel schwarz', google_search_with_products],
        ['google2.html', 'autoreifen mercedes c-klasse', google_search_with_products2],
        ['google3.html', 'kaffeemaschine kaufen', google_places],
        ['google4.html', 'MODEL MARKET SW18 4ES', right_side_info_text],
        ['google5.html', 'BRANDON MOTORS HP13 6NR', right_side_info_text2],
        ['google6.html', 'car tires for sale', google_places_and_ads],
        ['google_bmw_felgen.html', 'bmw felgen', google_ads2],
    ];

    const scraper = new se_scraper.ScrapeManager(config);
    await scraper.start();
    try {
        for (const [fixture, keyword, check] of cases) {
            scrape_config.scrape_from_file = 'file://' + path.join(__dirname, './html/' + fixture);
            scrape_config.keywords = [keyword];
            check(await scraper.scrape(scrape_config));
        }
    } finally {
        // a failed assertion must not leave the scraper running
        await scraper.quit();
    }
}
// Checkers for the responses produced by normal_search_test(); each one is
// called with the object returned by scraper.scrape().

/**
 * Shared checks for every page of a Google response.
 *
 * @param {object} response - scrape response with `metadata` and `results[query][page]`
 * @param {string} expected_num_results - substring that `num_results` must contain
 * @param {Array<string>} expected_keys - keys every page object must have
 * @param {Function} check_counts - called with the page object for the
 *        fixture-specific length checks
 */
function assert_google_pages(response, expected_num_results, expected_keys, check_counts) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query of Object.keys(response.results)) {
        for (const page_number of Object.keys(response.results[query])) {
            // typeof NaN is 'number', so the NaN case has to be rejected explicitly
            assert.isFalse(Number.isNaN(parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.include(obj.num_results, expected_num_results, 'num results not included');
            assert.containsAllKeys(obj, expected_keys, 'not all keys are in the object');

            check_counts(obj);

            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse returns NaN for unparseable input, which is still a number
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            confirm_results_ok(obj);
        }
    }
}

/** Keys required on pages that are checked for places. */
function google_keys_with_places() {
    return ['results', 'time', 'no_results', 'num_results', 'effective_query', 'top_ads', 'bottom_ads', 'places'];
}

/** Keys required on pages that are checked for the right side info text. */
function google_keys_with_info_text() {
    return ['results', 'time', 'no_results', 'num_results',
        'effective_query', 'top_ads', 'bottom_ads', 'right_side_info_text'];
}

/** Checks the response scraped for 'rückspiegel schwarz' (google.html). */
function google_search_with_products(response) {
    assert_google_pages(response, '1780000', google_keys_with_places(), (obj) => {
        assert.isAtLeast(obj.results.length, 9, 'results must have at least 9 SERP objects');
        assert.isAtLeast(obj.top_ads.length, 0, 'there are no top ads');
        assert.isAtLeast(obj.bottom_ads.length, 3, 'there are 3 bottom ads');
        assert.isAtLeast(obj.top_products.length, 15, 'there are 15 top products');
        assert.equal(obj.right_products.length, 0, 'there are 0 right products');
    });
}

/** Checks the response scraped for 'autoreifen mercedes c-klasse' (google2.html). */
function google_search_with_products2(response) {
    assert_google_pages(response, '437000 Ergebnisse (0.41 Sekunden)', google_keys_with_places(), (obj) => {
        assert.isAtLeast(obj.results.length, 9, 'results must have at least 9 SERP objects');
        assert.isAtLeast(obj.top_ads.length, 0, 'there are no top ads');
        assert.isAtLeast(obj.bottom_ads.length, 1, 'there are 1 bottom ads');
        assert.isAtLeast(obj.top_products.length, 0, 'there are 0 top products');
        assert.equal(obj.right_products.length, 4, 'there are 4 right products');
    });
}

/** Checks the response scraped for 'kaffeemaschine kaufen' (google3.html). */
function google_places(response) {
    assert_google_pages(response, '6750000 Ergebnisse (0.52 Sekunden)', google_keys_with_places(), (obj) => {
        assert.isAtLeast(obj.results.length, 10, 'results must have at least 10 SERP objects');
        assert.equal(obj.top_ads.length, 0, 'there are no top ads');
        assert.equal(obj.bottom_ads.length, 0, 'there are 0 bottom ads');
        assert.equal(obj.top_products.length, 0, 'there are 0 top products');
        assert.equal(obj.right_products.length, 0, 'there are 0 right products');
        assert.equal(obj.places.length, 3, 'there are 3 places');
    });
}

/** Checks the response scraped for 'MODEL MARKET SW18 4ES' (google4.html). */
function right_side_info_text(response) {
    assert_google_pages(response, '6 Ergebnisse', google_keys_with_info_text(), (obj) => {
        assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
        assert.isOk(obj.right_side_info_text.length, 'right_side_info_text should have some data');
        assert.isAtLeast(obj.right_side_info_text.length, 50, 'right_side_info_text should have some data');
    });
}

/** Checks the response scraped for 'BRANDON MOTORS HP13 6NR' (google5.html). */
function right_side_info_text2(response) {
    assert_google_pages(response, '5 Ergebnisse', google_keys_with_info_text(), (obj) => {
        assert.isAtLeast(obj.results.length, 5, 'results must have at least 5 SERP objects');
        assert.isOk(obj.right_side_info_text.length, 'right_side_info_text should have some data');
        assert.isAtLeast(obj.right_side_info_text.length, 50, 'right_side_info_text should have some data');
    });
}

/** Checks the response scraped for 'car tires for sale' (google6.html). */
function google_places_and_ads(response) {
    assert_google_pages(response, '439.000.000 Ergebnisse (0,64 Sekunden)', google_keys_with_places(), (obj) => {
        assert.isAtLeast(obj.results.length, 10, 'results must have at least 10 SERP objects');
        assert.equal(obj.top_ads.length, 0, 'there are no top ads');
        assert.equal(obj.bottom_ads.length, 0, 'there are 0 bottom ads');
        assert.isAtLeast(obj.top_products.length, 13, 'there are 13 top products');
        assert.equal(obj.right_products.length, 0, 'there are 0 right products');
        assert.equal(obj.places.length, 2, 'there are 2 places');
    });
}

/** Checks the response scraped for 'bmw felgen' (google_bmw_felgen.html). */
function google_ads2(response) {
    assert_google_pages(response, 'Ungefähr 23.200.000 Ergebnisse (0,29 Sekunden)', google_keys_with_places(), (obj) => {
        assert.isAtLeast(obj.results.length, 10, 'results must have at least 10 SERP objects');
        assert.equal(obj.top_ads.length, 3, 'there are 3 top ads');
        assert.equal(obj.bottom_ads.length, 0, 'there are 0 bottom ads');
        assert.isAtLeast(obj.top_products.length, 0, 'there must be 0 top products');
        assert.equal(obj.right_products.length, 9, 'there are 9 right products');
        assert.equal(obj.places.length, 0, 'there are 0 places');
    });
}
/**
 * Validates the shape of every entry in a parsed Google SERP page.
 *
 * Organic results, top/bottom ads, top/right products and places are each
 * checked field by field. Failure messages always name the field that was
 * actually tested and the minimum length that was actually enforced.
 *
 * @param {Object} obj - page object with iterable `results`, `top_ads`,
 *     `bottom_ads`, `top_products`, `right_products` and `places`.
 * @throws when any assertion fails, or a TypeError when one of the
 *     collections is missing.
 */
function confirm_results_ok(obj) {
    // Asserts that res[field] is a truthy string of at least min_len chars.
    const check_text = (res, field, min_len) => {
        assert.isOk(res[field], `${field} must be ok`);
        assert.typeOf(res[field], 'string', `${field} must be string`);
        assert.isAtLeast(res[field].length, min_len, `${field} must have at least ${min_len} chars`);
    };

    // Applies check_text() for each [field, min_len] pair, in order.
    const check_fields = (res, specs) => {
        for (const [field, min_len] of specs) {
            check_text(res, field, min_len);
        }
    };

    const result_fields = [['link', 5], ['visible_link', 5], ['title', 10], ['snippet', 10]];
    const ad_fields = [['tracking_link', 5], ['visible_link', 5], ['link', 5], ['title', 10], ['snippet', 10]];
    const product_fields = [['tracking_link', 5], ['link', 5], ['price', 5], ['title', 10], ['vendor_link', 8]];
    const place_fields = [['heading', 5], ['rating', 5], ['contact', 5]];

    for (const res of obj.results) {
        assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
        check_fields(res, result_fields);
        assert.isNumber(res.rank, 'rank must be integer');
    }

    // Top and bottom ads share one schema.
    for (const ads of [obj.top_ads, obj.bottom_ads]) {
        for (const res of ads) {
            check_fields(res, ad_fields);
            assert.typeOf(res.links, 'array', 'links must be array');
        }
    }

    // Top and right-hand products share one schema.
    for (const products of [obj.top_products, obj.right_products]) {
        for (const res of products) {
            check_fields(res, product_fields);
        }
    }

    for (const res of obj.places) {
        check_fields(res, place_fields);
        assert.typeOf(res.hours, 'string', 'hours must be string');
        // An empty hours string is accepted; only non-empty values are length-checked.
        if (res.hours) {
            assert.isAtLeast(res.hours.length, 10, 'hours must have at least 10 chars');
        }
    }
}
// Mocha suite for the static Google fixtures. A regular function (not an
// arrow function) is used so that `this.timeout` is available on `this`.
describe('Google', function () {
    const SUITE_TIMEOUT_MS = 25000;
    this.timeout(SUITE_TIMEOUT_MS);

    it('static google searches with products,ads and places', normal_search_test);
});

View File

@ -1,213 +0,0 @@
const se_scraper = require('./../../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
const path = require('path');
/**
 * Scrapes the saved Google result page `html/google7.html` (passed through
 * `scrape_from_file`) and hands the response to google_test_title().
 *
 * @returns {Promise<void>} rejects when the scraper fails or an assertion throws.
 */
async function normal_search_test() {
    const config = {
        compress: false,
        debug_level: 1,
        headless: true,
    };
    const scrape_config = {
        search_engine: 'google',
        keywords: ['in.linkedin.com/in/altanai'],
        num_pages: 1,
        scrape_from_file: 'file://' + path.join(__dirname, './html/google7.html'),
    };

    const scraper = new se_scraper.ScrapeManager(config);
    await scraper.start();
    try {
        google_test_title(await scraper.scrape(scrape_config));
    } finally {
        // Shut the scraper down even when scraping or an assertion throws,
        // otherwise a failing test would skip quit() entirely.
        await scraper.quit();
    }
}
/**
 * Checks the scrape response for the static `google7.html` fixture: general
 * page invariants, per-entry validation via confirm_results_ok(), and the
 * exact titles, dates and snippet of selected organic results.
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function google_test_title(response) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.include(obj.num_results, '7.600', 'num results not included');
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query', 'top_ads', 'bottom_ads', 'places'], 'not all keys are in the object');
            // results[9] is read below, so ten entries are required; with only
            // nine the test would die with a TypeError instead of a clear
            // assertion failure.
            assert.isAtLeast(obj.results.length, 10, 'results must have at least 10 SERP objects');
            assert.isAtLeast(obj.top_ads.length, 0, 'there are no top ads');
            assert.isAtLeast(obj.bottom_ads.length, 0, 'there are 0 bottom ads');
            assert.isAtLeast(obj.top_products.length, 0, 'there are 0 top products');
            assert.equal(obj.right_products.length, 0, 'there are 0 right products');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            confirm_results_ok(obj);

            assert.equal(obj.results[0].title, 'ALTANAI BISHT - SD2 at Voice Engineering - Plivo | LinkedIn');
            assert.equal(obj.results[1].title, 'ALTANAI BISHT | LinkedIn');
            assert.equal(obj.results[2].title, 'ALTANAI BISHT SD2 at Voice Engineering Plivo | LinkedIn');
            assert.equal(obj.results[3].title, 'AI AT - South Delhi, Delhi, India | Professional Profile | LinkedIn');
            assert.equal(obj.results[4].title, 'ALTANAI BISHT | LinkedIn');
            assert.equal(obj.results[9].title, 'Phani Kumar Parasaram - VOIP Expert - Infinite ... - LinkedIn');

            assert.equal(obj.results[0].date, '27.07.2016');
            assert.equal(obj.results[0].snippet, '27.07.2016 - View ALTANAI BISHT\'S profile on LinkedIn, the world\'s largest professional community. ALTANAI has 6 jobs listed on their profile. See the ...');
            assert.equal(obj.results[2].date, '27.07.2016');
        }
    }
}
/**
 * Validates the shape of every entry in a parsed Google SERP page.
 *
 * Organic results, top/bottom ads, top/right products and places are each
 * checked field by field. Failure messages always name the field that was
 * actually tested.
 *
 * @param {Object} obj - page object with iterable `results`, `top_ads`,
 *     `bottom_ads`, `top_products`, `right_products` and `places`.
 * @throws when any assertion fails, or a TypeError when one of the
 *     collections is missing.
 */
function confirm_results_ok(obj) {
    // Asserts that res[field] is a truthy string of at least min_len chars.
    const check_text = (res, field, min_len) => {
        assert.isOk(res[field], `${field} must be ok`);
        assert.typeOf(res[field], 'string', `${field} must be string`);
        assert.isAtLeast(res[field].length, min_len, `${field} must have at least ${min_len} chars`);
    };

    // Applies check_text() for each [field, min_len] pair, in order.
    const check_fields = (res, specs) => {
        for (const [field, min_len] of specs) {
            check_text(res, field, min_len);
        }
    };

    const result_fields = [['link', 5], ['visible_link', 5], ['title', 10], ['snippet', 10]];
    const ad_fields = [['tracking_link', 5], ['visible_link', 5], ['link', 5], ['title', 10], ['snippet', 10]];
    const product_fields = [['tracking_link', 5], ['link', 5], ['price', 5], ['title', 10], ['vendor_link', 10]];
    const place_fields = [['heading', 5], ['rating', 5], ['contact', 5]];

    for (const res of obj.results) {
        assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
        check_fields(res, result_fields);
        assert.isNumber(res.rank, 'rank must be integer');
    }

    // Top and bottom ads share one schema.
    for (const ads of [obj.top_ads, obj.bottom_ads]) {
        for (const res of ads) {
            check_fields(res, ad_fields);
            assert.typeOf(res.links, 'array', 'links must be array');
        }
    }

    // Top and right-hand products share one schema.
    for (const products of [obj.top_products, obj.right_products]) {
        for (const res of products) {
            check_fields(res, product_fields);
        }
    }

    for (const res of obj.places) {
        check_fields(res, place_fields);
        assert.typeOf(res.hours, 'string', 'hours must be string');
        // An empty hours string is accepted; only non-empty values are length-checked.
        if (res.hours) {
            assert.isAtLeast(res.hours.length, 10, 'hours must have at least 10 chars');
        }
    }
}
// Mocha suite for the detail-level Google fixture. A regular function (not an
// arrow function) is used so that `this.timeout` is available on `this`.
describe('Google2', function () {
    const SUITE_TIMEOUT_MS = 10000;
    this.timeout(SUITE_TIMEOUT_MS);

    it('static google searches testing various details', normal_search_test);
});

View File

@ -1,152 +0,0 @@
const se_scraper = require('./../../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
const path = require('path');
/**
 * Scrapes three saved Yandex result pages (`yandex1.html` .. `yandex3.html`,
 * passed through `scrape_from_file`) with a single ScrapeManager and checks
 * each response with its matching validator.
 *
 * @returns {Promise<void>} rejects when the scraper fails or an assertion throws.
 */
async function yandex_ads() {
    const config = {
        compress: false,
        debug_level: 1,
        headless: true,
    };
    const scrape_config = {
        search_engine: 'yandex',
        keywords: ['cloud service'],
        num_pages: 1,
        scrape_from_file: 'file://' + path.join(__dirname, './html/yandex1.html'),
    };

    const scraper = new se_scraper.ScrapeManager(config);
    await scraper.start();
    try {
        yandex_search_with_ads(await scraper.scrape(scrape_config));

        // The same config object is reused; only keyword and fixture change.
        scrape_config.keywords = ['car tires cheap'];
        scrape_config.scrape_from_file = 'file://' + path.join(__dirname, './html/yandex2.html');
        yandex_search_with_ads2(await scraper.scrape(scrape_config));

        scrape_config.keywords = ['купить деревянные окна'];
        scrape_config.scrape_from_file = 'file://' + path.join(__dirname, './html/yandex3.html');
        yandex_search_with_ads3(await scraper.scrape(scrape_config));
    } finally {
        // Shut the scraper down even when scraping or an assertion throws,
        // otherwise a failing test would skip quit() entirely.
        await scraper.quit();
    }
}
/**
 * Checks the scrape response for the first static Yandex fixture: one
 * request, a result-count string containing '2 million results', at least 12
 * entries, then per-entry validation via confirm_results_ok().
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function yandex_search_with_ads(response) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.include(obj.num_results, '2 million results', 'num results not included');
            assert.containsAllKeys(obj, ['results', 'time', 'num_results'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 12, 'results must have at least 12 SERP objects');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            confirm_results_ok(obj);
        }
    }
}
/**
 * Checks the scrape response for the second static Yandex fixture: one
 * request, a result-count string containing '5 million results', at least 11
 * entries, then per-entry validation via confirm_results_ok().
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function yandex_search_with_ads2(response) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.include(obj.num_results, '5 million results', 'num results not included');
            assert.containsAllKeys(obj, ['results', 'time', 'num_results'], 'not all keys are in the object');
            // The message now states the limit that is actually enforced (11).
            assert.isAtLeast(obj.results.length, 11, 'results must have at least 11 SERP objects');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            confirm_results_ok(obj);
        }
    }
}
/**
 * Checks the scrape response for the third static Yandex fixture: one
 * request, at least 14 entries of which at least 4 are flagged `is_ad`, then
 * per-entry validation via confirm_results_ok().
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function yandex_search_with_ads3(response) {
    assert.equal(response.metadata.num_requests, 1);

    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'num_results'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 14, 'results must have at least 14 SERP objects');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            // Count the entries with a truthy is_ad flag.
            const ad_count = obj.results.filter((res) => res.is_ad).length;
            assert.isAtLeast(ad_count, 4, 'there should be at least 4 ads in the results');

            confirm_results_ok(obj);
        }
    }
}
/**
 * Validates every entry of `obj.results`: all SERP keys are present, the
 * text fields are truthy strings of a minimum length, and `rank` is a number.
 *
 * @param {Object} obj - page object with an iterable `results` collection.
 * @throws when any assertion fails.
 */
function confirm_results_ok(obj) {
    // [field, minimum length], in the order the fields are checked.
    const text_fields = [
        ['link', 5],
        ['visible_link', 5],
        ['title', 10],
        ['snippet', 10],
    ];

    for (const entry of obj.results) {
        assert.containsAllKeys(entry, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');

        text_fields.forEach(([field, min_len]) => {
            const value = entry[field];
            assert.isOk(value, `${field} must be ok`);
            assert.typeOf(value, 'string', `${field} must be string`);
            assert.isAtLeast(value.length, min_len, `${field} must have at least ${min_len} chars`);
        });

        assert.isNumber(entry.rank, 'rank must be integer');
    }
}
// Mocha suite for the static Yandex fixtures. A regular function (not an
// arrow function) is used so that `this.timeout` is available on `this`.
describe('Yandex', function () {
    const SUITE_TIMEOUT_MS = 10000;
    this.timeout(SUITE_TIMEOUT_MS);

    it('static yandex searches with ads', yandex_ads);
});

View File

@ -1,141 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const assert = require('chai').assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
// Keywords submitted by the Amazon "normal search" test; the validator
// expects one top-level result key per keyword.
const normal_search_keywords = ['iphone', 'clock'];

/**
 * Runs an Amazon scrape for `normal_search_keywords` (one page each) and
 * passes the response to normal_search_test_case().
 * No `scrape_from_file` is set, so this presumably queries the live site.
 *
 * @returns {Promise<void>} rejects when scraping fails or an assertion throws.
 */
async function normal_search_test() {
    const browser_options = {
        compress: false,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        random_user_agent: false,
    };
    const search_options = {
        search_engine: 'amazon',
        num_pages: 1,
        keywords: normal_search_keywords,
    };

    console.log('normal_search_test()');

    const response = await se_scraper.scrape(browser_options, search_options);
    normal_search_test_case(response);
}
/**
 * Checks the Amazon scrape response for `normal_search_keywords`: two
 * requests, one result set per keyword, page-level invariants, per-product
 * field validation and a rank that counts up from 1 within each keyword.
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function normal_search_test_case(response) {
    assert.equal(response.metadata.num_requests, 2);
    // Checked once up front: inside the loop this would be skipped entirely
    // (and the test would pass vacuously) for an empty result set.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

    for (const query in response.results) {
        // Rank keeps counting across the pages of one keyword.
        let total_rank = 1;

        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'rank', 'image', 'seller', 'stars', 'num_reviews', 'price', 'oldprice'], 'not all keys are in the SERP object');

                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                assert.isOk(res.seller, 'seller must be ok');
                assert.typeOf(res.seller, 'string', 'seller must be string');
                assert.isAtLeast(res.seller.length, 5, 'seller must have at least 5 chars');

                assert.isOk(res.stars, 'stars must be ok');
                assert.typeOf(res.stars, 'string', 'stars must be string');
                assert.isAtLeast(res.stars.length, 5, 'stars must have at least 5 chars');
                assert.include(res.stars, ' out of ', 'stars must include " out of "');

                assert.isOk(res.num_reviews, 'num_reviews must be ok');
                assert.typeOf(res.num_reviews, 'string', 'num_reviews must be string');
                assert.isAtLeast(res.num_reviews.length, 1, 'num_reviews must have at least 1 chars');

                assert.isOk(res.price, 'price must be ok');
                assert.typeOf(res.price, 'string', 'price must be string');
                assert.isAtLeast(res.price.length, 5, 'price must have at least 5 chars');

                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank is wrong');
            }
        }
    }
}
// A keyword that is expected to produce no Amazon results.
const keywords_no_results = ['2342kljp;fj9834u40abJ54634344023safkl34a44dsflkjaQQuBBdfk',];

/**
 * Runs an Amazon scrape for `keywords_no_results` and passes the response to
 * test_case_no_results().
 * No `scrape_from_file` is set, so this presumably queries the live site.
 *
 * @returns {Promise<void>} rejects when scraping fails or an assertion throws.
 */
async function no_results_test() {
    const browser_options = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        random_user_agent: false,
    };
    const search_options = {
        search_engine: 'amazon',
        num_pages: 1,
        keywords: keywords_no_results,
    };

    console.log('no_results_test()');

    const response = await se_scraper.scrape(browser_options, search_options);
    test_case_no_results(response);
}
/**
 * Checks the Amazon scrape response for a keyword without hits: one request,
 * a result set for the keyword, zero entries, `no_results === true` and an
 * empty `num_results`.
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function test_case_no_results(response) {
    assert.equal(response.metadata.num_requests, 1);
    // Checked once up front: inside the loop this would be skipped entirely
    // (and the test would pass vacuously) for an empty result set.
    assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert(obj.results.length === 0, 'results must have 0 SERP objects');
            assert.equal(obj.no_results, true, 'no results should be true');
            assert.isEmpty(obj.num_results, 'num_results should be an empty string');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
        }
    }
}
// Mocha suite for the Amazon scraper. A regular function (not an arrow
// function) is used so that `this.timeout` is available on `this`.
describe('Amazon', function () {
    const SUITE_TIMEOUT_MS = 30000;
    this.timeout(SUITE_TIMEOUT_MS);

    it('normal search test', normal_search_test);
    it('no results test', no_results_test);
});

View File

@ -1,87 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const assert = require('chai').assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
// Keywords submitted by the Baidu "normal search" test; the validator
// expects one top-level result key per keyword.
const normal_search_keywords = ['mouse', 'cat'];

/**
 * Runs a Baidu scrape for `normal_search_keywords` (two pages each) and
 * passes the response to normal_search_test_case().
 * No `scrape_from_file` is set, so this presumably queries the live site.
 *
 * @returns {Promise<void>} rejects when scraping fails or an assertion throws.
 */
async function normal_search_test() {
    const browser_options = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    };
    const search_options = {
        search_engine: 'baidu',
        keywords: normal_search_keywords,
        num_pages: 2,
    };

    console.log('normal_search_test()');

    const response = await se_scraper.scrape(browser_options, search_options);
    normal_search_test_case(response);
}
/**
 * Checks the Baidu scrape response for `normal_search_keywords`: four
 * requests, one result set per keyword, page-level invariants, per-entry
 * field validation and a rank that counts up from 1 within each keyword.
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function normal_search_test_case(response) {
    assert.equal(response.metadata.num_requests, 4);
    // Checked once up front: inside the loop this would be skipped entirely
    // (and the test would pass vacuously) for an empty result set.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

    for (const query in response.results) {
        // Rank keeps counting across the pages of one keyword.
        let total_rank = 1;

        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'num_results', 'no_results'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            assert.equal(obj.no_results, false, 'no results should be false');

            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');

                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                assert.isOk(res.visible_link, 'visible_link must be ok');
                assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');

                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                assert.isOk(res.snippet, 'snippet must be ok');
                assert.typeOf(res.snippet, 'string', 'snippet must be string');
                assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');

                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank is wrong');
            }
        }
    }
}
// Mocha suite for the Baidu scraper. A regular function (not an arrow
// function) is used so that `this.timeout` is available on `this`.
describe('Baidu', function () {
    const SUITE_TIMEOUT_MS = 30000;
    this.timeout(SUITE_TIMEOUT_MS);

    it('normal search test', normal_search_test);
});

View File

@ -1,271 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
// Keywords submitted by the Bing "normal search" test; the validator
// expects one top-level result key per keyword.
const normal_search_keywords = ['apple tree', 'weather tomorrow'];

/**
 * Runs a Bing scrape for `normal_search_keywords` (three pages each) and
 * passes the response to normal_search_test_case().
 * No `scrape_from_file` is set, so this presumably queries the live site.
 *
 * @returns {Promise<void>} rejects when scraping fails or an assertion throws.
 */
async function normal_search_test() {
    // The first argument also carries search_engine, keywords and num_pages,
    // which are repeated in the second argument.
    const browser_options = {
        search_engine: 'bing',
        compress: false,
        debug_level: 1,
        keywords: normal_search_keywords,
        keyword_file: '',
        num_pages: 3,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const search_options = {
        search_engine: 'bing',
        keywords: normal_search_keywords,
        num_pages: 3,
    };

    console.log('normal_search_test()');

    const response = await se_scraper.scrape(browser_options, search_options);
    normal_search_test_case(response);
}
/**
 * Checks the Bing scrape response for `normal_search_keywords`: six
 * requests, one result set per keyword, page-level invariants, per-entry
 * field validation (snippet is optional) and a rank that counts up from 1
 * within each keyword.
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function normal_search_test_case(response) {
    assert.equal(response.metadata.num_requests, 6);
    // Checked once up front: inside the loop this would be skipped entirely
    // (and the test would pass vacuously) for an empty result set.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');

    for (const query in response.results) {
        // Rank keeps counting across the pages of one keyword.
        let total_rank = 1;

        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            for (const res of obj.results) {
                // 'rank' was listed twice in the original key list.
                assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link'], 'not all keys are in the SERP object');

                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                assert.isOk(res.visible_link, 'visible_link must be ok');
                assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');

                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                // The snippet is only validated when one is present.
                if (res.snippet) {
                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
                }

                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank is wrong');
            }
        }
    }
}
// A keyword that is expected to produce no Bing results.
const keywords_no_results = ['2342kljp;fj9834u40abJAkasdlfkjsladfkjasfdas;lk3453-934023safkl34a44dsflkjaQQuBBdfk',];

/**
 * Runs a Bing scrape for `keywords_no_results` and passes the response to
 * test_case_no_results().
 * No `scrape_from_file` is set, so this presumably queries the live site.
 *
 * @returns {Promise<void>} rejects when scraping fails or an assertion throws.
 */
async function no_results_test() {
    // The first argument also carries search_engine, keywords and num_pages,
    // which are repeated in the second argument.
    const browser_options = {
        search_engine: 'bing',
        compress: false,
        debug_level: 1,
        keywords: keywords_no_results,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const search_options = {
        search_engine: 'bing',
        keywords: keywords_no_results,
        num_pages: 1,
    };

    console.log('no_results_test()');

    const response = await se_scraper.scrape(browser_options, search_options);
    test_case_no_results(response);
}
/**
 * Checks the Bing scrape response for a keyword without hits: one request,
 * a result set for the keyword, zero entries, `no_results === true` and an
 * empty `num_results`.
 *
 * @param {Object} response - object exposing `metadata.num_requests` and
 *     `results[query][page_number]`.
 * @throws when any assertion fails.
 */
function test_case_no_results(response) {
    assert.equal(response.metadata.num_requests, 1);
    // Checked once up front: inside the loop this would be skipped entirely
    // (and the test would pass vacuously) for an empty result set.
    assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has typeof 'number', so isNumber() on parseInt output can
            // never fail; test for NaN explicitly instead.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            const obj = response.results[query][page_number];

            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert(obj.results.length === 0, 'results must have 0 SERP objects');
            assert.equal(obj.no_results, true, 'no results should be true');
            assert.isEmpty(obj.num_results, 'num_results should be an empty string');
            // Date.parse() returns NaN (still typeof 'number') for bad input.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
        }
    }
}
// Deliberately misspelled keyword; the validator requires the reported
// effective query to differ from it.
const effective_query_keywords = ['mount everrest'];

/**
 * Runs a Bing scrape for `effective_query_keywords` and passes the response
 * to test_case_effective_query().
 * No `scrape_from_file` is set, so this presumably queries the live site.
 *
 * @returns {Promise<void>} rejects when scraping fails or an assertion throws.
 */
async function effective_query_test() {
    const browser_options = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const search_options = {
        search_engine: 'bing',
        keywords: effective_query_keywords,
        num_pages: 1,
    };

    console.log('effective_query_test()');

    const response = await se_scraper.scrape(browser_options, search_options);
    test_case_effective_query(response);
}
/**
 * Validates the response of the Bing "effective query" scrape.
 *
 * Expects one request, an entry for every keyword in
 * `effective_query_keywords` and, for each scraped page, a non-empty
 * `effective_query` that differs from the keyword, at least 7 results,
 * `no_results === false`, a `num_results` string of 5+ chars and a parseable
 * `time`.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_effective_query(response) {
    assert.equal(response.metadata.num_requests, 1);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            // effective query must be different to the original keyword
            assert.isOk(obj.effective_query, 'effective query must be ok');
            assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
            assert(obj.effective_query !== query, 'effective query must be different from keyword');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
        }
    }
}
// Keywords for which test_case_ads_test() expects ads in the Bing response.
const ads_keywords = ['cloud services', 'buy shoes'];

/**
 * Calls se_scraper.scrape() for one Bing page per keyword in `ads_keywords`
 * and passes the awaited result to test_case_ads_test().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function ads_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        headless: true,
        block_assets: false,
        random_user_agent: true,
    };
    const bing_query = {
        search_engine: 'bing',
        keywords: ads_keywords,
        num_pages: 1,
    };

    console.log('ads_test()');
    const response = await se_scraper.scrape(browser_config, bing_query);
    test_case_ads_test(response);
}
/**
 * Validates the response of the Bing ads scrape.
 *
 * Expects two requests, an entry for every keyword in `ads_keywords` and, for
 * each scraped page, at least 5 results, `no_results === false`, a
 * `num_results` string of 5+ chars, a parseable `time` and at least 2 ads.
 * Every ad must carry non-empty string fields of a minimum length.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_ads_test(response) {
    // [field, minimum length] pairs required on every ad. `link` is not
    // asserted: its checks were disabled in the original test.
    const ad_fields = [
        ['tracking_link', 5],
        ['visible_link', 5],
        ['title', 10],
        ['snippet', 10],
    ];

    assert.equal(response.metadata.num_requests, 2);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, ads_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query', 'ads'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 5, 'results must have at least 5 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            assert.isAtLeast(obj.ads.length, 2, 'ads must have at least 2 SERP objects');
            for (const res of obj.ads) {
                for (const [field, min_length] of ad_fields) {
                    assert.isOk(res[field], `${field} must be ok`);
                    assert.typeOf(res[field], 'string', `${field} must be string`);
                    assert.isAtLeast(res[field].length, min_length, `${field} must have at least ${min_length} chars`);
                }
            }
        }
    }
}
// Mocha suite for the Bing scraper tests.
describe('Bing', function () {
    // A regular function (not an arrow) is used because `this` must be the
    // suite context; the timeout is given in milliseconds.
    this.timeout(30000);

    const cases = [
        ['normal search', normal_search_test],
        ['no results', no_results_test],
        ['effective query', effective_query_test],
        ['finds ads', ads_test],
    ];
    for (const [title, run] of cases) {
        it(title, run);
    }
});

View File

@ -1,192 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
// Keywords used by the regular DuckDuckGo search test.
const normal_search_keywords = ['apple tree', 'weather tomorrow'];

/**
 * Calls se_scraper.scrape() for two DuckDuckGo pages per keyword in
 * `normal_search_keywords` and passes the awaited result to
 * normal_search_test_case().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function normal_search_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        headless: true,
        block_assets: false,
        random_user_agent: true,
    };
    const ddg_query = {
        search_engine: 'duckduckgo',
        keywords: normal_search_keywords,
        num_pages: 2,
    };

    console.log('normal_search_test()');
    const response = await se_scraper.scrape(browser_config, ddg_query);
    normal_search_test_case(response);
}
/**
 * Validates the response of the regular DuckDuckGo scrape.
 *
 * Expects an entry for every keyword in `normal_search_keywords` and, for each
 * scraped page, at least 7 results and a parseable `time`. Every result must
 * carry non-empty `link`, `visible_link`, `title` and `snippet` strings and a
 * `rank` that counts up from 1 across all pages of the same keyword.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function normal_search_test_case(response) {
    // NOTE(review): two keywords with num_pages 2 are scraped, yet 2 requests
    // are expected — confirm how num_requests is counted for this engine.
    assert.equal(response.metadata.num_requests, 2);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        // Rank is expected to continue over page boundaries, so it is reset per keyword only.
        let total_rank = 1;
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'snippet'], 'not all keys are in the SERP object');
                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
                assert.isOk(res.visible_link, 'visible_link must be ok');
                assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
                assert.isOk(res.snippet, 'snippet must be ok');
                assert.typeOf(res.snippet, 'string', 'snippet must be string');
                assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank ist wrong');
            }
        }
    }
}
// Misspelled keyword; test_case_effective_query() expects the reported
// effective query to differ from it.
const effective_query_keywords = ['mount everrest'];

/**
 * Calls se_scraper.scrape() for one DuckDuckGo page of
 * `effective_query_keywords` and passes the awaited result to
 * test_case_effective_query().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function effective_query_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        headless: true,
        block_assets: true,
        random_user_agent: true,
    };
    const ddg_query = {
        search_engine: 'duckduckgo',
        keywords: effective_query_keywords,
        num_pages: 1,
    };

    console.log('test_case_effective_query()');
    const response = await se_scraper.scrape(browser_config, ddg_query);
    test_case_effective_query(response);
}
/**
 * Validates the response of the DuckDuckGo "effective query" scrape.
 *
 * Expects one request, an entry for every keyword in
 * `effective_query_keywords` and, for each scraped page, a non-empty
 * `effective_query` that differs from the keyword, at least 7 results and a
 * parseable `time`.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_effective_query(response) {
    assert.equal(response.metadata.num_requests, 1);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
            // effective query must be different to the original keyword
            assert.isOk(obj.effective_query, 'effective query must be ok');
            assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
            assert(obj.effective_query !== query, 'effective query must be different from keyword');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
        }
    }
}
// Keywords for which test_case_ads_test() expects ads in the DuckDuckGo response.
const ads_keywords = ['cloud services', 'buy shoes'];

/**
 * Calls se_scraper.scrape() for one DuckDuckGo page per keyword in
 * `ads_keywords` and passes the awaited result to test_case_ads_test().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function ads_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        headless: true,
        block_assets: false,
        random_user_agent: false,
    };
    const ddg_query = {
        search_engine: 'duckduckgo',
        keywords: ads_keywords,
        num_pages: 1,
    };

    console.log('ads_test()');
    const response = await se_scraper.scrape(browser_config, ddg_query);
    test_case_ads_test(response);
}
/**
 * Validates the response of the DuckDuckGo ads scrape.
 *
 * Expects two requests, an entry for every keyword in `ads_keywords` and, for
 * each scraped page, at least 6 results, a parseable `time` and at least 2
 * ads. Every ad must carry non-empty string fields of a minimum length.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_ads_test(response) {
    // [field, minimum length] pairs required on every ad.
    const ad_fields = [
        ['tracking_link', 5],
        ['visible_link', 5],
        ['title', 10],
        ['snippet', 10],
    ];

    assert.equal(response.metadata.num_requests, 2);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, ads_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'effective_query', 'ads'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 6, 'results must have at least 6 SERP objects');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            assert.isAtLeast(obj.ads.length, 2, 'ads must have at least 2 SERP object');
            for (const res of obj.ads) {
                for (const [field, min_length] of ad_fields) {
                    assert.isOk(res[field], `${field} must be ok`);
                    assert.typeOf(res[field], 'string', `${field} must be string`);
                    assert.isAtLeast(res[field].length, min_length, `${field} must have at least ${min_length} chars`);
                }
            }
        }
    }
}
// Mocha suite for the DuckDuckGo scraper tests.
describe('Duckduckgo', function () {
    // A regular function (not an arrow) is used because `this` must be the
    // suite context; the timeout is given in milliseconds.
    this.timeout(30000);

    const cases = [
        ['normal search', normal_search_test],
        ['effective query', effective_query_test],
        ['finds ads', ads_test],
    ];
    for (const [title, run] of cases) {
        it(title, run);
    }
});

View File

@ -1,424 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
const assert = chai.assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
// Keywords used by the regular Google search test and the HTML output test.
const normal_search_keywords = ['apple tree', 'weather tomorrow'];

/**
 * Calls se_scraper.scrape() for three Google pages per keyword in
 * `normal_search_keywords` and passes the awaited result to
 * normal_search_test_case().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function normal_search_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const google_query = {
        search_engine: 'google',
        keywords: normal_search_keywords,
        num_pages: 3,
    };

    console.log('normal_search_test()');
    const response = await se_scraper.scrape(browser_config, google_query);
    normal_search_test_case(response);
}
/**
 * Validates the response of the regular Google scrape.
 *
 * Expects six requests, an entry for every keyword in
 * `normal_search_keywords` and, for each scraped page, at least 7 results,
 * `no_results === false`, a `num_results` string of 5+ chars and a parseable
 * `time`. Every result must carry non-empty `link`, `visible_link`, `title`
 * and `snippet` strings and a `rank` that counts up from 1 across all pages
 * of the same keyword.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function normal_search_test_case(response) {
    assert.equal(response.metadata.num_requests, 6);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        // Rank is expected to continue over page boundaries, so it is reset per keyword only.
        let total_rank = 1;
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link'], 'not all keys are in the SERP object');
                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
                assert.isOk(res.visible_link, 'visible_link must be ok');
                assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
                assert.isOk(res.snippet, 'snippet must be ok');
                assert.typeOf(res.snippet, 'string', 'snippet must be string');
                assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank ist wrong');
            }
        }
    }
}
// Random-looking keyword for which test_case_no_results() expects an empty result list.
const keywords_no_results = ['fgskl34440abJAksfs4353534a3l34AVGFDFflkjaQQuBBdfk',];

/**
 * Calls se_scraper.scrape() for one Google page of `keywords_no_results` and
 * passes the awaited result to test_case_no_results().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function no_results_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const google_query = {
        search_engine: 'google',
        keywords: keywords_no_results,
        num_pages: 1,
    };

    console.log('no_results_test()');
    const response = await se_scraper.scrape(browser_config, google_query);
    test_case_no_results(response);
}
/**
 * Validates the response of the Google "no results" scrape.
 *
 * Expects one request, an entry for every keyword in `keywords_no_results`
 * and, for each scraped page, an empty `results` array, `no_results === true`,
 * an empty `num_results` and a parseable `time`.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_no_results(response) {
    assert.equal(response.metadata.num_requests, 1);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert.strictEqual(obj.results.length, 0, 'results must have 0 SERP objects');
            assert.equal(obj.no_results, true, 'no results should be true');
            assert.isEmpty(obj.num_results, 'num_results should be a empty string');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
        }
    }
}
// Misspelled keyword; test_case_effective_query() expects the reported
// effective query to differ from it.
const effective_query_keywords = ['mount evverrest'];

/**
 * Calls se_scraper.scrape() for one Google page of `effective_query_keywords`
 * and passes the awaited result to test_case_effective_query().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function effective_query_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const google_query = {
        search_engine: 'google',
        keywords: effective_query_keywords,
        num_pages: 1,
    };

    console.log('effective_query_test()');
    const response = await se_scraper.scrape(browser_config, google_query);
    test_case_effective_query(response);
}
/**
 * Validates the response of the Google "effective query" scrape.
 *
 * Expects one request, an entry for every keyword in
 * `effective_query_keywords` and, for each scraped page, a non-empty
 * `effective_query` that differs from the keyword, at least 7 results,
 * `no_results === false`, a `num_results` string of 5+ chars and a parseable
 * `time`.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_effective_query(response) {
    assert.equal(response.metadata.num_requests, 1);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            // effective query must be different to the original keyword
            assert.isOk(obj.effective_query, 'effective query must be ok');
            assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
            assert(obj.effective_query !== query, 'effective query must be different from keyword');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
        }
    }
}
/**
 * Calls se_scraper.scrape() for three Google pages per keyword in
 * `normal_search_keywords` with `html_output: true`, then passes the awaited
 * result to normal_search_test_case() and check_html_output_test_case().
 *
 * @returns {Promise<void>} settles once both checks have run.
 */
async function html_output_query_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        keyword_file: '',
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const google_query = {
        search_engine: 'google',
        keywords: normal_search_keywords,
        num_pages: 3,
        html_output: true,
    };

    const output = await se_scraper.scrape(browser_config, google_query);
    for (const check of [normal_search_test_case, check_html_output_test_case]) {
        check(output);
    }
}
/**
 * Validates the `html_output` part of a scrape response.
 *
 * Expects `response.html_output` to hold an entry for every keyword in
 * `normal_search_keywords`, each mapping numeric page keys to a string that
 * starts with `<!DOCTYPE html><html`.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.html_output[keyword][page]`.
 * @throws when any expectation is violated.
 */
function check_html_output_test_case( response ) {
    // Checked once, outside the loop: a missing or empty `html_output` would
    // otherwise skip every assertion and pass.
    assert.containsAllKeys(response.html_output, normal_search_keywords, 'not all keywords were scraped.');
    for (const query in response.html_output) {
        for (const page_number in response.html_output[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            assert.startsWith(response.html_output[query][page_number], '<!DOCTYPE html><html');
        }
    }
}
// Keywords for which test_case_ads_test() expects ads in the Google response.
const ads_keywords = ['cloud services', 'auto kaufen'];

/**
 * Calls se_scraper.scrape() for one Google page per keyword in `ads_keywords`
 * and passes the awaited result to test_case_ads_test().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function ads_test() {
    const browser_config = {
        compress: false,
        debug_level: 1,
        headless: true,
        block_assets: false,
        // dont try to trick google with ads
        random_user_agent: false,
    };
    const google_query = {
        search_engine: 'google',
        keywords: ads_keywords,
        num_pages: 1,
    };

    console.log('ads_test()');
    const response = await se_scraper.scrape(browser_config, google_query);
    test_case_ads_test(response);
}
/**
 * Validates the response of the Google ads scrape.
 *
 * Expects two requests, an entry for every keyword in `ads_keywords` and, for
 * each scraped page, at least 7 results, `no_results === false`, a
 * `num_results` string of 5+ chars, a parseable `time` and at least one ad in
 * `top_ads` or `bottom_ads`. Every ad must carry non-empty string fields of a
 * minimum length and a `links` array.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_ads_test(response) {
    // [field, minimum length] pairs required on every top and bottom ad.
    const ad_fields = [
        ['tracking_link', 5],
        ['visible_link', 5],
        ['link', 5],
        ['title', 10],
        ['snippet', 10],
    ];

    assert.equal(response.metadata.num_requests, 2);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, ads_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query', 'top_ads', 'bottom_ads', 'places'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            assert(obj.top_ads.length >= 1 || obj.bottom_ads.length >= 1, 'top_ads or bottom_ads must have at least 1 SERP object');
            // Top and bottom ads share one shape, so they are checked in a single pass.
            for (const res of [...obj.top_ads, ...obj.bottom_ads]) {
                for (const [field, min_length] of ad_fields) {
                    assert.isOk(res[field], `${field} must be ok`);
                    assert.typeOf(res[field], 'string', `${field} must be string`);
                    assert.isAtLeast(res[field].length, min_length, `${field} must have at least ${min_length} chars`);
                }
                assert.typeOf(res.links, 'array', 'links must be array');
            }
        }
    }
}
// Keyword scraped by products_test(); test_case_products_test() expects
// product listings for it.
const product_keywords = ['autoreifen bmw'];

/**
 * Calls se_scraper.scrape() for one Google page of `product_keywords` and
 * passes the awaited result to test_case_products_test().
 *
 * The original scraped `ads_keywords` and left `product_keywords` unused.
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function products_test() {
    let config = {
        compress: false,
        debug_level: 1,
        headless: true,
        block_assets: false,
        random_user_agent: false, // dont try to trick google with ads
    };
    let scrape_config = {
        search_engine: 'google',
        keywords: product_keywords,
        num_pages: 1,
    };
    console.log('products_test()');
    test_case_products_test( await se_scraper.scrape(config, scrape_config) );
}

/**
 * Validates the response of the Google products scrape.
 *
 * Expects one request (one keyword, one page), an entry for every keyword in
 * `product_keywords` and, for each scraped page, at least 7 results,
 * `no_results === false`, a `num_results` string of 5+ chars, a parseable
 * `time` and at least one product in `top_products` or `right_products`.
 * Every product must carry non-empty string fields of a minimum length.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function test_case_products_test(response) {
    // [field, minimum length] pairs required on every product.
    const product_fields = [
        ['tracking_link', 5],
        ['link', 5],
        ['price', 5],
        ['title', 10],
        ['vendor_link', 10],
    ];

    assert.equal(response.metadata.num_requests, 1);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, product_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            // The product keys are required here because they are dereferenced below.
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query', 'top_ads', 'bottom_ads', 'places', 'top_products', 'right_products'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            assert(obj.top_products.length >= 1 || obj.right_products.length >= 1, 'top_products or right_products must have at least 1 SERP object');
            // Top and right-hand products share one shape, so they are checked in a single pass.
            for (const res of [...obj.top_products, ...obj.right_products]) {
                for (const [field, min_length] of product_fields) {
                    assert.isOk(res[field], `${field} must be ok`);
                    assert.typeOf(res[field], 'string', `${field} must be string`);
                    assert.isAtLeast(res[field].length, min_length, `${field} must have at least ${min_length} chars`);
                }
            }
        }
    }
}
// Mocha suite for the Google scraper tests.
describe('Google', function () {
    // A regular function (not an arrow) is used because `this` must be the
    // suite context; the timeout is given in milliseconds.
    this.timeout(30000);

    const cases = [
        ['normal search', normal_search_test],
        ['no results', no_results_test],
        ['effective query', effective_query_test],
        ['html output query', html_output_query_test],
        ['ads', ads_test],
        ['products test', products_test],
    ];
    for (const [title, run] of cases) {
        it(title, run);
    }
});

View File

@ -1,80 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const assert = require('chai').assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
// Keywords used by the Google image search test.
const normal_search_keywords = ['apple', 'rain'];

/**
 * Calls se_scraper.scrape() for two Google image pages per keyword in
 * `normal_search_keywords` and passes the awaited result to
 * normal_image_search_test_case().
 *
 * @returns {Promise<void>} settles once the response has been checked.
 */
async function normal_image_search_test() {
    const browser_config = {
        compress: false,
        debug_level: 0,
        headless: true,
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };
    const image_query = {
        search_engine: 'google_image',
        keywords: normal_search_keywords,
        num_pages: 2,
    };

    console.log('normal_image_search_test()');
    const response = await se_scraper.scrape(browser_config, image_query);
    normal_image_search_test_case(response);
}
/**
 * Validates the response of the Google image scrape.
 *
 * Expects an entry for every keyword in `normal_search_keywords` and, for each
 * scraped page, at least 15 results, `no_results === false` and a parseable
 * `time`. Every result must carry non-empty `link`, `clean_link` and `snippet`
 * strings and a `rank` that counts up from 1 across all pages of the same
 * keyword.
 *
 * @param {object} response - value produced by `se_scraper.scrape()`; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws when any expectation is violated.
 */
function normal_image_search_test_case(response) {
    // NOTE(review): two keywords with num_pages 2 are scraped, yet 2 requests
    // are expected — confirm how num_requests is counted for this engine.
    assert.equal(response.metadata.num_requests, 2);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        // Rank is expected to continue over page boundaries, so it is reset per keyword only.
        let total_rank = 1;
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'effective_query'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 15, 'results must have at least 15 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'snippet', 'rank', 'clean_link'], 'not all keys are in the SERP object');
                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
                assert.isOk(res.clean_link, 'clean_link must be ok');
                assert.typeOf(res.clean_link, 'string', 'clean_link must be string');
                assert.isAtLeast(res.clean_link.length, 5, 'clean_link must have at least 5 chars');
                assert.isOk(res.snippet, 'snippet must be ok');
                assert.typeOf(res.snippet, 'string', 'snippet must be string');
                assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank ist wrong');
            }
        }
    }
}
// Mocha suite for the Google image scraper test.
describe('Google Image', function () {
    // A regular function (not an arrow) is used because `this` must be the
    // suite context; the timeout is given in milliseconds.
    this.timeout(30000);

    const cases = [
        ['normal image search test', normal_image_search_test],
    ];
    for (const [title, run] of cases) {
        it(title, run);
    }
});

View File

@ -1,91 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const assert = require('chai').assert;
// Keyword used by the Google query-argument test.
const normal_search_keywords = ['apple juice'];

/**
 * Calls se_scraper.scrape() with Google-specific settings and
 * queryargs_search_test_case as the second argument.
 *
 * @returns {Promise<void>} settles once the scrape() call has been awaited.
 */
async function queryargs_search_test() {
    // use specific search engine parameters for various search engines
    const google_settings = {
        google_domain: 'google.com',
        gl: 'fr', // The gl parameter determines the Google country to use for the query.
        hl: 'fr', // The hl parameter determines the Google UI language to return results.
        start: 30, // Determines the results offset to use, defaults to 0.
        num: 100, // Determines the number of results to show, defaults to 10. Maximum is 100.
    };
    const scraper_config = {
        search_engine: 'google',
        compress: false,
        debug: true,
        verbose: true,
        keywords: normal_search_keywords,
        keyword_file: '',
        num_pages: 2,
        headless: true,
        output_file: '',
        block_assets: true,
        google_settings,
    };

    console.log('queryargs_search_test()');
    await se_scraper.scrape(scraper_config, queryargs_search_test_case);
}
/**
 * Node-style callback that validates the response of the Google
 * query-argument scrape.
 *
 * Expects two requests, an entry for every keyword in
 * `normal_search_keywords` and, for each scraped page, at least 90 results,
 * `no_results === false`, a `num_results` string of 5+ chars and a parseable
 * `time`. Every result must carry non-empty `link`, `visible_link`, `title`
 * and `snippet` strings and a `rank` that counts up from 1 across all pages
 * of the same keyword.
 *
 * @param {*} err - error reported by the scraper, falsy on success.
 * @param {object} response - scrape result; read as
 *     `response.metadata.num_requests` and `response.results[keyword][page]`.
 * @throws `err` itself when it is set, otherwise when any expectation is
 *     violated.
 */
function queryargs_search_test_case(err, response) {
    // A scraper error must fail the test; only logging it would let the test pass.
    if (err) {
        throw err;
    }

    assert.equal(response.metadata.num_requests, 2);
    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        // Rank is expected to continue over page boundaries, so it is reset per keyword only.
        let total_rank = 1;
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 90, 'results must have at least 90 SERP objects');
            assert.equal(obj.no_results, false, 'no results should be false');
            assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
            assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link'], 'not all keys are in the SERP object');
                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
                assert.isOk(res.visible_link, 'visible_link must be ok');
                assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
                assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
                assert.isOk(res.snippet, 'snippet must be ok');
                assert.typeOf(res.snippet, 'string', 'snippet must be string');
                assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
                assert.isNumber(res.rank, 'rank must be integer');
                assert.equal(res.rank, total_rank++, 'rank ist wrong');
            }
        }
    }
}
// Mocha suite for the Google query-argument test.
describe('Google with query arguments', function () {
    // A regular function (not an arrow) is used because `this` must be the
    // suite context; the timeout is given in milliseconds.
    this.timeout(30000);

    const cases = [
        ['query args search test', queryargs_search_test],
    ];
    for (const [title, run] of cases) {
        it(title, run);
    }
});

View File

@ -1,217 +0,0 @@
'use strict';
const se_scraper = require('./../index.js');
const assert = require('chai').assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
// Keywords shared by the news-site search tests in this file.
const quote_search_keywords = ['MSFT', 'AAPL'];

/**
 * Calls se_scraper.scrape() with a Reuters configuration and
 * reuters_search_test_case as the second argument.
 *
 * @returns {Promise<void>} settles once the scrape() call has been awaited.
 */
async function reuters_search_test() {
    const scraper_config = {
        search_engine: 'reuters',
        compress: false,
        debug: false,
        verbose: false,
        keywords: quote_search_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        random_user_agent: false,
    };

    console.log('reuters_search_test()');
    await se_scraper.scrape(scraper_config, reuters_search_test_case);
}
/**
 * Node-style callback that validates the response of the Reuters scrape.
 *
 * Expects an entry for every keyword in `quote_search_keywords` and, for each
 * scraped page, at least 7 results and a parseable `time`. Every result must
 * carry non-empty `link`, `title`, `snippet` and `date` strings of a minimum
 * length.
 *
 * @param {*} err - error reported by the scraper, falsy on success.
 * @param {object} response - scrape result; read as
 *     `response.results[keyword][page]`.
 * @throws `err` itself when it is set, otherwise when any expectation is
 *     violated.
 */
function reuters_search_test_case(err, response) {
    // A scraper error must fail the test; only logging it would let the test pass.
    if (err) {
        throw err;
    }

    // [field, minimum length] pairs required on every result.
    const result_fields = [
        ['link', 5],
        ['title', 5],
        ['snippet', 10],
        ['date', 5],
    ];

    // Checked once, outside the loop, so that an empty `results` object fails
    // instead of skipping every assertion.
    assert.containsAllKeys(response.results, quote_search_keywords, 'not all keywords were scraped.');
    for (const query in response.results) {
        for (const page_number in response.results[query]) {
            // NaN has type "number", so a type check alone could never fail here.
            assert(!Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');
            const obj = response.results[query][page_number];
            assert.containsAllKeys(obj, ['results', 'time'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            // Date.parse() yields NaN for an unparseable value.
            assert(!Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');
            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'date', 'snippet'], 'not all keys are in the SERP object');
                for (const [field, min_length] of result_fields) {
                    assert.isOk(res[field], `${field} must be ok`);
                    assert.typeOf(res[field], 'string', `${field} must be string`);
                    assert.isAtLeast(res[field].length, min_length, `${field} must have at least ${min_length} chars`);
                }
            }
        }
    }
}
/**
 * Mocha test body for the CNBC engine: runs a single-page, headless scrape
 * of `quote_search_keywords` with a fixed user agent and passes the outcome
 * to `cnbc_search_test_case`.
 *
 * @returns {Promise<void>} Settles once `se_scraper.scrape()` has settled.
 */
async function cnbc_search_test() {
    const fixed_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36';

    const scrape_options = {
        search_engine: 'cnbc',
        compress: false,
        debug: false,
        verbose: false,
        keywords: quote_search_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: fixed_user_agent,
        random_user_agent: false,
    };

    console.log('cnbc_search_test()');

    // The callback performs the actual assertions on the response.
    await se_scraper.scrape(scrape_options, cnbc_search_test_case);
}
/**
 * Callback handed to `se_scraper.scrape()` by the CNBC test; validates the
 * scraped response with chai assertions.
 *
 * Layout read below: `response.results[keyword][page_number]` is an object
 * with a `results` array of SERP entries and a `time` string.
 *
 * @param {*} err - Error reported by the scraper, or a falsy value on success.
 * @param {Object} response - Scrape response; only `response.results` is read.
 * @throws Rethrows `err` when it is set; chai throws an AssertionError when a
 *     check fails.
 */
function cnbc_search_test_case(err, response) {
    if (err) {
        // Only logging the error would let the test pass on a failed scrape.
        // NOTE(review): assumes scrape() lets exceptions thrown by this
        // callback reach the awaiting test, which the assertions below
        // already rely on -- confirm.
        throw err;
    }

    // Checked once up front so an empty result set is reported as well.
    assert.containsAllKeys(response.results, quote_search_keywords, 'not all keywords were scraped.');

    for (const pages of Object.values(response.results)) {
        for (const [page_number, obj] of Object.entries(pages)) {
            // parseInt() yields NaN (still typeof 'number') for non-numeric
            // keys, so test for NaN explicitly.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            assert.containsAllKeys(obj, ['results', 'time'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            // Date.parse() returns NaN for an unparsable date string.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'date'], 'not all keys are in the SERP object');

                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                assert.isOk(res.date, 'date must be ok');
                assert.typeOf(res.date, 'string', 'date must be string');
                assert.isAtLeast(res.date.length, 5, 'date must have at least 5 chars');
            }
        }
    }
}
// Keywords searched by the Marketwatch test below; its result handler also
// asserts that each of these appears as a key of the response.
const marketwatch_search_keywords = ['MSFT'];
/**
 * Mocha test body for the Marketwatch engine: runs a single-page, headless
 * scrape of `marketwatch_search_keywords` with a fixed user agent and passes
 * the outcome to `marketwatch_search_test_case`.
 *
 * @returns {Promise<void>} Settles once `se_scraper.scrape()` has settled.
 */
async function marketwatch_search_test() {
    const fixed_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36';

    const scrape_options = {
        search_engine: 'marketwatch',
        compress: false,
        debug: false,
        verbose: false,
        keywords: marketwatch_search_keywords,
        keyword_file: '',
        num_pages: 1,
        headless: true,
        output_file: '',
        block_assets: true,
        user_agent: fixed_user_agent,
        random_user_agent: false,
    };

    console.log('marketwatch_search_test()');

    // The callback performs the actual assertions on the response.
    await se_scraper.scrape(scrape_options, marketwatch_search_test_case);
}
/**
 * Callback handed to `se_scraper.scrape()` by the Marketwatch test; validates
 * the scraped response with chai assertions.
 *
 * Layout read below: `response.results[keyword][page_number]` is an object
 * with a `results` array of SERP entries and a `time` string.
 *
 * @param {*} err - Error reported by the scraper, or a falsy value on success.
 * @param {Object} response - Scrape response; only `response.results` is read.
 * @throws Rethrows `err` when it is set; chai throws an AssertionError when a
 *     check fails.
 */
function marketwatch_search_test_case(err, response) {
    if (err) {
        // Only logging the error would let the test pass on a failed scrape.
        // NOTE(review): assumes scrape() lets exceptions thrown by this
        // callback reach the awaiting test, which the assertions below
        // already rely on -- confirm.
        throw err;
    }

    // Checked once up front so an empty result set is reported as well.
    assert.containsAllKeys(response.results, marketwatch_search_keywords, 'not all keywords were scraped.');

    for (const pages of Object.values(response.results)) {
        for (const [page_number, obj] of Object.entries(pages)) {
            // parseInt() yields NaN (still typeof 'number') for non-numeric
            // keys, so test for NaN explicitly.
            assert.isFalse(Number.isNaN(Number.parseInt(page_number, 10)), 'page_number must be numeric');

            assert.containsAllKeys(obj, ['results', 'time'], 'not all keys are in the object');
            assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
            // Date.parse() returns NaN for an unparsable date string.
            assert.isFalse(Number.isNaN(Date.parse(obj.time)), 'time should be a valid date');

            for (const res of obj.results) {
                assert.containsAllKeys(res, ['link', 'title', 'date', 'author'], 'not all keys are in the SERP object');

                assert.isOk(res.link, 'link must be ok');
                assert.typeOf(res.link, 'string', 'link must be string');
                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');

                assert.isOk(res.title, 'title must be ok');
                assert.typeOf(res.title, 'string', 'title must be string');
                assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');

                assert.isOk(res.author, 'author must be ok');
                assert.typeOf(res.author, 'string', 'author must be string');
                assert.isAtLeast(res.author.length, 5, 'author must have at least 5 chars');

                assert.isOk(res.date, 'date must be ok');
                assert.typeOf(res.date, 'string', 'date must be string');
                assert.isAtLeast(res.date.length, 5, 'date must have at least 5 chars');
            }
        }
    }
}
// Mocha suite grouping the three ticker/news engine scrape tests.
describe('Ticker', function () {
    // A regular function (not an arrow) is required here so that `this`
    // is the Mocha suite context exposing `timeout()`.
    this.timeout(30000);

    // Registered in the listed order: [test title, async test body].
    const ticker_cases = [
        ['Reuters search test', reuters_search_test],
        ['CNBC search test', cnbc_search_test],
        ['Marketwatch search test', marketwatch_search_test],
    ];

    for (const [title, run_case] of ticker_cases) {
        it(title, run_case);
    }
});