added clean test cases for bing and duckduckgo

This commit is contained in:
Nikolai Tschacher 2019-01-31 15:36:27 +01:00
parent 7441c57a43
commit d35a602994
5 changed files with 358 additions and 13 deletions

View File

@ -18,8 +18,9 @@ class BingScraper extends Scraper {
}) })
}); });
// 'Including results for', 'Einschließlich Ergebnisse'
let no_results = this.no_results( let no_results = this.no_results(
['There are no results', 'Es gibt keine Ergebnisse', 'Including results for', 'Einschließlich Ergebnisse'], ['There are no results', 'Es gibt keine Ergebnisse'],
$('#b_results').text() $('#b_results').text()
); );

View File

@ -19,6 +19,8 @@ class DuckduckgoScraper extends Scraper {
}); });
}); });
let effective_query = $('#did_you_mean a.js-spelling-suggestion-link').attr('data-query') || '';
const cleaned = []; const cleaned = [];
for (var i=0; i < results.length; i++) { for (var i=0; i < results.length; i++) {
let res = results[i]; let res = results[i];
@ -30,6 +32,7 @@ class DuckduckgoScraper extends Scraper {
return { return {
time: (new Date()).toUTCString(), time: (new Date()).toUTCString(),
effective_query: effective_query,
results: cleaned results: cleaned
} }
} }

201
test/test_bing.js Normal file
View File

@ -0,0 +1,201 @@
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
const normal_search_keywords = ['apple tree', 'weather tomorrow'];
async function normal_search_test() {
let config = {
search_engine: 'bing',
compress: false,
debug: false,
verbose: false,
keywords: normal_search_keywords,
keyword_file: '',
num_pages: 3,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
}
// we test with a callback function to our handler
function normal_search_test_case(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
let total_rank = 1;
for (let query in response.results) {
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}
}
}
const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',];
async function no_results_test() {
let config = {
search_engine: 'bing',
compress: false,
debug: false,
verbose: false,
keywords: keywords_no_results,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
console.log('no_results_test()');
await se_scraper.scrape(config, test_case_no_results);
}
// we test with a callback function to our handler
function test_case_no_results(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
results = response.results;
for (let query in response.results) {
assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}
}
const effective_query_keywords = ['mount everrest'];
async function effective_query_test() {
let config = {
search_engine: 'bing',
compress: false,
debug: false,
verbose: false,
keywords: effective_query_keywords,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
console.log('effective_query_test()');
await se_scraper.scrape(config, test_case_effective_query);
}
// we test with a callback function to our handler
function test_case_effective_query(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
results = response.results;
for (let query in response.results) {
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false');
assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}
}
(async () => {
await normal_search_test();
await no_results_test();
await effective_query_test();
})();

144
test/test_duckduckgo.js Normal file
View File

@ -0,0 +1,144 @@
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
/*
* Use chai and mocha for tests.
* https://mochajs.org/#installation
*/
const normal_search_keywords = ['apple tree', 'weather tomorrow'];
async function normal_search_test() {
let config = {
search_engine: 'duckduckgo',
compress: false,
debug: false,
verbose: false,
keywords: normal_search_keywords,
keyword_file: '',
num_pages: 2,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
console.log('normal_search_test()');
await se_scraper.scrape(config, normal_search_test_case);
}
// we test with a callback function to our handler
function normal_search_test_case(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
let total_rank = 1;
for (let query in response.results) {
assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars');
assert.isOk(res.snippet, 'snippet must be ok');
assert.typeOf(res.snippet, 'string', 'snippet must be string');
assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
assert.isNumber(res.rank, 'rank must be integer');
assert.equal(res.rank, total_rank++, 'rank ist wrong');
}
}
}
}
}
const effective_query_keywords = ['mount everrest'];
async function effective_query_test() {
let config = {
search_engine: 'duckduckgo',
compress: false,
debug: false,
verbose: false,
keywords: effective_query_keywords,
keyword_file: '',
num_pages: 1,
headless: true,
output_file: '',
block_assets: true,
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
random_user_agent: false,
};
console.log('effective_query_test()');
await se_scraper.scrape(config, test_case_effective_query);
}
// we test with a callback function to our handler
function test_case_effective_query(err, response) {
if (err) {
console.error(err);
} else {
assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
assert.equal(response.statusCode, 200, 'status code must be 200');
results = response.results;
for (let query in response.results) {
assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
for (let page_number in response.results[query]) {
assert.isNumber(parseInt(page_number), 'page_number must be numeric');
let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object');
// effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok');
assert.isNotEmpty(obj.effective_query, 'effective query must be valid');
assert(obj.effective_query !== query, 'effective query must be different from keyword');
assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
}
}
}
}
(async () => {
await normal_search_test();
await effective_query_test();
})();

View File

@ -6,12 +6,6 @@ var assert = require('chai').assert;
* https://mochajs.org/#installation * https://mochajs.org/#installation
*/ */
function sleep(ms) {
return new Promise(resolve => {
setTimeout(resolve, ms)
})
}
const normal_search_keywords = ['apple tree', 'weather tomorrow']; const normal_search_keywords = ['apple tree', 'weather tomorrow'];
async function normal_search_test() { async function normal_search_test() {
@ -55,7 +49,7 @@ function normal_search_test_case(err, response) {
let obj = response.results[query][page_number]; let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results'], 'not all keys are in the object'); assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects'); assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects');
assert.equal(obj.no_results, false, 'no results should be false'); assert.equal(obj.no_results, false, 'no results should be false');
@ -65,12 +59,16 @@ function normal_search_test_case(err, response) {
for (let res of obj.results) { for (let res of obj.results) {
assert.containsAllKeys(res, ['link', 'title', 'rank'], 'not all keys are in the SERP object'); assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object');
assert.isOk(res.link, 'link must be ok'); assert.isOk(res.link, 'link must be ok');
assert.typeOf(res.link, 'string', 'link must be string'); assert.typeOf(res.link, 'string', 'link must be string');
assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars'); assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
assert.isOk(res.visible_link, 'visible_link must be ok');
assert.typeOf(res.visible_link, 'string', 'visible_link must be string');
assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars');
assert.isOk(res.title, 'title must be ok'); assert.isOk(res.title, 'title must be ok');
assert.typeOf(res.title, 'string', 'title must be string'); assert.typeOf(res.title, 'string', 'title must be string');
assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars'); assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
@ -87,7 +85,6 @@ function normal_search_test_case(err, response) {
} }
} }
const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',]; const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',];
async function no_results_test() { async function no_results_test() {
@ -127,7 +124,7 @@ function test_case_no_results(err, response) {
let obj = response.results[query][page_number]; let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results'], 'not all keys are in the object'); assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert(obj.results.length === 0, 'results must have 0 SERP objects'); assert(obj.results.length === 0, 'results must have 0 SERP objects');
assert.equal(obj.no_results, true, 'no results should be true'); assert.equal(obj.no_results, true, 'no results should be true');
@ -180,7 +177,7 @@ function test_case_effective_query(err, response) {
let obj = response.results[query][page_number]; let obj = response.results[query][page_number];
assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results'], 'not all keys are in the object'); assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
// effective query must be different to the original keyword // effective query must be different to the original keyword
assert.isOk(obj.effective_query, 'effective query must be ok'); assert.isOk(obj.effective_query, 'effective query must be ok');
@ -197,7 +194,6 @@ function test_case_effective_query(err, response) {
} }
} }
(async () => { (async () => {
await normal_search_test(); await normal_search_test();
await no_results_test(); await no_results_test();