diff --git a/src/modules/bing.js b/src/modules/bing.js index f258a31..6a5f6bd 100644 --- a/src/modules/bing.js +++ b/src/modules/bing.js @@ -18,8 +18,9 @@ class BingScraper extends Scraper { }) }); + // 'Including results for', 'Einschließlich Ergebnisse' let no_results = this.no_results( - ['There are no results', 'Es gibt keine Ergebnisse', 'Including results for', 'Einschließlich Ergebnisse'], + ['There are no results', 'Es gibt keine Ergebnisse'], $('#b_results').text() ); diff --git a/src/modules/duckduckgo.js b/src/modules/duckduckgo.js index 7ce045c..b89bbd4 100644 --- a/src/modules/duckduckgo.js +++ b/src/modules/duckduckgo.js @@ -19,6 +19,8 @@ class DuckduckgoScraper extends Scraper { }); }); + let effective_query = $('#did_you_mean a.js-spelling-suggestion-link').attr('data-query') || ''; + const cleaned = []; for (var i=0; i < results.length; i++) { let res = results[i]; @@ -30,6 +32,7 @@ class DuckduckgoScraper extends Scraper { return { time: (new Date()).toUTCString(), + effective_query: effective_query, results: cleaned } } diff --git a/test/test_bing.js b/test/test_bing.js new file mode 100644 index 0000000..c8d6111 --- /dev/null +++ b/test/test_bing.js @@ -0,0 +1,201 @@ +const se_scraper = require('./../index.js'); +var assert = require('chai').assert; + +/* + * Use chai and mocha for tests. + * https://mochajs.org/#installation + */ + +const normal_search_keywords = ['apple tree', 'weather tomorrow']; + +async function normal_search_test() { + let config = { + search_engine: 'bing', + compress: false, + debug: false, + verbose: false, + keywords: normal_search_keywords, + keyword_file: '', + num_pages: 3, + headless: true, + output_file: '', + block_assets: true, + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', + random_user_agent: false, + }; + + console.log('normal_search_test()'); + await se_scraper.scrape(config, normal_search_test_case); +} + +// we test with a callback function to our handler +function normal_search_test_case(err, response) { + + if (err) { + console.error(err); + } else { + assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json'); + assert.equal(response.statusCode, 200, 'status code must be 200'); + + let total_rank = 1; + + for (let query in response.results) { + + assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.'); + + for (let page_number in response.results[query]) { + + assert.isNumber(parseInt(page_number), 'page_number must be numeric'); + + let obj = response.results[query][page_number]; + + assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object'); + + assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects'); + assert.equal(obj.no_results, false, 'no results should be false'); + assert.typeOf(obj.num_results, 'string', 'num_results must be a string'); + assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars'); + assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); + + for (let res of obj.results) { + + assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object'); + + assert.isOk(res.link, 'link must be ok'); + assert.typeOf(res.link, 'string', 'link must be string'); + assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars'); + + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); + + assert.isOk(res.title, 'title must be ok'); + assert.typeOf(res.title, 'string', 'title must be string'); + assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars'); + + assert.isOk(res.snippet, 'snippet must be ok'); + assert.typeOf(res.snippet, 'string', 'snippet must be string'); + assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars'); + + assert.isNumber(res.rank, 'rank must be integer'); + assert.equal(res.rank, total_rank++, 'rank ist wrong'); + } + } + } + } +} + +const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',]; + +async function no_results_test() { + let config = { + search_engine: 'bing', + compress: false, + debug: false, + verbose: false, + keywords: keywords_no_results, + keyword_file: '', + num_pages: 1, + headless: true, + output_file: '', + block_assets: true, + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', + random_user_agent: false, + }; + console.log('no_results_test()'); + await se_scraper.scrape(config, test_case_no_results); +} + +// we test with a callback function to our handler +function test_case_no_results(err, response) { + if (err) { + console.error(err); + } else { + assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json'); + assert.equal(response.statusCode, 200, 'status code must be 200'); + results = response.results; + for (let query in response.results) { + + assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.'); + + for (let page_number in response.results[query]) { + + assert.isNumber(parseInt(page_number), 'page_number must be numeric'); + + let obj = response.results[query][page_number]; + + assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object'); + + assert(obj.results.length === 0, 'results must have 0 SERP objects'); + assert.equal(obj.no_results, true, 'no results should be true'); + assert.isEmpty(obj.num_results, 'no results should be a empty string'); + assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); + } + } + } +} + +const effective_query_keywords = ['mount everrest']; + +async function effective_query_test() { + let config = { + search_engine: 'bing', + compress: false, + debug: false, + verbose: false, + keywords: effective_query_keywords, + keyword_file: '', + num_pages: 1, + headless: true, + output_file: '', + block_assets: true, + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', + random_user_agent: false, + }; + console.log('effective_query_test()'); + await se_scraper.scrape(config, test_case_effective_query); +} + +// we test with a callback function to our handler +function test_case_effective_query(err, response) { + + if (err) { + console.error(err); + } else { + + assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json'); + assert.equal(response.statusCode, 200, 'status code must be 200'); + + results = response.results; + for (let query in response.results) { + + assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.'); + + for (let page_number in response.results[query]) { + + assert.isNumber(parseInt(page_number), 'page_number must be numeric'); + + let obj = response.results[query][page_number]; + + assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object'); + + // effective query must be different to the original keyword + assert.isOk(obj.effective_query, 'effective query must be ok'); + assert.isNotEmpty(obj.effective_query, 'effective query must be valid'); + assert(obj.effective_query !== query, 'effective query must be different from keyword'); + + assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects'); + assert.equal(obj.no_results, false, 'no results should be false'); + assert.typeOf(obj.num_results, 'string', 'num_results must be a string'); + assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars'); + assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); + } + } + } +} + +(async () => { + await normal_search_test(); + await no_results_test(); + await effective_query_test(); +})(); \ No newline at end of file diff --git a/test/test_duckduckgo.js b/test/test_duckduckgo.js new file mode 100644 index 0000000..b51f95d --- /dev/null +++ b/test/test_duckduckgo.js @@ -0,0 +1,144 @@ +const se_scraper = require('./../index.js'); +var assert = require('chai').assert; + +/* + * Use chai and mocha for tests. + * https://mochajs.org/#installation + */ + +const normal_search_keywords = ['apple tree', 'weather tomorrow']; + +async function normal_search_test() { + let config = { + search_engine: 'duckduckgo', + compress: false, + debug: false, + verbose: false, + keywords: normal_search_keywords, + keyword_file: '', + num_pages: 2, + headless: true, + output_file: '', + block_assets: true, + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', + random_user_agent: false, + }; + + console.log('normal_search_test()'); + await se_scraper.scrape(config, normal_search_test_case); +} + +// we test with a callback function to our handler +function normal_search_test_case(err, response) { + + if (err) { + console.error(err); + } else { + assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json'); + assert.equal(response.statusCode, 200, 'status code must be 200'); + + let total_rank = 1; + + for (let query in response.results) { + + assert.containsAllKeys(response.results, normal_search_keywords, 'not all keywords were scraped.'); + + for (let page_number in response.results[query]) { + + assert.isNumber(parseInt(page_number), 'page_number must be numeric'); + + let obj = response.results[query][page_number]; + + assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object'); + + assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects'); + assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); + + for (let res of obj.results) { + + assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object'); + + assert.isOk(res.link, 'link must be ok'); + assert.typeOf(res.link, 'string', 'link must be string'); + assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars'); + + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); + + assert.isOk(res.title, 'title must be ok'); + assert.typeOf(res.title, 'string', 'title must be string'); + assert.isAtLeast(res.title.length, 5, 'title must have at least 5 chars'); + + assert.isOk(res.snippet, 'snippet must be ok'); + assert.typeOf(res.snippet, 'string', 'snippet must be string'); + assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars'); + + assert.isNumber(res.rank, 'rank must be integer'); + assert.equal(res.rank, total_rank++, 'rank ist wrong'); + } + } + } + } +} + +const effective_query_keywords = ['mount everrest']; + +async function effective_query_test() { + let config = { + search_engine: 'duckduckgo', + compress: false, + debug: false, + verbose: false, + keywords: effective_query_keywords, + keyword_file: '', + num_pages: 1, + headless: true, + output_file: '', + block_assets: true, + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', + random_user_agent: false, + }; + console.log('effective_query_test()'); + await se_scraper.scrape(config, test_case_effective_query); +} + +// we test with a callback function to our handler +function test_case_effective_query(err, response) { + + if (err) { + console.error(err); + } else { + + assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json'); + assert.equal(response.statusCode, 200, 'status code must be 200'); + + results = response.results; + for (let query in response.results) { + + assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.'); + + for (let page_number in response.results[query]) { + + assert.isNumber(parseInt(page_number), 'page_number must be numeric'); + + let obj = response.results[query][page_number]; + + assert.containsAllKeys(obj, ['results', 'time', 'effective_query'], 'not all keys are in the object'); + + // effective query must be different to the original keyword + assert.isOk(obj.effective_query, 'effective query must be ok'); + assert.isNotEmpty(obj.effective_query, 'effective query must be valid'); + assert(obj.effective_query !== query, 'effective query must be different from keyword'); + + assert.isAtLeast(obj.results.length, 7, 'results must have at least 7 SERP objects'); + assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); + } + } + } +} + +(async () => { + await normal_search_test(); + await effective_query_test(); +})(); \ No newline at end of file diff --git a/test/test_google.js b/test/test_google.js index 9b78736..7484270 100644 --- a/test/test_google.js +++ b/test/test_google.js @@ -6,12 +6,6 @@ var assert = require('chai').assert; * https://mochajs.org/#installation */ -function sleep(ms) { - return new Promise(resolve => { - setTimeout(resolve, ms) - }) -} - const normal_search_keywords = ['apple tree', 'weather tomorrow']; async function normal_search_test() { @@ -55,7 +49,7 @@ function normal_search_test_case(err, response) { let obj = response.results[query][page_number]; - assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results'], 'not all keys are in the object'); + assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object'); assert.isAtLeast(obj.results.length, 8, 'results must have at least 8 SERP objects'); assert.equal(obj.no_results, false, 'no results should be false'); @@ -65,12 +59,16 @@ function normal_search_test_case(err, response) { for (let res of obj.results) { - assert.containsAllKeys(res, ['link', 'title', 'rank'], 'not all keys are in the SERP object'); + assert.containsAllKeys(res, ['link', 'title', 'rank', 'visible_link', 'rank'], 'not all keys are in the SERP object'); assert.isOk(res.link, 'link must be ok'); assert.typeOf(res.link, 'string', 'link must be string'); assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars'); + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); + assert.isOk(res.title, 'title must be ok'); assert.typeOf(res.title, 'string', 'title must be string'); assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars'); @@ -87,7 +85,6 @@ function normal_search_test_case(err, response) { } } - const keywords_no_results = ['fgskl34440abJAksafkl34a44dsflkjaQQuBBdfk',]; async function no_results_test() { @@ -127,7 +124,7 @@ function test_case_no_results(err, response) { let obj = response.results[query][page_number]; - assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results'], 'not all keys are in the object'); + assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object'); assert(obj.results.length === 0, 'results must have 0 SERP objects'); assert.equal(obj.no_results, true, 'no results should be true'); @@ -180,7 +177,7 @@ function test_case_effective_query(err, response) { let obj = response.results[query][page_number]; - assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results'], 'not all keys are in the object'); + assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object'); // effective query must be different to the original keyword assert.isOk(obj.effective_query, 'effective query must be ok'); @@ -197,7 +194,6 @@ function test_case_effective_query(err, response) { } } - (async () => { await normal_search_test(); await no_results_test();