test(duckduckgo): implement tests for duckduckgo module

This commit is contained in:
HugoPoi 2020-01-15 16:33:30 +01:00
parent b685fb4def
commit 28332528ea
5 changed files with 297 additions and 0 deletions

View File

@ -0,0 +1,148 @@
<!DOCTYPE html>
<!--[if IEMobile 7 ]> <html lang="en_US" class="no-js iem7"> <![endif]-->
<!--[if lt IE 7]> <html class="ie6 lt-ie10 lt-ie9 lt-ie8 lt-ie7 no-js" lang="en_US"> <![endif]-->
<!--[if IE 7]> <html class="ie7 lt-ie10 lt-ie9 lt-ie8 no-js" lang="en_US"> <![endif]-->
<!--[if IE 8]> <html class="ie8 lt-ie10 lt-ie9 no-js" lang="en_US"> <![endif]-->
<!--[if IE 9]> <html class="ie9 lt-ie10 no-js" lang="en_US"> <![endif]-->
<!--[if (gte IE 9)|(gt IEMobile 7)|!(IEMobile)|!(IE)]><!--><html class="no-js" lang="en_US"><!--<![endif]-->
<head>
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta http-equiv="content-type" content="text/html; charset=UTF-8;charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=1" />
<meta name="HandheldFriendly" content="true"/>
<link rel="canonical" href="https://duckduckgo.com/">
<link rel="stylesheet" href="/s1847.css" type="text/css">
<link rel="stylesheet" href="/o1847.css" type="text/css">
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"/>
<link rel="apple-touch-icon" href="/assets/icons/meta/DDG-iOS-icon_60x60.png"/>
<link rel="apple-touch-icon" sizes="76x76" href="/assets/icons/meta/DDG-iOS-icon_76x76.png"/>
<link rel="apple-touch-icon" sizes="120x120" href="/assets/icons/meta/DDG-iOS-icon_120x120.png"/>
<link rel="apple-touch-icon" sizes="152x152" href="/assets/icons/meta/DDG-iOS-icon_152x152.png"/>
<link rel="image_src" href="/assets/icons/meta/DDG-icon_256x256.png"/>
<link rel="manifest" href="/manifest.json"/>
<meta name="twitter:card" content="summary">
<meta name="twitter:site" value="@duckduckgo">
<meta property="og:url" content="https://duckduckgo.com/" />
<meta property="og:site_name" content="DuckDuckGo" />
<meta property="og:image" content="https://duckduckgo.com/assets/logo_social-media.png">
<title>DuckDuckGo — Privacy, simplified.</title>
<meta property="og:title" content="DuckDuckGo — Privacy, simplified." />
<meta property="og:description" content="The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.">
<meta name="description" content="The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.">
</head>
<body id="pg-index" class="page-index body--home">
<script type="text/javascript">
var settings_js_version = "/s2475.js",
locale = "en_US";
</script>
<script type="text/javascript" src="/lib/l113.js"></script>
<script type="text/javascript" src="/locale/en_US/duckduckgo14.js"></script>
<script type="text/javascript" src="/util/u418.js"></script>
<script type="text/javascript" src="/d2727.js"></script>
<script type="text/javascript">
DDG.page = new DDG.Pages.Home();
</script>
<div class="site-wrapper site-wrapper--home js-site-wrapper">
<div class="header-wrap--home js-header-wrap">
<div class="header--aside js-header-aside"></div>
<div class="js-header-home-search header-wrap--home__search">
<div class="logo-wrap--home">
<a id="logo_homepage_link" class="logo_homepage" href="/about">
About DuckDuckGo
<span class="logo_homepage__tt">Duck it!</span>
</a>
</div>
<form id="search_form_homepage_top" class="search search--home js-search-form-top" name="x" method="POST" action="/html">
<input class="search__input js-search-input" type="text" autocomplete="off" name="q" tabindex="1" value="">
<input class="search__button js-search-button" type="submit" tabindex="2" value="S" />
<input class="search__clear empty js-search-clear" type="button" tabindex="3" value="X" />
<div class="search__hidden js-search-hidden"></div>
</form>
</div>
</div>
<div id="" class="content-wrap--home">
<div id="content_homepage" class="content--home">
<div class="cw--c">
<div class="logo-wrap--home">
<a id="logo_homepage_link" class="logo_homepage" href="/about">
About DuckDuckGo
<span class="logo_homepage__tt">Duck it!</span>
</a>
</div>
<div class="search-wrap--home">
<form id="search_form_homepage" class="search search--home js-search-form" name="x" method="POST" action="/html">
<input id="search_form_input_homepage" class="search__input js-search-input" type="text" autocomplete="off" name="q" tabindex="1" value="">
<input id="search_button_homepage" class="search__button js-search-button" type="submit" tabindex="2" value="S" />
<input id="search_form_input_clear" class="search__clear empty js-search-clear" type="button" tabindex="3" value="X" />
<div id="search_elements_hidden" class="search__hidden js-search-hidden"></div>
</form>
</div>
<!-- en_US All Settings -->
<noscript>
<div class="tag-home">
<div class="tag-home__wrapper">
<div class="tag-home__item">
The search engine that doesn't track you.
<span class="hide--screen-xs"><a href="/about" class="tag-home__link">Learn More</a>.</span>
</div>
</div>
</div>
</noscript>
<div class="tag-home tag-home--slide no-js__hide js-tag-home"></div>
<div id="error_homepage"></div>
</div> <!-- cw -->
</div> <!-- content_homepage //-->
</div> <!-- content_wrapper_homepage //-->
<div id="footer_homepage" class="foot-home js-foot-home"></div>
<script type="text/javascript">
{function seterr(str) {
var error=document.getElementById('error_homepage');
error.innerHTML=str;
$(error).css('display','block');
}
var err=new RegExp('[\?\&]e=([^\&]+)');var errm=new Array();errm['2']='no search';errm['3']='search too long';errm['4']='not UTF\u002d8 encoding';errm['6']='too many search terms';if (err.test(window.location.href)) seterr('Oops, '+(errm[RegExp.$1]?errm[RegExp.$1]:'there was an error.')+' &nbsp;Please try again');};
if (kurl) {
document.getElementById("logo_homepage_link").href += (document.getElementById("logo_homepage_link").href.indexOf('?')==-1 ? '?t=i' : '') + kurl;
}
</script>
</div> <!-- site-wrapper -->
</body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

140
test/modules/duckduckgo.js Normal file
View File

@ -0,0 +1,140 @@
'use strict';
const express = require('express');
const puppeteer = require('puppeteer');
const { createLogger, transports } = require('winston');
const http = require('http');
const https = require('https');
const assert = require('assert');
const path = require('path');
const keyCert = require('key-cert');
const Promise = require('bluebird');
const Proxy = require('http-mitm-proxy');
const debug = require('debug')('se-scraper:test');
const { DuckduckgoScraper } = require('../../src/modules/duckduckgo');
const httpPort = 3012;
const httpsPort = httpPort + 1;
const proxyPort = httpPort + 2;
const fakeSearchEngine = express();
fakeSearchEngine.use(express.urlencoded({ extended: true }))
fakeSearchEngine.get('/', (req, res, next) => {
if(!req.query.q){
return next();
}
debug('q=%s page=%d', req.query.q, req.query.page);
const pageNumber = req.query.page;
res.sendFile(path.join(__dirname, '../mocks/duckduckgo/' + req.query.q + '_page' + pageNumber + '.html'));
});
fakeSearchEngine.post('/html', (req, res) => {
debug('body=%o', req.body);
const pageNumber = 1;
res.sendFile(path.join(__dirname, '../mocks/duckduckgo/' + req.body.q + '_page' + pageNumber + '.html'));
});
fakeSearchEngine.use(express.static('test/mocks/duckduckgo', {extensions: ['html']}));
describe('Module DuckDuckGo', function(){
let httpServer, httpsServer, proxy;
before(async function(){
// Here mount our fake engine in both http and https listen server
httpServer = http.createServer(fakeSearchEngine);
httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
proxy = Proxy();
proxy.onRequest((ctx, callback) => {
ctx.proxyToServerRequestOptions.host = 'localhost';
ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
debug('proxy askedHost=%s method=%s url=%s toPort=%s',
ctx.clientToProxyRequest.headers.host,
ctx.clientToProxyRequest.method,
ctx.clientToProxyRequest.url,
ctx.proxyToServerRequestOptions.port
);
return callback();
});
await Promise.promisify(proxy.listen, { context: proxy })({ port: proxyPort });
await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
debug('Fake http search engine servers started');
});
after(function(){
proxy.close();
httpsServer.close();
httpServer.close();
});
let browser;
let page;
beforeEach(async function(){
debug('Start a new browser');
browser = await puppeteer.launch({
//dumpio: true,
//headless: false,
ignoreHTTPSErrors: true,
args: [ '--proxy-server=http://localhost:' + proxyPort ]
});
debug('Open a fresh page');
page = await browser.newPage();
});
afterEach(async function(){
await browser.close();
});
const testLogger = createLogger({
transports: [
new transports.Console({
level: 'error'
})
]
});
it('one keyword one page', function(){
const duckduckgoScraper = new DuckduckgoScraper({
config: {
search_engine_name: 'duckduckgo',
throw_on_detection: true,
keywords: ['test keyword'],
logger: testLogger,
scrape_from_file: '',
}
});
duckduckgoScraper.STANDARD_TIMEOUT = 1000;
return duckduckgoScraper.run({page}).then(({results, metadata, num_requests}) => {
assert.strictEqual(num_requests, 1, 'Must do one request');
assert.strictEqual(results['test keyword']['1'].results.length, 10, 'Must have 10 organic results parsed');
});
});
it('one keyword 3 pages', function () {
this.timeout(4000);
const duckduckgoScraper = new DuckduckgoScraper({
config: {
search_engine_name: 'google',
throw_on_detection: true,
keywords: ['test keyword'],
logger: testLogger,
scrape_from_file: '',
num_pages: 3,
}
});
duckduckgoScraper.STANDARD_TIMEOUT = 1000;
return duckduckgoScraper.run({page}).then(({results, metadata, num_requests}) => {
assert.strictEqual(num_requests, 3, 'Must three requests');
assert.strictEqual(results['test keyword']['1'].results.length, 10, 'Must have 10 organic results parsed on page 1');
assert.strictEqual(results['test keyword']['1'].results[0].title, 'Keyword Tests | TestComplete Documentation', 'Title not matching on first organic result page 1');
debug('results page 1 %O',results['test keyword']['1'].results);
debug('results page 2 %O', results['test keyword']['2'].results);
assert.strictEqual(results['test keyword']['2'].results.length, 19, 'Must have 19 organic results parsed on page 2');
assert.strictEqual(results['test keyword']['2'].results[0].title, 'Quest Diagnostics: Test Directory', 'Title not matching on first organic result page 1');
assert.strictEqual(results['test keyword']['3'].results.length, 48, 'Must have 48 organic results parsed on page 3');
assert.strictEqual(results['test keyword']['3'].results[0].title, 'Java Keywords Quiz - Sporcle', 'Title not matching on first organic result page 1');
});
});
});