forked from extern/se-scraper
test: add config proxy options tests
This commit is contained in:
parent
8f6317cea7
commit
1c1db88545
116
test/proxy.js
Normal file
116
test/proxy.js
Normal file
@ -0,0 +1,116 @@
|
||||
'use strict';
|
||||
const express = require('express');
|
||||
const puppeteer = require('puppeteer');
|
||||
// TODO add a test logger in place of default winston logger
|
||||
const logger = require('winston');
|
||||
const net = require('net');
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const url = require('url');
|
||||
const assert = require('assert');
|
||||
const path = require('path');
|
||||
const keyCert = require('key-cert');
|
||||
const Promise = require('bluebird');
|
||||
|
||||
const debug = require('debug')('se-scraper:test');
|
||||
const se_scraper = require('../');
|
||||
const Scraper = require('../src/modules/se_scraper');
|
||||
|
||||
// Local ports used by the fake servers in this test file.
// httpPort is the base; the other ports are consecutive offsets from it.
const httpPort = 3012;
// Port of the HTTPS listener.
const httpsPort = 1 + httpPort;
// Extra HTTP port reserved for a secondary server.
const httpOtherPort = 2 + httpPort;
|
||||
|
||||
// Minimal Express application standing in for a search engine:
// every GET /test logs some request details and answers with the body "OK".
const fakeSearchEngine = express();

fakeSearchEngine.get('/test', (request, response) => {
    // Trace who reached us and through which local address/port.
    const { ip, ips, protocol, hostname, socket } = request;
    debug(ip, ips, protocol, hostname);
    debug(socket.localAddress, socket.localPort);

    response.send('OK');
});
|
||||
|
||||
describe('Config', function(){
|
||||
|
||||
let httpServerAndProxy, httpsServer, httpOtherServer;
|
||||
before(async function(){
    // Mount the fake engine on both a plain HTTP server and an HTTPS server.
    httpServerAndProxy = http.createServer(fakeSearchEngine);
    httpsServer = https.createServer(await keyCert(), fakeSearchEngine);

    /**
     * Express does not handle the HTTP CONNECT method, so a basic MITM HTTP proxy is implemented here.
     * The HTTP server doubles as that proxy: every CONNECT tunnel is redirected to one of the
     * local fake-engine listeners, whatever host the client asked for.
     */
    httpServerAndProxy.on('connect', (req, clientSocket, head) => {
        // A scheme is prepended so that url.parse can split req.url into host and port.
        const parsedUrl = url.parse('http://' + req.url);
        // Requests for port 443 are tunnelled to the HTTPS listener, everything else to the HTTP one.
        const destPort = (Number.parseInt(parsedUrl.port, 10) === 443) ? httpsPort : httpPort;

        const serverSocket = net.connect(destPort, 'localhost', () => {
            debug('connection proxied askedHost=%s toPort=%s', parsedUrl.host, destPort);
            clientSocket.write('HTTP/1.1 200 Connection Established\r\n' +
                'Proxy-agent: Node.js-Proxy\r\n' +
                '\r\n');
            serverSocket.write(head);
            serverSocket.pipe(clientSocket);
            clientSocket.pipe(serverSocket);
        });

        // Error listeners are registered immediately rather than inside the connect callback:
        // a socket 'error' event without a listener is thrown as an uncaught exception,
        // and a failed connection attempt emits 'error' before the callback ever runs.
        serverSocket.on('error', (err) => {
            console.error(err);
            // The tunnel is unusable; release the client instead of leaving it hanging.
            clientSocket.destroy();
        });
        clientSocket.on('error', (err) => {
            console.error(err);
            serverSocket.destroy();
        });
    });

    // listen() is bound to its server so that the promisified call keeps the right `this`.
    await Promise.promisify(httpServerAndProxy.listen, {context: httpServerAndProxy})(httpPort);
    await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
    debug('Fake http search engine servers started');
});
|
||||
|
||||
after(function(){
    // Shut the listeners down in the same order as before: HTTPS first, then HTTP/proxy.
    for (const server of [httpsServer, httpServerAndProxy]) {
        server.close();
    }
});
|
||||
|
||||
describe('proxies', function(){
|
||||
|
||||
/**
 * Scraper stub used by the proxy tests: it skips the start page, always
 * requests the fake engine's /test route and returns the raw body markup.
 */
class MockScraper extends Scraper {

    /** No start page to load; report success right away. */
    async load_start_page() {
        return true;
    }

    /**
     * Navigates to the /test route on host "void".
     * NOTE(review): "void" is presumably only reachable through the configured proxy - confirm.
     */
    async search_keyword() {
        const target = `http://void:${httpPort}/test`;
        await this.page.goto(target);
    }

    /** Resolves with the innerHTML of the page's <body> element. */
    async parse_async() {
        const bodyElement = await this.page.$('body');
        const markup = await this.page.evaluate((element) => element.innerHTML, bodyElement);
        return markup;
    }
}
|
||||
|
||||
/**
 * Jobs will be executed 1 by 1 through the proxy.
 * Every keyword must come back with the fake engine's "OK" body for page 1.
 */
it('one proxy given', async function () {

    const scrape_job = {
        search_engine: MockScraper,
        keywords: ['news', 'some stuff', 'i work too much', 'what to do?', 'javascript is hard'],
    };

    const scraper = new se_scraper.ScrapeManager({
        throw_on_detection: true,
        proxies: ['http://localhost:' + httpPort],
        use_proxies_only: true,
    });
    await scraper.start();

    try {
        const { results } = await scraper.scrape(scrape_job);
        // One assertion per keyword, derived from the job so the two lists cannot drift apart.
        for (const keyword of scrape_job.keywords) {
            assert.strictEqual(results[keyword]['1'], 'OK');
        }
    } finally {
        // Always shut the scraper down, even when scraping or an assertion fails,
        // so a failing test does not leave it running.
        await scraper.quit();
    }
});
|
||||
|
||||
});
|
||||
|
||||
});
|
Loading…
Reference in New Issue
Block a user