Mirror of https://github.com/NikolaiT/se-scraper.git
Synced 2025-02-16 16:50:45 +01:00

Merge pull request #42 from TDenoncin/error-management

Clean integration tests with mocha

Commit 4c77aeba76
@@ -1,3 +1,4 @@
+'use strict';
 const cheerio = require('cheerio');
 const Scraper = require('./se_scraper');
 const common = require('./common.js');
@@ -175,11 +176,7 @@ class GoogleScraper extends Scraper {
 
 		this.last_response = await this.page.goto(startUrl);
 
-		try {
-			await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
-		} catch (e) {
-			return false;
-		}
-
+		await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
+
 		return true;
 	}
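
Note: with the local try/catch gone, a selector timeout now rejects load_start_page() instead of returning false. A minimal sketch of what a caller sees, assuming a puppeteer-style page object (the `scraper` variable here is illustrative, not part of the diff):

    try {
        await scraper.load_start_page();
    } catch (err) {
        // e.g. puppeteer's TimeoutError when input[name="q"] never appears
        console.error('start page failed to load:', err.message);
    }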
@@ -252,13 +249,8 @@ class GoogleNewsOldScraper extends Scraper {
 	async load_start_page() {
 		let startUrl = this.build_start_url('https://www.google.com/search?source=lnms&tbm=nws&') || 'https://www.google.com/search?source=lnms&tbm=nws';
 
-		try {
-			await this.page.goto(startUrl);
-			await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
-		} catch (e) {
-			return false;
-		}
-
+		await this.page.goto(startUrl);
+		await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
 
 		return true;
 	}
@@ -1,3 +1,4 @@
+'use strict';
 const meta = require('./metadata.js');
 const common = require('./common.js');
 var log = common.log;
@@ -41,12 +42,8 @@ module.exports = class Scraper {
 		let settings = this.config[`${this.config.search_engine}_settings`];
 		if (settings) {
 			if (typeof settings === 'string') {
-				try {
-					settings = JSON.parse(settings);
-					this.config[`${this.config.search_engine}_settings`] = settings;
-				} catch (e) {
-					console.error(e);
-				}
+				settings = JSON.parse(settings);
+				this.config[`${this.config.search_engine}_settings`] = settings;
 			}
 		}
 	}
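
Note: string settings are still parsed and written back, but malformed JSON now throws instead of being logged and swallowed. A self-contained sketch of the simplified flow (the sample config is illustrative):

    const config = { search_engine: 'google', google_settings: '{"num_pages": 2}' };

    let settings = config[`${config.search_engine}_settings`];
    if (settings && typeof settings === 'string') {
        settings = JSON.parse(settings); // throws SyntaxError on bad input
        config[`${config.search_engine}_settings`] = settings;
    }
    console.log(config.google_settings.num_pages); // 2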
@@ -123,24 +120,16 @@ module.exports = class Scraper {
 		if (this.proxy && this.config.log_ip_address === true) {
 			log(this.config, 3, `${this.metadata.ipinfo.ip} vs ${this.proxy}`);
 
-			try {
-				// if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
-				if (!this.proxy.includes(this.metadata.ipinfo.ip)) {
-					console.error(`Proxy ${this.proxy} does not work.`);
-					return false;
-				} else {
-					log(this.config, 1, `Using valid Proxy: ${this.proxy}`);
-				}
-			} catch (exception) {
+			// if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
+			if (!this.proxy.includes(this.metadata.ipinfo.ip)) {
+				throw new Error(`Proxy output ip ${this.proxy} does not match with provided one`);
+			} else {
+				log(this.config, 1, `Using valid Proxy: ${this.proxy}`);
 			}
 
 		}
 
-		try {
-			return await this.load_start_page();
-		} catch (e) {
-			console.error(e);
-			return false;
-		}
+		return await this.load_start_page();
 	}
 
 	/**
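
Note: the proxy sanity check now fails loudly. Instead of printing an error and returning false, a mismatch between the configured proxy string and the egress ip reported by ipinfo throws, so a misconfigured proxy can no longer be silently ignored. A standalone sketch of the check (the proxy string and ips are illustrative):

    function checkProxy(proxy, egressIp) {
        // the egress ip must be a substring of the configured proxy string
        if (!proxy.includes(egressIp)) {
            throw new Error(`Proxy output ip ${proxy} does not match with provided one`);
        }
    }

    checkProxy('http://1.2.3.4:3128', '1.2.3.4'); // ok
    checkProxy('http://1.2.3.4:3128', '5.6.7.8'); // throws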
@ -391,7 +380,6 @@ module.exports = class Scraper {
|
|||||||
// This is where we'll put the code to get around the tests.
|
// This is where we'll put the code to get around the tests.
|
||||||
async function evadeChromeHeadlessDetection(page) {
|
async function evadeChromeHeadlessDetection(page) {
|
||||||
|
|
||||||
try {
|
|
||||||
// Pass the Webdriver Test.
|
// Pass the Webdriver Test.
|
||||||
await page.evaluateOnNewDocument(() => {
|
await page.evaluateOnNewDocument(() => {
|
||||||
const newProto = navigator.__proto__;
|
const newProto = navigator.__proto__;
|
||||||
@@ -518,8 +506,4 @@ async function evadeChromeHeadlessDetection(page) {
             return null;
         };
     });
-
-    } catch (e) {
-        console.error(e);
-    }
 }
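
Note: the blanket try/catch around the whole evasion routine is removed as well, so a failing page.evaluateOnNewDocument call now surfaces to the caller. For context, a sketch of the webdriver evasion the hunk above belongs to, assuming a puppeteer page (deleting navigator.webdriver from the prototype is the commonly used pattern; the full body is not shown in this diff):

    await page.evaluateOnNewDocument(() => {
        // make navigator.webdriver report undefined in headless Chrome
        const newProto = navigator.__proto__;
        delete newProto.webdriver;
        navigator.__proto__ = newProto;
    });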
@@ -1,3 +1,4 @@
+'use strict';
 const zlib = require('zlib');
 var fs = require('fs');
 var os = require("os");
@@ -1,5 +1,6 @@
+'use strict';
 const se_scraper = require('./../index.js');
-var assert = require('chai').assert;
+const assert = require('chai').assert;
 
 /*
  * Use chai and mocha for tests.
@@ -113,7 +114,6 @@ async function no_results_test() {
 function test_case_no_results(response) {
     assert.equal(response.metadata.num_requests, 1);
 
-    results = response.results;
     for (let query in response.results) {
 
         assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
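
Note: dropping the stray `results = response.results;` lines goes hand in hand with the 'use strict'; directives added at the top of these files — in strict mode an assignment to an undeclared identifier throws instead of creating a global. A one-line demonstration:

    'use strict';
    try { results = []; } catch (e) { console.log(e.name); } // ReferenceError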
@@ -134,7 +134,8 @@ function test_case_no_results(response) {
     }
 }
 
-(async () => {
-    await normal_search_test();
-    await no_results_test();
-})();
+describe('Amazon', function(){
+    this.timeout(30000);
+    it('normal search test', normal_search_test);
+    it('no results test', no_results_test);
+});
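
Note: the self-invoking async wrappers become mocha suites: each former await becomes an it() case, and mocha awaits the promise the async test function returns, failing the case if it rejects. A minimal self-contained sketch of the pattern (the suite name and runner invocation are illustrative; run with `npx mocha test/`):

    const assert = require('chai').assert;

    async function normal_search_test() {
        // scraping omitted; any thrown error or rejected promise fails the case
        assert.isTrue(true);
    }

    describe('Example', function(){
        this.timeout(30000); // scraping a live search engine can be slow
        it('normal search test', normal_search_test);
    });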
@@ -1,5 +1,6 @@
+'use strict';
 const se_scraper = require('./../index.js');
-var assert = require('chai').assert;
+const assert = require('chai').assert;
 
 /*
  * Use chai and mocha for tests.
@@ -80,6 +81,7 @@ function normal_search_test_case(response) {
     }
 }
 
-(async () => {
-    await normal_search_test();
-})();
+describe('Baidu', function(){
+    this.timeout(30000);
+    it('normal search test', normal_search_test);
+});
@@ -1,3 +1,4 @@
+'use strict';
 const se_scraper = require('./../index.js');
 const chai = require('chai');
 chai.use(require('chai-string'));
@@ -114,7 +115,6 @@ async function no_results_test() {
 function test_case_no_results(response) {
     assert.equal(response.metadata.num_requests, 1);
 
-    results = response.results;
     for (let query in response.results) {
 
         assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
@@ -163,7 +163,6 @@ async function effective_query_test() {
 function test_case_effective_query(response) {
     assert.equal(response.metadata.num_requests, 1);
 
-    results = response.results;
     for (let query in response.results) {
 
         assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
@@ -1,3 +1,4 @@
+'use strict';
 const se_scraper = require('./../index.js');
 const chai = require('chai');
 chai.use(require('chai-string'));
@@ -1,3 +1,4 @@
+'use strict';
 const se_scraper = require('./../index.js');
 const chai = require('chai');
 chai.use(require('chai-string'));
@@ -110,7 +111,6 @@ async function no_results_test() {
 function test_case_no_results(response) {
     assert.equal(response.metadata.num_requests, 1);
 
-    results = response.results;
     for (let query in response.results) {
 
         assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');
@@ -123,7 +123,7 @@ function test_case_no_results(response) {
 
     assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
 
-    assert(obj.results.length === 0, 'results must have 0 SERP objects');
+    assert.strictEqual(obj.results.length, 0, 'results must have 0 SERP objects');
     assert.equal(obj.no_results, true, 'no results should be true');
     assert.isEmpty(obj.num_results, 'no results should be a empty string');
     assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
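
Note: assert(expr, msg) only reports that a precomputed boolean was falsy, while assert.strictEqual reports actual vs expected values on failure, which makes broken runs easier to read:

    const assert = require('chai').assert;
    assert.strictEqual([].length, 0);     // passes
    assert.strictEqual([1].length, 0);    // AssertionError: expected 1 to equal 0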
@@ -1,5 +1,6 @@
+'use strict';
 const se_scraper = require('./../index.js');
-var assert = require('chai').assert;
+const assert = require('chai').assert;
 
 /*
  * Use chai and mocha for tests.
@@ -73,6 +74,7 @@ function normal_image_search_test_case(response) {
     }
 }
 
-(async () => {
-    await normal_image_search_test();
-})();
+describe('Google Image', function(){
+    this.timeout(30000);
+    it('normal image search test', normal_image_search_test);
+});
@@ -1,5 +1,6 @@
+'use strict';
 const se_scraper = require('./../index.js');
-var assert = require('chai').assert;
+const assert = require('chai').assert;
 
 const normal_search_keywords = ['apple juice'];
 
@@ -84,6 +85,7 @@ function queryargs_search_test_case(err, response) {
     }
 }
 
-(async () => {
-    await queryargs_search_test();
-})();
+describe('Google with query arguments', function(){
+    this.timeout(30000);
+    it('query args search test', queryargs_search_test);
+});
@@ -1,5 +1,6 @@
+'use strict';
 const se_scraper = require('./../index.js');
-var assert = require('chai').assert;
+const assert = require('chai').assert;
 
 /*
  * Use chai and mocha for tests.
@@ -208,8 +209,9 @@ function marketwatch_search_test_case(err, response) {
 }
 
 
-(async () => {
-    await reuters_search_test();
-    await cnbc_search_test();
-    await marketwatch_search_test();
-})();
+describe('Ticker', function(){
+    this.timeout(30000);
+    it('Reuters search test', reuters_search_test);
+    it('CNBC search test', cnbc_search_test);
+    it('Marketwatch search test', marketwatch_search_test);
+});
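
Note: the describe callbacks are regular functions rather than arrow functions on purpose — this.timeout(30000) needs the suite context mocha binds to `this`, which an arrow function would not pick up:

    describe('Ticker', function(){
        this.timeout(30000);      // ok: `this` is mocha's suite context
    });

    describe('Ticker', () => {
        // this.timeout(30000);   // broken: arrow `this` is not the suite
    });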