Mirror of https://github.com/NikolaiT/se-scraper.git (synced 2024-11-21 23:23:07 +01:00)

Merge pull request #42 from TDenoncin/error-management

Clean integration tests with mocha

This commit is contained in: commit 4c77aeba76
@@ -1,3 +1,4 @@
'use strict';
const cheerio = require('cheerio');
const Scraper = require('./se_scraper');
const common = require('./common.js');
@@ -175,11 +176,7 @@ class GoogleScraper extends Scraper {

        this.last_response = await this.page.goto(startUrl);

        try {
            await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
        } catch (e) {
            return false;
        }
        await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });

        return true;
    }
@@ -252,13 +249,8 @@ class GoogleNewsOldScraper extends Scraper {

    async load_start_page() {
        let startUrl = this.build_start_url('https://www.google.com/search?source=lnms&tbm=nws&') || 'https://www.google.com/search?source=lnms&tbm=nws';

        try {
            await this.page.goto(startUrl);
            await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
        } catch (e) {
            return false;
        }
        await this.page.goto(startUrl);
        await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
        return true;
    }
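For orientation, a minimal sketch of the load_start_page pattern these two hunks revolve around: navigate to the start URL, then treat a missing search box (waitForSelector timing out) as a failed load. The page object and timeout follow puppeteer's Page API as used in the repo; the standalone function shape, the hard-coded URL and the parameter names are illustrative, not the repo's exact code.

// Sketch: load a search start page and report success/failure.
// `page` is assumed to be a puppeteer Page; `standardTimeout` is a number of ms.
async function loadStartPage(page, standardTimeout) {
    const startUrl = 'https://www.google.com/search?';
    await page.goto(startUrl);
    try {
        // The search box is the signal that the SERP rendered; a timeout means the load failed.
        await page.waitForSelector('input[name="q"]', { timeout: standardTimeout });
    } catch (e) {
        return false;
    }
    return true;
}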
@@ -1,3 +1,4 @@
'use strict';
const meta = require('./metadata.js');
const common = require('./common.js');
var log = common.log;
@@ -41,12 +42,8 @@ module.exports = class Scraper {

        let settings = this.config[`${this.config.search_engine}_settings`];
        if (settings) {
            if (typeof settings === 'string') {
                try {
                    settings = JSON.parse(settings);
                    this.config[`${this.config.search_engine}_settings`] = settings;
                } catch (e) {
                    console.error(e);
                }
                settings = JSON.parse(settings);
                this.config[`${this.config.search_engine}_settings`] = settings;
            }
        }
    }
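A small sketch of the settings-parsing step shown above: engine-specific settings may arrive as a JSON string, and a malformed string should be reported rather than crash the scraper. The `${search_engine}_settings` key mirrors the hunk above; the helper name and the usage line are placeholders.

// Sketch: normalize per-engine settings that may be passed as a JSON string.
function parseEngineSettings(config) {
    const key = `${config.search_engine}_settings`;
    const settings = config[key];
    if (settings && typeof settings === 'string') {
        try {
            config[key] = JSON.parse(settings);
        } catch (e) {
            // Malformed JSON: log it and leave the raw string untouched.
            console.error(e);
        }
    }
    return config[key];
}

// Hypothetical usage:
// parseEngineSettings({ search_engine: 'google', google_settings: '{"num": 10}' });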
@@ -123,24 +120,16 @@ module.exports = class Scraper {

        if (this.proxy && this.config.log_ip_address === true) {
            log(this.config, 3, `${this.metadata.ipinfo.ip} vs ${this.proxy}`);

            try {
                // if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
                if (!this.proxy.includes(this.metadata.ipinfo.ip)) {
                    console.error(`Proxy ${this.proxy} does not work.`);
                    return false;
                } else {
                    log(this.config, 1, `Using valid Proxy: ${this.proxy}`);
                }
            } catch (exception) {
            // if the ip returned by ipinfo is not a substring of our proxystring, get the heck outta here
            if (!this.proxy.includes(this.metadata.ipinfo.ip)) {
                throw new Error(`Proxy output ip ${this.proxy} does not match with provided one`);
            } else {
                log(this.config, 1, `Using valid Proxy: ${this.proxy}`);
            }

            }
        }

        try {
            return await this.load_start_page();
        } catch (e) {
            console.error(e);
            return false;
        }
        return await this.load_start_page();
    }

    /**
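To make the control flow in this hunk concrete, a hedged sketch of the wrapper idea: the base Scraper calls the engine-specific load_start_page and converts any thrown error into a false return, so individual engines do not each need their own try/catch. The method name mirrors the hunk above; the free-standing function form is illustrative only.

// Sketch: base-class guard around an engine's load_start_page().
async function safeLoadStartPage(scraper) {
    try {
        // Any navigation or selector error inside the engine bubbles up to here.
        return await scraper.load_start_page();
    } catch (e) {
        console.error(e);
        return false;
    }
}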
@@ -391,7 +380,6 @@ module.exports = class Scraper {

// This is where we'll put the code to get around the tests.
async function evadeChromeHeadlessDetection(page) {

    try {
        // Pass the Webdriver Test.
        await page.evaluateOnNewDocument(() => {
            const newProto = navigator.__proto__;

@@ -518,8 +506,4 @@ async function evadeChromeHeadlessDetection(page) {

                return null;
            };
        });

    } catch (e) {
        console.error(e);
    }
}
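For context, a minimal sketch of the kind of tweak evadeChromeHeadlessDetection performs above: puppeteer's page.evaluateOnNewDocument runs a callback in every new document, and the callback can hide the navigator.webdriver flag that headless Chrome exposes. The exact set of evasions in the repo is larger; this shows only the webdriver case and is a sketch, not the repo's full implementation.

// Sketch: hide the webdriver flag before any page script runs.
// `page` is assumed to be a puppeteer Page.
async function hideWebdriverFlag(page) {
    await page.evaluateOnNewDocument(() => {
        // Headless Chrome defines navigator.webdriver on the prototype; remove it.
        const newProto = Object.getPrototypeOf(navigator);
        delete newProto.webdriver;
        Object.setPrototypeOf(navigator, newProto);
    });
}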
@@ -1,3 +1,4 @@
'use strict';
const zlib = require('zlib');
var fs = require('fs');
var os = require("os");
@@ -1,5 +1,6 @@
'use strict';
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
const assert = require('chai').assert;

/*
 * Use chai and mocha for tests.

@@ -113,7 +114,6 @@ async function no_results_test() {

function test_case_no_results(response) {
    assert.equal(response.metadata.num_requests, 1);

    results = response.results;
    for (let query in response.results) {

        assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

@@ -134,7 +134,8 @@ function test_case_no_results(response) {
    }
}

(async () => {
    await normal_search_test();
    await no_results_test();
})();
describe('Amazon', function(){
    this.timeout(30000);
    it('normal search test', normal_search_test);
    it('no results test', no_results_test);
});
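The same conversion repeats for the remaining test files below: the ad-hoc async IIFE runner is replaced by a mocha describe/it block with a raised timeout, so each existing async test function doubles directly as a mocha test case. A minimal self-contained sketch of that shape (the response object and assertion here are placeholders standing in for a real se_scraper.scrape call):

// Sketch: an async test function used directly as a mocha test case.
// Run with mocha, which provides describe/it as globals and awaits returned promises.
'use strict';
const assert = require('chai').assert;

async function normal_search_test() {
    // Placeholder for a real scrape and its result checks.
    const response = { metadata: { num_requests: 1 } };
    assert.equal(response.metadata.num_requests, 1);
}

describe('Amazon (sketch)', function () {
    this.timeout(30000);                           // scraping needs more than mocha's 2 s default
    it('normal search test', normal_search_test);  // mocha awaits the async function
});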
@@ -1,5 +1,6 @@
'use strict';
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
const assert = require('chai').assert;

/*
 * Use chai and mocha for tests.

@@ -80,6 +81,7 @@ function normal_search_test_case(response) {
    }
}

(async () => {
    await normal_search_test();
})();
describe('Baidu', function(){
    this.timeout(30000);
    it('normal search test', normal_search_test);
});
@@ -1,3 +1,4 @@
'use strict';
const se_scraper = require('./../index.js');
const chai = require('chai');
chai.use(require('chai-string'));

@@ -114,7 +115,6 @@ async function no_results_test() {

function test_case_no_results(response) {
    assert.equal(response.metadata.num_requests, 1);

    results = response.results;
    for (let query in response.results) {

        assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

@@ -163,7 +163,6 @@ async function effective_query_test() {

function test_case_effective_query(response) {
    assert.equal(response.metadata.num_requests, 1);

    results = response.results;
    for (let query in response.results) {

        assert.containsAllKeys(response.results, effective_query_keywords, 'not all keywords were scraped.');
@@ -1,3 +1,4 @@
'use strict';
const se_scraper = require('./../index.js');
const chai = require('chai');
chai.use(require('chai-string'));
@@ -1,3 +1,4 @@
'use strict';
const se_scraper = require('./../index.js');
const chai = require('chai');
chai.use(require('chai-string'));

@@ -110,7 +111,6 @@ async function no_results_test() {

function test_case_no_results(response) {
    assert.equal(response.metadata.num_requests, 1);

    results = response.results;
    for (let query in response.results) {

        assert.containsAllKeys(response.results, keywords_no_results, 'not all keywords were scraped.');

@@ -123,7 +123,7 @@ function test_case_no_results(response) {

        assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');

        assert(obj.results.length === 0, 'results must have 0 SERP objects');
        assert.strictEqual(obj.results.length, 0, 'results must have 0 SERP objects');
        assert.equal(obj.no_results, true, 'no results should be true');
        assert.isEmpty(obj.num_results, 'no results should be a empty string');
        assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
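The assertion tightening in that last hunk (a bare assert replaced by assert.strictEqual) is easier to see against a concrete object; the shape below simply mirrors the keys checked above and is hand-made test data, not repo output.

// Sketch: the no-results checks from the hunk above, run against a hand-made object.
const { assert } = require('chai');

const obj = { results: [], time: new Date().toISOString(), no_results: true, num_results: '', effective_query: '' };

assert.containsAllKeys(obj, ['results', 'time', 'no_results', 'num_results', 'effective_query'], 'not all keys are in the object');
assert.strictEqual(obj.results.length, 0, 'results must have 0 SERP objects'); // clearer failure output than a bare assert
assert.equal(obj.no_results, true, 'no results should be true');
assert.isEmpty(obj.num_results, 'no results should be a empty string');
assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');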
@@ -1,5 +1,6 @@
'use strict';
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
const assert = require('chai').assert;

/*
 * Use chai and mocha for tests.

@@ -73,6 +74,7 @@ function normal_image_search_test_case(response) {
    }
}

(async () => {
    await normal_image_search_test();
})();
describe('Google Image', function(){
    this.timeout(30000);
    it('normal image search test', normal_image_search_test);
});
@@ -1,5 +1,6 @@
'use strict';
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
const assert = require('chai').assert;

const normal_search_keywords = ['apple juice'];

@@ -84,6 +85,7 @@ function queryargs_search_test_case(err, response) {
    }
}

(async () => {
    await queryargs_search_test();
})();
describe('Google with query arguments', function(){
    this.timeout(30000);
    it('query args search test', queryargs_search_test);
});
@@ -1,5 +1,6 @@
'use strict';
const se_scraper = require('./../index.js');
var assert = require('chai').assert;
const assert = require('chai').assert;

/*
 * Use chai and mocha for tests.

@@ -208,8 +209,9 @@ function marketwatch_search_test_case(err, response) {
    }

(async () => {
    await reuters_search_test();
    await cnbc_search_test();
    await marketwatch_search_test();
})();
describe('Ticker', function(){
    this.timeout(30000);
    it('Reuters search test', reuters_search_test);
    it('CNBC search test', cnbc_search_test);
    it('Marketwatch search test', marketwatch_search_test);
});