tested and works

2019-01-30 23:53:09 +01:00 · 2019-01-30 23:53:09 +01:00 · 987e3d7342
commit 987e3d7342
parent 581568ff18
16 changed files with 608 additions and 599 deletions
--- a/README.md
+++ b/README.md
@ -26,6 +26,36 @@ Additionally **se-scraper** supports investment ticker search from the following

 This module uses puppeteer. It was created by the Developer of https://github.com/NikolaiT/GoogleScraper, a module with 1800 Stars on Github.

+### Quickstart
+
+Install with
+
+```bash
+npm install se-scraper
+```
+
+Then create a file with the following contents and start scraping:
+
+```js
+const se_scraper = require('se-scraper');
+
+let config = {
+    search_engine: 'google',
+    debug: false,
+    verbose: false,
+    keywords: ['news', 'scraping scrapeulous.com'],
+    num_pages: 3,
+    output_file: 'data.json',
+};
+
+function callback(err, response) {
+    if (err) { console.error(err) }
+    console.dir(response, {depth: null, colors: true});
+}
+
+se_scraper.scrape(config, callback);
+```
+
 ### Technical Notes

 Scraping is done with a headless chromium browser using the automation library puppeteer. Puppeteer is a Node library which provides a high-level API to control headless Chrome or Chromium over the DevTools Protocol.
@ -75,13 +105,7 @@ Consider the following resources:

 * https://intoli.com/blog/making-chrome-headless-undetectable/

-### Installation and Usage
-
-Install with
-
-```bash
-npm install se-scraper
-```
+### Advanced Usage

 Use se-scraper by calling it with a script such as the one below.

@ -162,9 +186,7 @@ Supported options for the `search_engine` config key:
 'baidu'
 'youtube'
 'duckduckgo_news'
-'google_dr'
 'yahoo_news'
-// ticker search
 'bloomberg'
 'reuters'
 'cnbc'
--- a/data.json
+++ b/data.json
--- a/examples/quickstart.js
+++ b/examples/quickstart.js
@ -0,0 +1,17 @@
+// Minimal usage example: scrape two DuckDuckGo result pages for one keyword.
+// The module is loaded from the repository root rather than from npm.
+const se_scraper = require('./../index.js');
+
+// Options handed to se_scraper.scrape(); see the README for the full list.
+let config = {
+    search_engine: 'duckduckgo',
+    debug: false,
+    verbose: false,
+    keywords: ['news'],
+    num_pages: 2,
+    output_file: 'data.json',
+};
+
+// Node-style callback: logs the error (if any) and then pretty-prints the
+// response. Note that the response is printed even when `err` is set.
+function callback(err, response) {
+    if (err) { console.error(err) }
+    console.dir(response, {depth: null, colors: true});
+}
+
+se_scraper.scrape(config, callback);
--- a/index.js
+++ b/index.js
@ -8,11 +8,11 @@ exports.scrape = async function(config, callback) {
 		// the user agent to scrape with
 		user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
 		// if random_user_agent is set to True, a random user agent is chosen
-		random_user_agent: false,
+		random_user_agent: true,
 		// whether to select manual settings in visible mode
 		set_manual_settings: false,
 		// get meta data of scraping in return object
-		write_meta_data: true,
+		write_meta_data: false,
 		log_http_headers: false,
 		// how long to sleep between requests. a random sleep interval within the range [a,b]
 		// is drawn before every request. empty string for no sleeping.
--- a/run.js
+++ b/run.js
@ -12,23 +12,23 @@ let config = {
    // is drawn before every request. empty string for no sleeping.
    sleep_range: '[1,2]',
    // which search engine to scrape
-    search_engine: 'google_news',
+    search_engine: 'google',
    // whether debug information should be printed
    // debug info is useful for developers when debugging
-    debug: true,
+    debug: false,
    // whether verbose program output should be printed
    // this output is informational
    verbose: true,
    // an array of keywords to scrape
-    keywords: ['hacking', 'trump'],
+    keywords: ['news'],
    // alternatively you can specify a keyword_file. this overwrites the keywords array
    keyword_file: '',
    // the number of pages to scrape for each keyword
-    num_pages: 1,
+    num_pages: 2,
    // whether to start the browser in headless mode
-    headless: false,
+    headless: true,
    // path to output file, data will be stored in JSON
-    output_file: 'data.json',
+    output_file: '',
    // whether to prevent images, css, fonts from being loaded
    // will speed up scraping a great deal
    block_assets: true,
@ -41,7 +41,7 @@ let config = {
    // use a proxy for all connections
    // example: 'socks5://78.94.172.42:1080'
    // example: 'http://118.174.233.10:48400'
-    //proxy: 'socks5://78.94.172.42:1080',
+    proxy: '',
 };

 function callback(err, response) {
--- a/src/modules/baidu.js
+++ b/src/modules/baidu.js
@ -1,109 +1,78 @@
 const cheerio = require('cheerio');
-const sfunctions = require('./functions.js');
+const Scraper = require('./se_scraper');

-module.exports = {
-	scrape_baidu_pup: scrape_baidu_pup,
-};
+class BaiduScraper extends Scraper {
+	parse(html) {
+		// load the page source into cheerio
+		const $ = cheerio.load(html);

-async function scrape_baidu_pup(page, event, context, pluggable) {
-	await page.goto('https://www.baidu.com/');
+		// perform queries
+		const results = [];
+		$('#content_left .result').each((i, link) => {
+			results.push({
+				link: $(link).find('h3 a').attr('href'),
+				title: $(link).find('h3').text(),
+				snippet: $(link).find('.c-abstract').text(),
+				visible_link: $(link).find('.f13').text(),
+			})
+		});

-	try {
-		await page.waitForSelector('input[name="wd"]', { timeout: 5000 });
-	} catch (e) {
-		return results;
-	}
-
-	let keywords = event.keywords;
-	var results = {};
-
-	for (var i = 0; i < keywords.length; i++) {
-
-		keyword = keywords[i];
-
-		if (pluggable.before_keyword_scraped) {
-			await pluggable.before_keyword_scraped({
-				keyword: keyword,
-				page: page,
-				event: event,
-				context: context,
-			});
-		}
-
-		try {
-			const input = await page.$('input[name="wd"]');
-			// overwrites last text in input
-			await input.click({ clickCount: 3 });
-			await input.type(keyword);
-			await input.focus();
-			await page.keyboard.press("Enter");
-
-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
-
-			// in baidu we have a issue with waiting for a selector 
-			// or waiting for navigation
-			// therefore, we just manually sleep
-
-			// issue in baidu: https://github.com/GoogleChrome/puppeteer/issues/609
-			// https://github.com/GoogleChrome/puppeteer/issues/2671
-			// await page.evaluate( () => {
-			//     if ( ! window.Node ) {
-			//         window.Node = {};
-			//     }
-			//     if ( ! Node.ELEMENT_NODE ) {
-			//         Node.ELEMENT_NODE = 1;
-			//     }
-			// } );
-			// await page.waitForSelector('.result', { timeout: 5000 });
-
-			// this should be reasonable for normal internet connections
-			await sfunctions.sleep(2000);
-
-			if (event.debug === true && event.is_local === true) {
-				await page.screenshot({path: `debug/${keyword}.png`});
+		const cleaned = [];
+		for (var i=0; i < results.length; i++) {
+			let res = results[i];
+			if (res.link && res.link.trim()) {
+				res.rank = this.result_rank++;
+				cleaned.push(res);
 			}
+		}

-			let html = await page.content();
-			results[keyword] = parse(html);
-
-		} catch (e) {
-			console.error(`Problem with scraping ${keyword}: ${e}`);
+		return {
+			time: (new Date()).toUTCString(),
+			no_results: false,
+			num_results: $('.nums_text').text(),
+			results: cleaned,
 		}
 	}

-	return results;
+	/**
+	 * Navigate to the Baidu home page and wait for its search box.
+	 *
+	 * @returns {Promise<boolean>} false when navigation or the 5 second wait
+	 *     for `input[name="wd"]` throws, true otherwise.
+	 */
+	async load_start_page() {
+		let loaded = true;
+		try {
+			const { page } = this;
+			await page.goto('https://www.baidu.com/');
+			await page.waitForSelector('input[name="wd"]', { timeout: 5000 });
+		} catch (err) {
+			loaded = false;
+		}
+		return loaded;
+	}
+
+	/**
+	 * Enter `keyword` into Baidu's search box and submit it with Enter.
+	 *
+	 * @param {string} keyword - the query to type into `input[name="wd"]`
+	 */
+	async search_keyword(keyword) {
+		const input = await this.page.$('input[name="wd"]');
+		// triple-click first so that typing overwrites the last text in the input
+		await input.click({ clickCount: 3 });
+		await input.type(keyword);
+		await input.focus();
+		await this.page.keyboard.press("Enter");
+	}
+
+	/**
+	 * Click the "next page" link, if there is one, and wait for the navigation.
+	 *
+	 * @returns {Promise<boolean>} false when no next-page link is found,
+	 *     true after the click and the resulting navigation have completed.
+	 */
+	async next_page() {
+		// page.$() accepts only a selector; the former `{timeout: 1000}`
+		// argument was silently ignored, so it is dropped.
+		// NOTE(review): '.sb_pagN' looks copied from another engine's scraper;
+		// confirm it actually matches Baidu's pagination markup.
+		let next_page_link = await this.page.$('.sb_pagN');
+		if (!next_page_link) {
+			return false;
+		}
+		// Register the navigation wait before clicking: if the navigation
+		// finishes before waitForNavigation() is called, the wait would only
+		// end by timing out.
+		await Promise.all([
+			this.page.waitForNavigation(),
+			next_page_link.click(),
+		]);
+
+		return true;
+	}
+
+	/**
+	 * Pause for a fixed 2000 ms instead of waiting on a results selector.
+	 */
+	async wait_for_results() {
+		// TODO: very very bad, but nobody uses baidu, or does someone?
+		await this.sleep(2000);
+	}
+
+	// Intentionally empty: no detection logic is implemented for Baidu.
+	// NOTE(review): presumably overrides a hook on the Scraper base class — confirm.
+	async detected() {
+	}
 }

-function parse(html) {
-	// load the page source into cheerio
-	const $ = cheerio.load(html);
-
-	// perform queries
-	const results = [];
-	$('#content_left .result').each((i, link) => {
-		results.push({
-		  link: $(link).find('h3 a').attr('href'),
-		  title: $(link).find('h3').text(),
-		  snippet: $(link).find('.c-abstract').text(),
-		  visible_link: $(link).find('.f13').text(),
-		})
-	});
-
-	const cleaned = [];
-	for (var i=0; i < results.length; i++) {
-		let res = results[i];
-		if (res.link && res.link.trim()) {
-			res.rank = i+1;
-			cleaned.push(res);
-		}
-	}
-
-	return {
-		time: (new Date()).toUTCString(),
-		no_results: false,
-		num_results: $('.nums_text').text(),
-		results: cleaned,
-	}
-}
+module.exports = {
+	BaiduScraper: BaiduScraper,
+};
--- a/src/modules/bing.js
+++ b/src/modules/bing.js
@ -29,7 +29,7 @@ class BingScraper extends Scraper {
 		for (var i=0; i < results.length; i++) {
 			let res = results[i];
 			if (res.link && res.link.trim() && res.title && res.title.trim()) {
-				res.rank = i+1;
+				res.rank = this.result_rank++;
 				cleaned.push(res);
 			}
 		}
@ -104,7 +104,7 @@ class BingNewsScraper extends Scraper {
 		for (var i=0; i < results.length; i++) {
 			let res = results[i];
 			if (res.link && res.link.trim() && res.title && res.title.trim()) {
-				res.rank = i+1;
+				res.rank = this.result_rank++;
 				cleaned.push(res);
 			}
 		}
--- a/src/modules/duckduckgo.js
+++ b/src/modules/duckduckgo.js
@ -1,94 +1,148 @@
 const cheerio = require('cheerio');
-const sfunctions = require('./functions.js');
+const Scraper = require('./se_scraper');

-module.exports = {
-    scrape_duckduckgo_news_pup: scrape_duckduckgo_news_pup,
-};
+class DuckduckgoScraper extends Scraper {

-async function scrape_duckduckgo_news_pup(page, event, context, pluggable) {
-    await page.goto('https://duckduckgo.com/?q=42&t=h_&iar=news&ia=news');
+    parse(html) {
+        // load the page source into cheerio
+        const $ = cheerio.load(html);

-    try {
-        await page.waitForSelector('input[name="q"]', { timeout: 5000 });
-    } catch (e) {
-        return results;
-    }
-
-    let keywords = event.keywords;
-    var results = {};
-
-    for (var i = 0; i < keywords.length; i++) {
-
-        keyword = keywords[i];
-
-        if (pluggable.before_keyword_scraped) {
-            await pluggable.before_keyword_scraped({
-                keyword: keyword,
-                page: page,
-                event: event,
-                context: context,
+        // perform queries
+        const results = [];
+        $('.result__body').each((i, link) => {
+            results.push({
+                link: $(link).find('.result__title .result__a').attr('href'),
+                title: $(link).find('.result__title .result__a').text(),
+                date: $(link).find('.result__timestamp').text(),
+                snippet: $(link).find('.result__snippet').text(),
+                visible_link: $(link).find('.result__url').attr('href'),
            });
+        });
+
+        const cleaned = [];
+        for (var i=0; i < results.length; i++) {
+            let res = results[i];
+            if (res.link && res.link.trim() && res.title && res.title.trim()) {
+                res.rank = this.result_rank++;
+                cleaned.push(res);
+            }
        }

-        try {
-            const input = await page.$('input[name="q"]');
-            // overwrites last text in input
-            await input.click({ clickCount: 3 });
-            await sfunctions.sleep(150);
-            await input.type(keyword);
-            await sfunctions.sleep(150);
-            await input.focus();
-            await page.keyboard.press("Enter");
-
-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
-
-            // await page.waitForSelector('.result--news', { timeout: 5000 });
-            await page.waitForSelector('.serp__results', { timeout: 5000 });
-
-            await sfunctions.sleep(1500);
-
-            if (event.debug === true && event.is_local === true) {
-                await page.screenshot({path: `debug/${keyword}.png`});
-            }
-            let html = await page.content();
-            results[keyword] = parse_duckduckgo_news_results(html, event.max_results);
-
-        } catch (e) {
-            console.error(`Problem with scraping ${keyword}: ${e}`);
-            return results;
+        return {
+            time: (new Date()).toUTCString(),
+            results: cleaned
        }
    }
-    return results;
+
+    /**
+     * Navigate to the DuckDuckGo home page and wait for its search box.
+     *
+     * @returns {Promise<boolean>} false when navigation or the 5 second wait
+     *     for `input[name="q"]` throws, true otherwise.
+     */
+    async load_start_page() {
+        let loaded = true;
+        try {
+            const { page } = this;
+            await page.goto('https://duckduckgo.com/');
+            await page.waitForSelector('input[name="q"]', { timeout: 5000 });
+        } catch (err) {
+            loaded = false;
+        }
+        return loaded;
+    }
+
+    /**
+     * Put `keyword` into the search box and submit it with Enter.
+     *
+     * @param {string} keyword - the query to submit
+     */
+    async search_keyword(keyword) {
+        const input = await this.page.$('input[name="q"]');
+        // NOTE(review): set_input_value is defined outside this class; presumably
+        // it assigns the element's value directly instead of typing — confirm.
+        await this.set_input_value(`input[name="q"]`, keyword);
+        // short pause before focusing the box and pressing Enter
+        await this.sleep(50);
+        await input.focus();
+        await this.page.keyboard.press("Enter");
+    }
+
+    /**
+     * Click the "more results" button if it is present.
+     *
+     * @returns {Promise<boolean>} false when no button is found, true after
+     *     the click has been issued.
+     */
+    async next_page() {
+        // page.$() accepts only a selector; the former `{timeout: 1000}`
+        // argument was silently ignored, so it is dropped.
+        let next_page_link = await this.page.$('a.result--more__btn');
+        if (!next_page_link) {
+            return false;
+        }
+        await next_page_link.click();
+        // Deliberately no waitForNavigation() here (it was disabled in the
+        // original). NOTE(review): presumably the button loads further
+        // results into the current page — confirm.
+
+        return true;
+    }
+
+    /**
+     * Wait up to 5 seconds for the `.serp__results` container to appear.
+     * Rejects if the selector does not show up in time.
+     */
+    async wait_for_results() {
+        await this.page.waitForSelector('.serp__results', { timeout: 5000 });
+    }
+
+    // Intentionally empty: no detection logic is implemented for DuckDuckGo.
+    // NOTE(review): presumably overrides a hook on the Scraper base class — confirm.
+    async detected() {
+    }
 }

-function parse_duckduckgo_news_results(html) {
-    // load the page source into cheerio
-    const $ = cheerio.load(html);

-    // perform queries
-    const results = [];
-    $('.result--news').each((i, link) => {
-        results.push({
-            link: $(link).find('.result__title .result__a').attr('href'),
-            title: $(link).find('.result__title .result__a').text(),
-            date: $(link).find('.result__timestamp').text(),
-            snippet: $(link).find('.result__snippet').text(),
+class DuckduckgoNewsScraper extends Scraper {
+
+    parse(html) {
+        // load the page source into cheerio
+        const $ = cheerio.load(html);
+
+        // perform queries
+        const results = [];
+        $('.result--news').each((i, link) => {
+            results.push({
+                link: $(link).find('.result__title .result__a').attr('href'),
+                title: $(link).find('.result__title .result__a').text(),
+                date: $(link).find('.result__timestamp').text(),
+                snippet: $(link).find('.result__snippet').text(),
+            });
        });
-    });

-    const cleaned = [];
-    for (var i=0; i < results.length; i++) {
-        let res = results[i];
-        if (res.link && res.link.trim() && res.title && res.title.trim()) {
-            res.rank = i+1;
-            cleaned.push(res);
+        const cleaned = [];
+        for (var i=0; i < results.length; i++) {
+            let res = results[i];
+            if (res.link && res.link.trim() && res.title && res.title.trim()) {
+                res.rank = this.result_rank++;
+                cleaned.push(res);
+            }
+        }
+
+        return {
+            time: (new Date()).toUTCString(),
+            results: cleaned
        }
    }

-    return {
-        time: (new Date()).toUTCString(),
-        results: cleaned
+    /**
+     * Open the DuckDuckGo news start page and wait for its search box.
+     *
+     * @returns {Promise<boolean>} false when navigation or the 5 second wait
+     *     for `input[name="q"]` throws, true otherwise.
+     */
+    async load_start_page() {
+        try {
+            // The page handle lives on the instance. A bare `page` is an
+            // undeclared identifier here; its ReferenceError was swallowed by
+            // the catch below, so this method always returned false.
+            await this.page.goto('https://duckduckgo.com/?q=42&t=h_&iar=news&ia=news');
+            await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
+        } catch (e) {
+            return false;
+        }
+        return true;
    }
-}
+
+    /**
+     * Put `keyword` into the search box and submit it with Enter.
+     *
+     * @param {string} keyword - the query to submit
+     */
+    async search_keyword(keyword) {
+        const input = await this.page.$('input[name="q"]');
+        // NOTE(review): set_input_value is defined outside this class; presumably
+        // it assigns the element's value directly instead of typing — confirm.
+        await this.set_input_value(`input[name="q"]`, keyword);
+        // short pause before focusing the box and pressing Enter
+        await this.sleep(50);
+        await input.focus();
+        await this.page.keyboard.press("Enter");
+    }
+
+    /**
+     * Click the "next page" link, if there is one, and wait for the navigation.
+     *
+     * @returns {Promise<boolean>} false when no next-page link is found,
+     *     true after the click and the resulting navigation have completed.
+     */
+    async next_page() {
+        // page.$() accepts only a selector; the former `{timeout: 1000}`
+        // argument was silently ignored, so it is dropped.
+        // NOTE(review): '.sb_pagN' differs from the 'a.result--more__btn'
+        // selector used by DuckduckgoScraper and looks copied from another
+        // engine; confirm it matches the DuckDuckGo news markup.
+        let next_page_link = await this.page.$('.sb_pagN');
+        if (!next_page_link) {
+            return false;
+        }
+        // Register the navigation wait before clicking: if the navigation
+        // finishes before waitForNavigation() is called, the wait would only
+        // end by timing out.
+        await Promise.all([
+            this.page.waitForNavigation(),
+            next_page_link.click(),
+        ]);
+
+        return true;
+    }
+
+    /**
+     * Wait up to 5 seconds for the `.serp__results` container, then pause a
+     * further 1500 ms before returning.
+     */
+    async wait_for_results() {
+        await this.page.waitForSelector('.serp__results', { timeout: 5000 });
+        // NOTE(review): fixed extra delay; presumably gives the news entries
+        // time to render inside the container — confirm.
+        await this.sleep(1500);
+    }
+
+    // Intentionally empty: no detection logic is implemented for DuckDuckGo news.
+    // NOTE(review): presumably overrides a hook on the Scraper base class — confirm.
+    async detected() {
+    }
+}
+
+module.exports = {
+    DuckduckgoNewsScraper: DuckduckgoNewsScraper,
+    DuckduckgoScraper: DuckduckgoScraper,
+};
--- a/src/modules/functions.js
+++ b/src/modules/functions.js
@ -1,40 +0,0 @@
-module.exports = {
-	no_results: no_results,
-	effective_query: effective_query,
-    sleep: sleep,
-    random_sleep: random_sleep,
-    set_input_value: set_input_value,
-
-};
-
-async function set_input_value(page, selector, value) {
-    await page.waitFor(selector);
-    await page.evaluate((value, selector) => {
-        return document.querySelector(selector).value = value;
-    }, value, selector);
-}
-
-function no_results(needles, html) {
-	return !needles.map((needle) => { return html.indexOf(needle)})
-		.every((res) => { return res == -1});
-}
-
-function effective_query(needles, html) {
-	return;
-}
-
-function sleep(ms) {
-    return new Promise(resolve => {
-        setTimeout(resolve, ms)
-    })
-}
-
-async function random_sleep(config) {
-    var min, max;
-    [min, max] = config.sleep_range;
-    var rand = Math.floor(Math.random() * (max - min + 1) + min); //Generate Random number
-    if (config.debug === true) {
-        console.log(`Sleeping for ${rand}s`);
-    }
-    await sleep(rand * 1000);
-}
--- a/src/modules/google.js
+++ b/src/modules/google.js
@ -1,5 +1,4 @@
 const cheerio = require('cheerio');
-const sfunctions = require('./functions.js');
 const Scraper = require('./se_scraper');

 class GoogleScraper extends Scraper {
@ -20,7 +19,7 @@ class GoogleScraper extends Scraper {
 			})
 		});

-		let no_results = sfunctions.no_results(
+		let no_results = this.no_results(
 			['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
 				'No results found for', 'Ergebnisse für', 'Showing results for'],
 			$('#main').text()
@ -35,7 +34,7 @@ class GoogleScraper extends Scraper {
 		for (var i=0; i < results.length; i++) {
 			let res = results[i];
 			if (res.link && res.link.trim() && res.title && res.title.trim()) {
-				res.rank = i+1;
+				res.rank = this.result_rank++;
 				cleaned.push(res);
 			}
 		}
@ -108,7 +107,7 @@ class GoogleNewsOldScraper extends Scraper {
 			})
 		});

-		let no_results = sfunctions.no_results(
+		let no_results = this.no_results(
 			['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
 				'No results found for', 'Ergebnisse für', 'Showing results for', 'did not match any news results'],
 			$('#main').text()
@ -123,7 +122,7 @@ class GoogleNewsOldScraper extends Scraper {
 		for (var i=0; i < results.length; i++) {
 			let res = results[i];
 			if (res.link && res.link.trim()) {
-				res.rank = i+1;
+				res.rank = this.result_rank++;
 				cleaned.push(res);
 			}
 		}
@ -161,7 +160,7 @@ class GoogleNewsOldScraper extends Scraper {
 	async wait_for_results() {
 		//await this.page.waitForNavigation({ timeout: this.STANDARD_TIMEOUT });
 		await this.page.waitForSelector('#main', { timeout: this.STANDARD_TIMEOUT });
-		await this.sleep(200);
+		await this.sleep(500);
 	}

 	async detected() {
@ -190,7 +189,7 @@ class GoogleImageScraper extends Scraper {
 			})
 		});

-		let no_results = sfunctions.no_results(
+		let no_results = this.no_results(
 			['stimmt mit keinem Bildergebnis', 'Keine Ergebnisse für', 'not match any image results', 'No results found for',
 				'Showing results for', 'Ergebnisse für'],
 			$('#main').text()
@ -206,7 +205,7 @@ class GoogleImageScraper extends Scraper {
 			let res = results[i];
 			if (res.link && res.link.trim() && res.link.trim().length > 10) {
 				res.link = res.link.trim();
-				res.rank = i+1;
+				res.rank = this.result_rank++;
 				cleaned.push(res);
 			}
 		}
@ -252,7 +251,7 @@ class GoogleImageScraper extends Scraper {

 	async wait_for_results() {
 		await this.page.waitForSelector('#main', { timeout: this.STANDARD_TIMEOUT });
-		await this.sleep(100);
+		await this.sleep(500);
 	}

 	async detected() {
@ -296,7 +295,7 @@ class GoogleNewsScraper extends Scraper {
 			this.all_results.add(title);
 		});

-		let no_results = sfunctions.no_results(
+		let no_results = this.no_results(
 			['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
 				'No results found for', 'Ergebnisse für', 'Showing results for', 'did not match any news results'],
 			$('body').text()
@ -308,7 +307,7 @@ class GoogleNewsScraper extends Scraper {
 		for (var i=0; i < results.length; i++) {
 			let res = results[i];
 			if (res.title && res.title.trim()) {
-				res.rank = i+1;
+				res.rank = this.result_rank++;
 				cleaned.push(res);
 			}
 		}
@ -333,6 +332,7 @@ class GoogleNewsScraper extends Scraper {
 			// parse here front page results
 			let html = await this.page.content();
 			this.results['frontpage'] = this.parse(html);
+			this.result_rank = 1;
 		} catch(e) {
 			return false;
 		}
@ -367,7 +367,6 @@ class GoogleNewsScraper extends Scraper {
 	}
 }

-
 function clean_image_url(url) {
 	// Example:
 	// https://www.google.com/imgres?imgurl=https%3A%2F%2Fupload.wikimedia.org%2Fwikipedia%2Fen%2Fthumb%2Ff%2Ffd%2F1928_Edward_Campbell.jpg%2F220px-1928_Edward_Campbell.jpg&imgrefurl=https%3A%2F%2Fwww.revolvy.com%2Fpage%2FSir-Edward-Campbell%252C-1st-Baronet&docid=BMkW_GerTIY4GM&tbnid=TmQapIxDCQbQhM%3A&vet=10ahUKEwje_LLE_YXeAhXisaQKHVAEBSAQMwiNAShEMEQ..i&w=220&h=290&bih=1696&biw=1280&q=John%20MacLeod%20Breadalbane%20Councillor%20Prince%20Edward%20Island&ved=0ahUKEwje_LLE_YXeAhXisaQKHVAEBSAQMwiNAShEMEQ&iact=mrc&uact=8
--- a/src/modules/infospace.js
+++ b/src/modules/infospace.js
@ -1,186 +1,157 @@
 const cheerio = require('cheerio');
-const sfunctions = require('./functions.js');
+const Scraper = require('./se_scraper');
+
+/**
+ * Scraper for infospace.com web search.
+ *
+ * Uses members that are not defined in this class (`this.page`,
+ * `this.result_rank`, `this.no_results`, `this.set_input_value`,
+ * `this.sleep`); presumably they come from the Scraper base class — confirm.
+ */
+class InfospaceScraper extends Scraper {
+
+    /**
+     * Extract the search results from a result page.
+     *
+     * @param {string} html - source of the result page
+     * @returns {Object} `{time, no_results, num_results, results}`;
+     *     `num_results` is always the empty string for this engine.
+     */
+    parse(html) {
+        // load the page source into cheerio
+        const $ = cheerio.load(html);
+
+        // perform queries
+        const results = [];
+        $('.result').each((i, link) => {
+            const title_link = $(link).find('a.title');
+            results.push({
+                link: title_link.attr('href'),
+                title: title_link.text(),
+                snippet: $(link).find('.description').text(),
+                visible_link: $(link).find('.url').text(),
+            })
+        });
+
+        // Keep only entries with a non-blank link. The rank comes from the
+        // running counter this.result_rank, so it is not reset per page.
+        const cleaned = [];
+        for (const res of results) {
+            if (res.link && res.link.trim()) {
+                res.rank = this.result_rank++;
+                cleaned.push(res);
+            }
+        }
+
+        let no_results = this.no_results(
+            ['No search results were found for'],
+            $('.layout__mainline').text()
+        );
+
+        return {
+            time: (new Date()).toUTCString(),
+            no_results: no_results,
+            num_results: '',
+            results: cleaned,
+        }
+    }
+
+    /**
+     * Open the Infospace start page and wait for its search box.
+     *
+     * @returns {Promise<boolean>} false when navigation or the 5 second wait
+     *     for `input[name="q"]` throws, true otherwise.
+     */
+    async load_start_page() {
+        try {
+            await this.page.goto('http://infospace.com/index.html');
+            await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
+        } catch (e) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Put `keyword` into the search box and submit it with Enter.
+     *
+     * @param {string} keyword - the query to submit
+     */
+    async search_keyword(keyword) {
+        // NOTE(review): the box is addressed by id here but by name in
+        // load_start_page(); presumably both match the same element — confirm.
+        const input = await this.page.$('input[id="q"]');
+        await this.set_input_value('input[id="q"]', keyword);
+        await this.sleep(50);
+        await input.focus();
+        await this.page.keyboard.press("Enter");
+    }
+
+    /**
+     * Click the "next" link, if there is one, and wait for the navigation.
+     *
+     * @returns {Promise<boolean>} false when no next link is found, true
+     *     after the click and the resulting navigation have completed.
+     */
+    async next_page() {
+        // page.$() accepts only a selector; the former `{timeout: 1000}`
+        // argument was silently ignored, so it is dropped.
+        let next_page_link = await this.page.$('a.next');
+        if (!next_page_link) {
+            return false;
+        }
+        // Register the navigation wait before clicking: if the navigation
+        // finishes before waitForNavigation() is called, the wait would only
+        // end by timing out.
+        await Promise.all([
+            this.page.waitForNavigation(),
+            next_page_link.click(),
+        ]);
+
+        return true;
+    }
+
+    /**
+     * Wait up to 5 seconds for `.mainline-results`, then pause 250 ms.
+     */
+    async wait_for_results() {
+        await this.page.waitForSelector('.mainline-results', { timeout: 5000 }); // TODO: this is not the best selector.
+        await this.sleep(250);
+    }
+
+    // Intentionally empty: no detection logic is implemented for Infospace.
+    async detected() {
+    }
+}
+
+/**
+ * Scraper for the news vertical of webcrawler.com.
+ *
+ * Uses members that are not defined in this class (`this.page`,
+ * `this.result_rank`, `this.set_input_value`, `this.sleep`); presumably they
+ * come from the Scraper base class — confirm.
+ */
+class WebcrawlerNewsScraper extends Scraper {
+
+    /**
+     * Extract the news articles from a result page.
+     *
+     * @param {string} html - source of the result page
+     * @returns {Object} `{time, results}`
+     */
+    parse(html) {
+        // load the page source into cheerio
+        const $ = cheerio.load(html);
+
+        // perform queries
+        const results = [];
+        $('.article').each((i, link) => {
+            // The `.source` text is used whole as the publisher; whatever
+            // follows its first comma (up to the next one) is taken as the date.
+            let source = $(link).find('.source').text();
+            let date = source.split(',')[1] || '';
+            results.push({
+                link: $(link).find('a').attr('href'),
+                title: $(link).find('.title').text(),
+                publisher: source,
+                date: date,
+                snippet: $(link).find('.description').text(),
+            });
+        });
+
+        // Keep only entries with a non-blank link and title. The rank comes
+        // from the running counter this.result_rank, so it is not reset per page.
+        const cleaned = [];
+        for (const res of results) {
+            if (res.link && res.link.trim() && res.title && res.title.trim()) {
+                res.rank = this.result_rank++;
+                cleaned.push(res);
+            }
+        }
+
+        return {
+            time: (new Date()).toUTCString(),
+            results: cleaned
+        }
+    }
+
+    /**
+     * Open the Webcrawler news start page and wait for its search box.
+     *
+     * @returns {Promise<boolean>} false when navigation or the 5 second wait
+     *     for `input[name="q"]` throws, true otherwise.
+     */
+    async load_start_page() {
+        try {
+            await this.page.goto('https://www.webcrawler.com/?qc=news');
+            await this.page.waitForSelector('input[name="q"]', { timeout: 5000 });
+        } catch (e) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Put `keyword` into the search box and submit it with Enter.
+     *
+     * @param {string} keyword - the query to submit
+     */
+    async search_keyword(keyword) {
+        const input = await this.page.$('input[name="q"]');
+        await this.set_input_value('input[name="q"]', keyword);
+        await this.sleep(50);
+        await input.focus();
+        await this.page.keyboard.press("Enter");
+    }
+
+    /**
+     * Click the "next" pagination link, if there is one, and wait for the
+     * navigation.
+     *
+     * @returns {Promise<boolean>} false when no next link is found, true
+     *     after the click and the resulting navigation have completed.
+     */
+    async next_page() {
+        // page.$() accepts only a selector; the former `{timeout: 1000}`
+        // argument was silently ignored, so it is dropped.
+        let next_page_link = await this.page.$('.pagination__num--next');
+        if (!next_page_link) {
+            return false;
+        }
+        // Register the navigation wait before clicking: if the navigation
+        // finishes before waitForNavigation() is called, the wait would only
+        // end by timing out.
+        await Promise.all([
+            this.page.waitForNavigation(),
+            next_page_link.click(),
+        ]);
+
+        return true;
+    }
+
+    /**
+     * Wait up to 5 seconds for `.mainline-results`, then pause 150 ms.
+     */
+    async wait_for_results() {
+        await this.page.waitForSelector('.mainline-results', { timeout: 5000 });
+        await this.sleep(150);
+    }
+
+    // Intentionally empty: no detection logic is implemented for Webcrawler news.
+    async detected() {
+    }
+}

 module.exports = {
-	scrape_infospace_pup: scrape_infospace_pup,
-    scrape_webcrawler_news_pup: scrape_webcrawler_news_pup,
-};
-
-async function scrape_infospace_pup(page, event, context, pluggable) {
-	await page.goto('http://infospace.com/index.html');
-
-	try {
-		await page.waitForSelector('input[name="q"]', { timeout: 5000 });
-	} catch (e) {
-		return results;
-	}
-
-	let keywords = event.keywords;
-	var results = {};
-
-	for (var i = 0; i < keywords.length; i++) {
-
-		keyword = keywords[i];
-
-        if (pluggable.before_keyword_scraped) {
-            await pluggable.before_keyword_scraped({
-                keyword: keyword,
-                page: page,
-                event: event,
-                context: context,
-            });
-        }
-
-		try {
-			const input = await page.$('input[id="q"]');
-			// overwrites last text in input
-			await input.click({ clickCount: 3 });
-			await input.type(keyword);
-			await input.focus();
-			await page.keyboard.press("Enter");
-
-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
-
-			await page.waitForSelector('.mainline-results', { timeout: 5000 }); // TODO: this is not the best selector.
-			await sfunctions.sleep(250);
-			if (event.debug === true && event.is_local === true) {
-				await page.screenshot({path: `debug/${keyword}.png`});
-			}
-
-			let html = await page.content();
-			results[keyword] = parse(html);
-
-		} catch (e) {
-			console.error(`Problem with scraping ${keyword}: ${e}`);
-		}
-	}
-
-	return results;
-}
-
-function parse(html) {
-	// load the page source into cheerio
-	const $ = cheerio.load(html);
-
-	// perform queries
-	const results = [];
-	$('.result').each((i, link) => {
-		results.push({
-		  link: $(link).find('a.title').attr('href'),
-		  title: $(link).find('a.title').text(),
-		  snippet: $(link).find('.description').text(),
-		  visible_link: $(link).find('.url').text(),
-		})
-	});
-
-	const cleaned = [];
-	for (var i=0; i < results.length; i++) {
-		let res = results[i];
-		if (res.link && res.link.trim()) {
-			res.rank = i+1;
-			cleaned.push(res);
-		}
-	}
-
-    let no_results = sfunctions.no_results(
-        ['No search results were found for'],
-        $('.layout__mainline').text()
-    );
-
-	return {
-		time: (new Date()).toUTCString(),
-		no_results: no_results,
-		num_results: '',
-		results: cleaned,
-	}
-}
-
-async function scrape_webcrawler_news_pup(page, event, context, pluggable) {
-    await page.goto('https://www.webcrawler.com/?qc=news');
-
-    try {
-        await page.waitForSelector('input[name="q"]', { timeout: 5000 });
-    } catch (e) {
-        return results;
-    }
-
-    let keywords = event.keywords;
-    var results = {};
-
-    for (var i = 0; i < keywords.length; i++) {
-
-        keyword = keywords[i];
-
-        if (pluggable.before_keyword_scraped) {
-            await pluggable.before_keyword_scraped({
-                keyword: keyword,
-                page: page,
-                event: event,
-                context: context,
-            });
-        }
-
-        try {
-            const input = await page.$('input[name="q"]');
-            // overwrites last text in input
-            await input.click({ clickCount: 3 });
-            await sfunctions.sleep(150);
-            await input.type(keyword);
-            await sfunctions.sleep(150);
-            await input.focus();
-            await page.keyboard.press("Enter");
-
-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
-
-            await page.waitForSelector('.mainline-results', { timeout: 5000 });
-
-            if (event.debug === true && event.is_local === true) {
-                await page.screenshot({path: `debug/${keyword}.png`});
-            }
-            let html = await page.content();
-            results[keyword] = parse_webcrawler_news_results(html, event.max_results);
-
-        } catch (e) {
-            console.error(`Problem with scraping ${keyword}: ${e}`);
-            return results;
-        }
-    }
-    return results;
-}
-
-function parse_webcrawler_news_results(html) {
-    // load the page source into cheerio
-    const $ = cheerio.load(html);
-
-    // perform queries
-    const results = [];
-    $('.article').each((i, link) => {
-        let source = $(link).find('.source').text();
-        let date = source.split(',')[1] || '';
-        results.push({
-            link: $(link).find('a').attr('href'),
-            title: $(link).find('.title').text(),
-            publisher: $(link).find('.source').text(),
-            date: date,
-            snippet: $(link).find('.description').text(),
-        });
-    });
-
-    const cleaned = [];
-    for (var i=0; i < results.length; i++) {
-        let res = results[i];
-        if (res.link && res.link.trim() && res.title && res.title.trim()) {
-            res.rank = i+1;
-            cleaned.push(res);
-        }
-    }
-
-    return {
-        time: (new Date()).toUTCString(),
-        results: cleaned
-    }
-}
+    InfospaceScraper: InfospaceScraper,
+    WebcrawlerNewsScraper: WebcrawlerNewsScraper,
+};
--- a/src/modules/se_scraper.js
+++ b/src/modules/se_scraper.js
@ -81,6 +81,8 @@ module.exports = class Scraper {
     */
    async scraping_loop() {

+        this.result_rank = 1;
+
        for (let keyword of this.config.keywords) {
            this.keyword = keyword;
            this.results[keyword] = {};
@ -121,7 +123,7 @@ module.exports = class Scraper {
                        break;
                    }

-                } while (page_num < event.num_pages);
+                } while (page_num <= event.num_pages);

            } catch (e) {

--- a/src/modules/ticker_search.js
+++ b/src/modules/ticker_search.js
@ -1,5 +1,4 @@
 const cheerio = require('cheerio');
-const sfunctions = require('./functions.js');

 module.exports = {
    scrape_yahoo_finance_pup: scrape_yahoo_finance_pup,
@ -7,9 +6,14 @@ module.exports = {
    scrape_reuters_finance_pup: scrape_reuters_finance_pup,
    scrape_cnbc_finance_pup: scrape_cnbc_finance_pup,
    scrape_marketwatch_finance_pup: scrape_marketwatch_finance_pup,
+    not_implemented: undefined,
 };

-// https://www.google.com/search?q=MSFT&tbm=fin
+function sleep(ms) { // returns a Promise that setTimeout resolves after `ms` milliseconds
+    return new Promise(resolve => {
+        setTimeout(resolve, ms)
+    })
+}

 async function scrape_yahoo_finance_pup(page, event, context, pluggable) {
    var results = {};
@ -40,7 +44,7 @@ async function scrape_yahoo_finance_pup(page, event, context, pluggable) {
                await page.screenshot({path: `debug/${keyword}.png`});
            }

-            await sfunctions.sleep(1000);
+            await sleep(1000);

            let html = await page.content();
            results[keyword] = parse(html);
@ -90,7 +94,7 @@ async function scrape_marketwatch_finance_pup(page, event, context, pluggable) {
                await page.screenshot({path: `debug/${keyword}.png`});
            }

-            await sfunctions.sleep(500);
+            await sleep(500);

            let newsData = await page.evaluate(() => {
                let results = [];
@ -150,7 +154,7 @@ async function scrape_bloomberg_finance_pup(page, event, context, pluggable) {
                await page.screenshot({path: `debug/${keyword}.png`});
            }

-            await sfunctions.sleep(1000);
+            await sleep(1000);

            let news_items = await page.$x('//*[starts-with(@class,"newsItem")]');
            for (let item of news_items) {
@ -189,7 +193,7 @@ async function scrape_reuters_finance_pup(page, event, context, pluggable) {
                await page.screenshot({path: `debug/${keyword}.png`});
            }

-            await sfunctions.sleep(500);
+            await sleep(500);

            let newsData = await page.evaluate(() => {
                let results = [];
@ -246,7 +250,7 @@ async function scrape_cnbc_finance_pup(page, event, context, pluggable) {
                await page.screenshot({path: `debug/${keyword}.png`});
            }

-            await sfunctions.sleep(500);
+            await sleep(500);

            let newsData = await page.evaluate(() => {
                let results = [];
--- a/src/modules/youtube.js
+++ b/src/modules/youtube.js
@ -1,121 +1,105 @@
 const cheerio = require('cheerio');
-const sfunctions = require('./functions.js');
+const Scraper = require('./se_scraper');

-module.exports = {
-	scrape_youtube_pup: scrape_youtube_pup,
-};
+class YoutubeScraper extends Scraper {

-const all_videos = new Set();
+	parse(html) {
+		// load the page source into cheerio
+		const $ = cheerio.load(html);

-async function scrape_youtube_pup(page, event, context, pluggable) {
-	await page.goto('https://www.youtube.com');
+		// perform queries
+		const results = [];
+		$('#contents ytd-video-renderer,#contents ytd-grid-video-renderer').each((i, link) => {
+			results.push({
+				link: $(link).find('#video-title').attr('href'),
+				title: $(link).find('#video-title').text(),
+				snippet: $(link).find('#description-text').text(),
+				channel: $(link).find('#byline a').text(),
+				channel_link: $(link).find('#byline a').attr('href'),
+				num_views: $(link).find('#metadata-line span:nth-child(1)').text(),
+				release_date: $(link).find('#metadata-line span:nth-child(2)').text(),
+			})
+		});

-	try {
-		await page.waitForSelector('input[id="search"]', { timeout: 5000 });
-	} catch (e) {
-		return results;
-	}
+		let no_results = this.no_results(
+			['No results found', 'Keine Ergebnisse', 'Es werden Ergebnisse angezeigt', 'Showing results for' ],
+			$('yt-showing-results-for-renderer').text()
+		);

-	let keywords = event.keywords;
-	var results = {};
+		let effective_query = $('#corrected-link').text() || '';

-    // before we do anything, parse the results of the front page of youtube
-    await page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 10000 });
-    await sfunctions.sleep(500);
+		const cleaned = [];
+		for (var i=0; i < results.length; i++) {
+			let res = results[i];
+			if (res.link && res.link.trim() && res.title && res.title.trim()) {
+				res.title = res.title.trim();
+				res.snippet = res.snippet.trim();
+				res.rank = this.result_rank++;

-    let html = await page.content();
-    results['__frontpage__'] = parse(html);
-
-	for (var i = 0; i < keywords.length; i++) {
-
-		keyword = keywords[i];
-
-		if (pluggable.before_keyword_scraped) {
-			await pluggable.before_keyword_scraped({
-				keyword: keyword,
-				page: page,
-				event: event,
-				context: context,
-			});
-		}
-
-		try {
-			const input = await page.$('input[id="search"]');
-			// overwrites last text in input
-			await input.click({ clickCount: 3 });
-			await input.type(keyword);
-			await input.focus();
-			await page.keyboard.press("Enter");
-
-            if (event.sleep_range) {
-                await sfunctions.random_sleep(event);
-            }
-
-            await page.waitForFunction(`document.title.indexOf('${keyword}') !== -1`, { timeout: 5000 });
-            await page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 5000 });
-            await sfunctions.sleep(500);
-
-			if (event.debug === true && event.is_local === true) {
-				await page.screenshot({path: `debug/${keyword}.png`});
+				// check if this result has been used before
+				if (this.all_videos.has(res.title) === false) {
+					cleaned.push(res);
+				}
+				this.all_videos.add(res.title);
 			}
+		}

-			let html = await page.content();
-			results[keyword] = parse(html);
-
-		} catch (e) {
-			console.error(`Problem with scraping ${keyword}: ${e}`);
+		return {
+			time: (new Date()).toUTCString(),
+			no_results: no_results,
+			effective_query: effective_query,
+			num_results: '',
+			results: cleaned,
 		}
 	}

-	return results;
+	async load_start_page() { // opens youtube.com and stores the parsed front page; resolves true on success, false on any error
+		try {
+			this.all_videos = new Set(); // video titles already returned; parse() checks and extends this set
+			await this.page.goto('https://www.youtube.com', {
+				referer: 'https://google.com'
+			});
+			await this.page.waitForSelector('input[id="search"]', { timeout: 5000 });
+			// before we do anything, parse the results of the front page of youtube
+			await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 10000 });
+			await this.sleep(500);
+			let html = await this.page.content();
+			this.results['frontpage'] = this.parse(html);
+			this.result_rank = 1; // parse() advanced the rank counter for the front page; restart it before keyword results
+		} catch(e) { // NOTE(review): the error is discarded without being logged; callers only see `false`
+			return false;
+		}
+		return true;
+	}
+
+	async search_keyword(keyword) { // types `keyword` into the search box and submits it with Enter
+		const input = await this.page.$('input[id="search"]');
+		// triple click selects the text already in the input, so typing overwrites it
+		await input.click({ clickCount: 3 });
+		await input.type(keyword);
+		await input.focus();
+		await this.page.keyboard.press("Enter");
+	}
+
+	async next_page() { // pagination is not implemented for youtube; always resolves to false
+		// youtube needs scrolling
+		// TODO: implement scrolling, no priority right now
+		return false;
+	}
+
+	async wait_for_results() { // waits until the title contains the keyword and result elements exist, then pauses 500 ms
+		await this.page.waitForFunction(kw => document.title.indexOf(kw) !== -1, { timeout: 5000 }, this.keyword); // keyword is passed as an argument, not spliced into source, so quotes/backslashes in it cannot break or inject page script
+		await this.page.waitForSelector('ytd-video-renderer,ytd-grid-video-renderer', { timeout: 5000 });
+		await this.sleep(500);
+	}
+
+	async detected() { // resolves true when the current page matches either of the two block-page markers below
+		const title = await this.page.title();
+		let html = await this.page.content();
+		return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1; // NOTE(review): '/sorry/' is matched against the page title, not the URL — confirm this is intended
+	}
 }

-function parse(html) {
-	// load the page source into cheerio
-	const $ = cheerio.load(html);
-
-	// perform queries
-	const results = [];
-	$('#contents ytd-video-renderer,#contents ytd-grid-video-renderer').each((i, link) => {
-		results.push({
-		  link: $(link).find('#video-title').attr('href'),
-		  title: $(link).find('#video-title').text(),
-		  snippet: $(link).find('#description-text').text(),
-		  channel: $(link).find('#byline a').text(),
-		  channel_link: $(link).find('#byline a').attr('href'),
-		  num_views: $(link).find('#metadata-line span:nth-child(1)').text(),
-		  release_date: $(link).find('#metadata-line span:nth-child(2)').text(),
-		})
-	});
-
-	let no_results = sfunctions.no_results(
-		['No results found', 'Keine Ergebnisse', 'Es werden Ergebnisse angezeigt', 'Showing results for' ],
-		$('yt-showing-results-for-renderer').text()
-	);
-
-    let effective_query = $('#corrected-link').text() || '';
-
-	const cleaned = [];
-	for (var i=0; i < results.length; i++) {
-		let res = results[i];
-		if (res.link && res.link.trim() && res.title && res.title.trim()) {
-			res.title = res.title.trim();
-			res.snippet = res.snippet.trim();
-			res.rank = i+1;
-
-			// check if this result has been used before
-			if (all_videos.has(res.title) === false) {
-                cleaned.push(res);
-			}
-            all_videos.add(res.title);
-		}
-	}
-
-	return {
-		time: (new Date()).toUTCString(),
-		no_results: no_results,
-        effective_query: effective_query,
-		num_results: '',
-		results: cleaned,
-	}
-}
+module.exports = {
+	YoutubeScraper: YoutubeScraper,
+};
--- a/src/node_scraper.js
+++ b/src/node_scraper.js
@ -129,6 +129,8 @@ module.exports.handler = async function handler (event, context, callback) {
 			}
 		}

+		var results = {};
+
 		Scraper = {
 			google: google.GoogleScraper,
 			google_news_old: google.GoogleNewsOldScraper,
@ -136,28 +138,32 @@ module.exports.handler = async function handler (event, context, callback) {
 			google_image: google.GoogleImageScraper,
 			bing: bing.BingScraper,
 			bing_news: bing.BingNewsScraper,
+			duckduckgo: duckduckgo.DuckduckgoScraper,
+			duckduckgo_news: duckduckgo.DuckduckgoNewsScraper,
+			infospace: infospace.InfospaceScraper,
+			webcrawler: infospace.WebcrawlerNewsScraper,
+			baidu: baidu.BaiduScraper,
+			youtube: youtube.YoutubeScraper,
+
+			yahoo_news: tickersearch.not_implemented,
+			bloomberg: tickersearch.not_implemented,
+			reuters: tickersearch.not_implemented,
+			cnbc: tickersearch.not_implemented,
+			marketwatch: tickersearch.not_implemented,

-			infospace: infospace.scrape_infospace_pup,
-			webcrawler: infospace.scrape_webcrawler_news_pup,
-			baidu: baidu.scrape_baidu_pup,
-			youtube: youtube.scrape_youtube_pup,
-			duckduckgo_news: duckduckgo.scrape_duckduckgo_news_pup,
-			google_dr: google.scrape_google_pup_dr,
-			yahoo_news: tickersearch.scrape_yahoo_finance_pup,
-			bloomberg: tickersearch.scrape_bloomberg_finance_pup,
-			reuters: tickersearch.scrape_reuters_finance_pup,
-			cnbc: tickersearch.scrape_cnbc_finance_pup,
-			marketwatch: tickersearch.scrape_marketwatch_finance_pup,
 		}[config.search_engine];

-		let scraper = new Scraper({
-			browser: browser,
-			config: config,
-			context: context,
-			pluggable: pluggable,
-		});
-
-		let results = await scraper.run();
+		if (Scraper === undefined) { // unknown key or an engine mapped to not_implemented: keep the empty `results`
+			console.info('Currently not implemented search_engine: ', config.search_engine);
+		} else {
+			let scraper = new Scraper({
+				browser: browser,
+				config: config,
+				context: context,
+				pluggable: pluggable,
+			});
+			results = await scraper.run(); // assigns the `results` already declared with `var` earlier in this function instead of redeclaring it
+		}

 		if (pluggable.close_browser) {
 			await pluggable.close_browser();
--- a/test/tests.js
+++ b/test/tests.js
@ -1,5 +1,4 @@
 const handler = require('./../src/node_scraper.js');
-
 var assert = require('chai').assert;

 /*
@ -13,22 +12,26 @@ function sleep(ms) {
    })
 }

-const search_engines = ['google', 'google_image', 'google_news', 'youtube', 'bing', 'infospace', 'baidu'];
+const search_engines = ['google', 'google_image', 'google_news', 'youtube', 'bing', 'infospace', 'duckduckgo'];
+const keywords = ['news', 'weather'];

 async function tests() {
-
-    const keywords = ['Google scraper NikolaiT', 'the idiot'];
-
    event = {
        search_engine: 'google',
-        compress: 'false',
-        debug: 'false',
-        verbose: 'false',
+        compress: false,
+        debug: false,
+        verbose: false,
        keywords: keywords,
+        keyword_file: '',
+        num_pages: 1,
+        headless: true,
+        output_file: '',
+        block_assets: true,
+        user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
+        random_user_agent: false,
    };

-	for (var i = 0; i < search_engines.length; i++) {
-		se = search_engines[i];
+	for (let se of search_engines) {
 		console.log(`Testing ${se}...`);
 		event.search_engine = se;
 		await handler.handler(event, undefined, test_case);
@ -47,8 +50,7 @@ async function no_results_test() {
        keywords: keywords,
    };

-    for (var i = 0; i < search_engines.length; i++) {
-        se = search_engines[i];
+    for (let se of search_engines) {
        console.log(`Testing ${se}...`);
        event.search_engine = se;
        await handler.handler(event, undefined, test_case_no_results);
@ -61,19 +63,18 @@ async function effective_query_test() {
    const keywords = ['mount evverrest'];

    event = {
-        write_meta_data: 'true',
+        write_meta_data: true,
        job_name: 'test-job',
        search_engine: '',
-        compress: 'false',
-        debug: 'false',
-        verbose: 'false',
+        compress: false,
+        debug: false,
+        verbose: false,
        keywords: keywords,
    };

    const effective_query_engines = ['google', 'google_image', 'google_news', 'youtube', 'bing'];

-    for (var i = 0; i < effective_query_engines.length; i++) {
-        se = effective_query_engines[i];
+    for (let se of effective_query_engines) { // iterate the engine list declared just above for this test, not the module-level search_engines
        console.log(`Testing ${se}...`);
        event.search_engine = se;
        await handler.handler(event, undefined, test_case_effective_query);
@ -90,27 +91,47 @@ function test_case(err, response) {
        assert.equal(response.headers['Content-Type'], 'text/json', 'content type is not text/json');
        assert.equal(response.statusCode, 200, 'status code must be 200');

-        for (key in response.results) {
-            kw = response.results[key];
-            // at least 6 results
-            assert.isAtLeast(kw.results.length, 6, 'results must have at least 6 links');
-            assert.equal(kw.no_results, false, 'no results should be false');
-            assert.typeOf(kw.num_results, 'string', 'num_results must be a string');
-            assert.isAtLeast(kw.num_results.length, 5, 'num_results should be a string of at least 5 chars');
-            assert.typeOf(Date.parse(kw.time), 'number', 'time should be a valid date');
+        let total_rank = 1;

-            for (let res of kw.results) {
-                assert.isOk(res.link, 'link must be ok');
-                assert.typeOf(res.link, 'string', 'link must be string');
-                assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
+        // asserted once, before the loop: inside the loop an empty results object would skip this check entirely
+        assert.containsAllKeys(response.results, keywords, 'not all keywords were scraped.');

-                assert.isOk(res.title, 'title must be ok');
-                assert.typeOf(res.title, 'string', 'title must be string');
-                assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
+
+        for (const query in response.results) {

-                assert.isOk(res.snippet, 'snippet must be ok');
-                assert.typeOf(res.snippet, 'string', 'snippet must be string');
-                assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
+            for (const page_number in response.results[query]) {
+                // parseInt gives NaN (still typeof 'number') for a non-numeric key, so NaN is tested explicitly
+                assert.isFalse(Number.isNaN(parseInt(page_number, 10)), 'page_number must be numeric');
+
+                let obj = response.results[query][page_number];
+
+                assert.containsAllKeys(obj, ['results', 'time',], 'not all keys are in the object');
+
+                // at least 6 results
+                assert.isAtLeast(obj.results.length, 6, 'results must have at least 6 SERP objects');
+                // TODO: fix this
+                // assert.equal(obj.no_results, false, 'no results should be false');
+                assert.typeOf(obj.num_results, 'string', 'num_results must be a string');
+                assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars');
+                assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date');
+
+                for (let res of obj.results) {
+
+                    assert.containsAllKeys(res, ['link', 'title', 'rank'], 'not all keys are in the SERP object');
+
+                    assert.isOk(res.link, 'link must be ok');
+                    assert.typeOf(res.link, 'string', 'link must be string');
+                    assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars');
+
+                    assert.isOk(res.title, 'title must be ok');
+                    assert.typeOf(res.title, 'string', 'title must be string');
+                    assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars');
+
+                    assert.isOk(res.snippet, 'snippet must be ok');
+                    assert.typeOf(res.snippet, 'string', 'snippet must be string');
+                    assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars');
+
+                    assert.isNumber(res.rank, 'rank must be integer');
+                    assert.equal(res.rank, total_rank++, 'rank is wrong');
+                }
            }
        }
 	}