diff --git a/debug_se_scraper_google_buy used car.png b/debug_se_scraper_google_buy used car.png new file mode 100644 index 0000000..8b4024f Binary files /dev/null and b/debug_se_scraper_google_buy used car.png differ diff --git a/debug_se_scraper_google_cloud service.png b/debug_se_scraper_google_cloud service.png index e208aba..a42249f 100644 Binary files a/debug_se_scraper_google_cloud service.png and b/debug_se_scraper_google_cloud service.png differ diff --git a/examples/quickstart.js b/examples/quickstart.js index e77ed74..5d9d9ed 100644 --- a/examples/quickstart.js +++ b/examples/quickstart.js @@ -2,25 +2,19 @@ const se_scraper = require('./../src/node_scraper.js'); (async () => { let browser_config = { - debug_level: 2, - output_file: 'examples/results/data.json', + debug_level: 1, test_evasion: false, - headless: false, + headless: true, block_assets: false, - random_user_agent: true, + random_user_agent: false, + log_http_headers: false, + html_output: false, }; let scrape_job = { - search_engine: 'google', - keywords: ['cloud service'], + search_engine: 'bing', + keywords: ['auto verkaufen'], num_pages: 1, - // add some cool google search settings - google_settings: { - gl: 'us', // The gl parameter determines the Google country to use for the query. - hl: 'en', // The hl parameter determines the Google UI language to return results. - start: 0, // Determines the results offset to use, defaults to 0. - num: 10, // Determines the number of results to show, defaults to 10. Maximum is 100. - }, }; var scraper = new se_scraper.ScrapeManager(browser_config); diff --git a/examples/results/data.json b/examples/results/data.json index 40ecc28..a659287 100644 --- a/examples/results/data.json +++ b/examples/results/data.json @@ -1,124 +1,104 @@ { - "cloud service": { + "buy used car": { "1": { - "time": "Sat, 06 Jul 2019 19:33:03 GMT", - "num_results": "About 2,720,000,000 results (0.53 seconds) ", + "time": "Sun, 07 Jul 2019 16:04:09 GMT", + "num_results": "About 5,330,000,000 results (0.65 seconds) ", "no_results": false, "effective_query": "", - "top_ads": [ + "top_ads": [], + "bottom_ads": [], + "places": [ { - "ad_visible_url": "www.ibm.com/de-de/cloud", - "ads_link": "/aclk?sa=l&ai=DChcSEwjJ8uy7hKHjAhUJhtUKHSqmA_MYABAAGgJ3cw&sig=AOD64_0cI3jZ1rhFR8yEf5YtReD8f2PBlQ&rct=j&q=&ved=2ahUKEwig4ea7hKHjAhVgTRUIHfv-CQUQ0Qx6BAgXEAE&adurl=", - "ads_link_target": "https://www.ibm.com/de-de/cloud/solutions", - "title": "Die IBM Cloud | Mehr Sicherheit für Ihre Daten‎", - "snippet": "Die IBM Cloud ist die Cloud für smarte Unternehmen. Warum erfahren Sie hier! Mit der IBM Cloud erschließen Sie Ihrem Unternehmen neue Umsatzströme aus Ihren Daten. Cloud-Migration." - } - ], - "bottom_ads": [ - { - "ad_visible_url": "www.hpe.com/Cloud/Service", - "ads_link": "/aclk?sa=L&ai=DChcSEwjJ8uy7hKHjAhUJhtUKHSqmA_MYABADGgJ3cw&sig=AOD64_2VKnwqa309cs9KfVrY2KSK-J3T9w&rct=j&q=&ved=2ahUKEwig4ea7hKHjAhVgTRUIHfv-CQUQ0Qx6BAgYEAE&adurl=", - "ads_link_target": "https://www.hpe.com/de/de/services.html", - "title": "HPE Cloud Service | HPE besuchen und mehr erfahren‎", - "snippet": "Hilfe beim Erstellen einer Roadmap abgestimmt auf Ihre Herausforderungen! Vereinfachter IT-Zyklus. Schnellere Innovationen. Optimierte Infrastruktur. Bestes Partner-Ökosystem. Dienstleistungen: Advisory Services, Professional Services, Operational Sevices, Cloud Services, Applications Services." + "heading": "Approved Automotive", + "rating": "2.7 (3) · Used car dealerClosed ⋅ Opens 8AM Mon", + "contact": "Independence, KS · (620) 331-6223", + "hours": "Closed ⋅ Opens 8AM Mon" }, { - "ad_visible_url": "w3.usa.siemens.com/", - "ads_link": "/aclk?sa=l&ai=DChcSEwjJ8uy7hKHjAhUJhtUKHSqmA_MYABAGGgJ3cw&sig=AOD64_3tMHCSFikvffpErcgjGyDahhVwWQ&rct=j&q=&ved=2ahUKEwig4ea7hKHjAhVgTRUIHfv-CQUQ0Qx6BAgZEAE&adurl=", - "ads_link_target": "https://w3.usa.siemens.com/buildingtechnologies/us/en/Smart_Buildings/digital-services/Pages/analytic-services.aspx?stc=ussi100083&sp_source=ussi100083", - "title": "Analytic Services from Siemens | Making Your Building Smarter‎", - "snippet": "Siemens Uses the Latest Analytical Tools Along with Cloud-Based Services to Identify. and Solve Potential Problems Before They Affect Your Entire Organization." + "heading": "Romans Motor Company", + "rating": "4.4 (38) · Chevrolet dealerClosed ⋅ Opens 8AM Mon\"Great place to get your car worked on.\" \"Great place to get your car worked on.\" ", + "contact": "Independence, KS · (620) 331-4700\"Great place to get your car worked on.\" ", + "hours": "Closed ⋅ Opens 8AM Mon" + }, + { + "heading": "Perl on Eleventh", + "rating": "No reviews · Used car dealerClosed ⋅ Opens 9AM Mon", + "contact": "Coffeyville, KS · (620) 251-4050", + "hours": "Closed ⋅ Opens 9AM Mon" } ], - "places": [], "results": [ { - "link": "https://www.webopedia.com/TERM/C/cloud_services.html", - "title": "What is Cloud Service? Webopedia Definitionhttps://www.webopedia.com/TERM/C/cloud_services.html", - "snippet": "", - "visible_link": "https://www.webopedia.com/TERM/C/cloud_services.html", + "link": "https://www.cars.com/shopping/", + "title": "Used Cars for Sale Online Near Me | Cars.comhttps://www.cars.com/shopping/Cached", + "snippet": "Car-Buying Advice. First-timers and veterans shopping new or used cars: know what to remind yourself to do, ask and learn, from verifying the condition to ...", + "visible_link": "https://www.cars.com/shopping/", "date": "", "rank": 1 }, { - "link": "https://www.webopedia.com/TERM/C/cloud_services.html", - "title": "What is Cloud Service? Webopedia Definitionhttps://www.webopedia.com/TERM/C/cloud_services.html", - "snippet": "", - "visible_link": "https://www.webopedia.com/TERM/C/cloud_services.html", + "link": "https://www.carfax.com/cars-for-sale", + "title": "Used Cars for Sale | with Free CARFAXhttps://www.carfax.com/cars-for-saleCachedSimilar", + "snippet": "A FREE CARFAX report comes with every used car and truck for sale on Carfax.com. ... Find out how much a car is really worth before you buy it with the all-new ...", + "visible_link": "https://www.carfax.com/cars-for-sale", "date": "", "rank": 2 }, { - "link": "https://www.webopedia.com/TERM/C/cloud_services.html", - "title": "What is Cloud Service? Webopedia Definitionhttps://www.webopedia.com/TERM/C/cloud_services.htmlCached", - "snippet": "cloud service. A cloud service is any service made available to users on demand via the Internet from a cloud computing provider's servers as opposed to being provided from a company's own on-premises servers.", - "visible_link": "https://www.webopedia.com/TERM/C/cloud_services.html", + "link": "https://www.autotrader.com/", + "title": "New Cars, Used Cars - Find Cars for Sale and Reviews at Autotraderhttps://www.autotrader.com/Cached", + "snippet": "With millions of cars, finding your next new car or used car and the car ... Buying a car is a big deal -- and saving money is a crucial aspect of any new - or used ...", + "visible_link": "https://www.autotrader.com/", "date": "", "rank": 3 }, { - "link": "https://www.skyhighnetworks.com/cloud-security-blog/what-is-a-cloud-service/", - "title": "What is a Cloud Service? - Skyhigh Networkshttps://www.skyhighnetworks.com/cloud-security-blog/what-is-a-cloud-service/CachedSimilar", - "snippet": "The cloud has been around for many years, yet there is still confusion as to what exactly is considered a cloud service. Read on for the definitive answer.", - "visible_link": "https://www.skyhighnetworks.com/cloud-security-blog/what-is-a-cloud-service/", + "link": "https://www.carmax.com/cars", + "title": "Used Cars for Sale - CarMaxhttps://www.carmax.com/carsCachedSimilar", + "snippet": "Search for new and used cars at carmax.com. Use our car ... 130 Best Used Cars for 2019: Ranked by Price and Type ... What is the best car to buy in 2019?", + "visible_link": "https://www.carmax.com/cars", "date": "", "rank": 4 }, { - "link": "https://searchitchannel.techtarget.com/definition/cloud-services", - "title": "What is cloud services? - Definition from WhatIs.com - SearchITChannelhttps://searchitchannel.techtarget.com/definition/cloud-servicesCached", - "snippet": "Dec 6, 2016 - Cloud services is an umbrella term that may refer to a variety of resources provided over the internet, or to professional services that support the ...", - "visible_link": "https://searchitchannel.techtarget.com/definition/cloud-services", - "date": "Dec 6, 2016 - ", + "link": "https://www.enterprisecarsales.com/usedcars-buy", + "title": "Buy Used Cars - Enterprise Car Saleshttps://www.enterprisecarsales.com/usedcars-buyCached", + "snippet": "Looking to buy a used car? You know Enterprise for our exceptional customer service on rental cars, and you'll experience that same level of service when you ...", + "visible_link": "https://www.enterprisecarsales.com/usedcars-buy", + "date": "", "rank": 5 }, { - "link": "https://azure.microsoft.com/en-us/overview/what-is-cloud-computing/", - "title": "What Is Cloud Computing? A Beginner's Guide | Microsoft Azurehttps://azure.microsoft.com/en-us/overview/what-is-cloud-computing/CachedSimilar", - "snippet": "Simply put, cloud computing is the delivery of computing services—including servers, storage, databases, networking, software, analytics, and intelligence—over ...", - "visible_link": "https://azure.microsoft.com/en-us/overview/what-is-cloud-computing/", + "link": "https://www.truecar.com/used-cars-for-sale/", + "title": "Used Cars For Sale: 1,006,922 Used & Pre-Owned Cars | TrueCarhttps://www.truecar.com/used-cars-for-sale/CachedSimilar", + "snippet": "Buy With Confidence. Get the best used car buying experience when you purchase from a TrueCar Certified Dealer who is dedicated to great service, and ...", + "visible_link": "https://www.truecar.com/used-cars-for-sale/", "date": "", "rank": 6 }, { - "link": "https://www.pcmag.com/article/345308/20-of-the-best-cloud-services-for-smbs", - "title": "The 50 Best Cloud Services for SMBs | PCMag.comhttps://www.pcmag.com/article/345308/20-of-the-best-cloud-services-for-smbsCached", - "snippet": "Oct 23, 2017 - To help you make better decisions about what to buy, we're listing the best cloud services for SMBs, covering topics from project management ...", - "visible_link": "https://www.pcmag.com/article/345308/20-of-the-best-cloud-services-for-smbs", - "date": "Oct 23, 2017 - ", + "link": "https://www.truecar.com/", + "title": "TrueCar: Car Prices, Owner Reviews & Inventory | New & Used Carshttps://www.truecar.com/CachedSimilar", + "snippet": "Shop for new and used cars and trucks. ... Sam's Club, American Express and Chase, giving members who use TrueCar a superior car-buying experience.", + "visible_link": "https://www.truecar.com/", + "date": "", "rank": 7 }, { - "link": "https://en.wikipedia.org/wiki/Cloud_computing", - "title": "Cloud computing - Wikipediahttps://en.wikipedia.org/wiki/Cloud_computingCachedSimilar", - "snippet": "Jump to Software as a service (SaaS) - In the software as a service (SaaS) model, users gain access to application software and databases. Cloud ...", - "visible_link": "https://en.wikipedia.org/wiki/Cloud_computing", - "date": "Jump to Software as a service (SaaS) - ", + "link": "https://www.enterprisecarsales.com/list/buy-a-car-1", + "title": "Buy Used Cars, Find Used Vehicles for Sale - Enterprise Car Saleshttps://www.enterprisecarsales.com/list/buy-a-car-1CachedSimilar", + "snippet": "Browse our vehicle inventory to find reliable used cars for sale right now at Enterprise Car Sales.", + "visible_link": "https://www.enterprisecarsales.com/list/buy-a-car-1", + "date": "", "rank": 8 }, { - "link": "https://www.techopedia.com/definition/29017/cloud-services", - "title": "What are Cloud Services? - Definition from Techopediahttps://www.techopedia.com/definition/29017/cloud-servicesCachedSimilar", - "snippet": "Cloud services refer to any IT services that are provisioned and accessed from a cloud computing provider. This is a broad term that incorporates all delivery and ...", - "visible_link": "https://www.techopedia.com/definition/29017/cloud-services", + "link": "https://www.edmunds.com/used-cars-for-sale/", + "title": "Get the Best Deals on Used Cars For Sale Near You - Shop Used ...https://www.edmunds.com/used-cars-for-sale/CachedSimilar", + "snippet": "Get the best prices on great used cars, trucks and SUVs for sale near you with Edmunds. We have over 5 million cheap ... Buy used with confidence on Edmunds ...", + "visible_link": "https://www.edmunds.com/used-cars-for-sale/", "date": "", "rank": 9 - }, - { - "link": "https://www.techradar.com/news/best-cloud-computing-service", - "title": "Best cloud computing services of 2019 | TechRadarhttps://www.techradar.com/news/best-cloud-computing-serviceCached", - "snippet": "4 days ago - Additionally, cloud services aren't simply about services or resources, but about providing fully fledged IT systems you can use as if you were ...", - "visible_link": "https://www.techradar.com/news/best-cloud-computing-service", - "date": "4 days ago - ", - "rank": 10 - }, - { - "link": "https://aws.amazon.com/what-is-cloud-computing/", - "title": "What is Cloud Computing - Amazon Web Serviceshttps://aws.amazon.com/what-is-cloud-computing/CachedSimilar", - "snippet": "Whether you are using it to run applications that share photos to millions of mobile users or to support business critical operations, a cloud services platform ...", - "visible_link": "https://aws.amazon.com/what-is-cloud-computing/", - "date": "", - "rank": 11 } ] } diff --git a/package.json b/package.json index f2346ef..09682fe 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "se-scraper", - "version": "1.3.13", + "version": "1.3.14", "description": "A module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu", "homepage": "https://scrapeulous.com/", "main": "index.js", diff --git a/src/modules/bing.js b/src/modules/bing.js index f0c4059..5698cfa 100644 --- a/src/modules/bing.js +++ b/src/modules/bing.js @@ -22,9 +22,9 @@ class BingScraper extends Scraper { const ads = []; $('.b_ad .sb_add').each((i, element) => { ads.push({ - ad_visible_url: $(element).find('.b_adurl cite').text(), - ads_link: $(element).find('h2 a').attr('href'), - ads_link_target: $(element).find('h2 link').attr('href'), + visible_link: $(element).find('.b_adurl cite').text(), + tracking_link: $(element).find('h2 a').attr('href'), + link: $(element).find('link').attr('href'), title: $(element).find('h2 a').text(), snippet: $(element).find('.b_caption').text(), }) diff --git a/src/modules/duckduckgo.js b/src/modules/duckduckgo.js index da16f0f..9f1e581 100644 --- a/src/modules/duckduckgo.js +++ b/src/modules/duckduckgo.js @@ -9,7 +9,7 @@ class DuckduckgoScraper extends Scraper { // perform queries const results = []; - $('.result__body').each((i, link) => { + $('#links .result__body').each((i, link) => { results.push({ link: $(link).find('.result__title .result__a').attr('href'), title: $(link).find('.result__title .result__a').text(), @@ -22,8 +22,8 @@ class DuckduckgoScraper extends Scraper { const ads = []; $('.results--ads.has-ad').each((i, element) => { ads.push({ - ad_visible_url: $(element).find('.result__url').text(), - ads_link: $(element).find('.result__title .result__a').attr('href'), + visible_link: $(element).find('.result__url').text(), + tracking_link: $(element).find('.result__title .result__a').attr('href'), title: $(element).find('.result__title .result__a').text(), snippet: $(element).find('.result__snippet').text(), }) diff --git a/src/modules/google.js b/src/modules/google.js index cd68d33..722679e 100644 --- a/src/modules/google.js +++ b/src/modules/google.js @@ -24,29 +24,33 @@ class GoogleScraper extends Scraper { }) }); - // parse top ads - const top_ads = []; - $('#tads .ads-ad').each((i, element) => { - top_ads.push({ - ad_visible_url: $(element).find('.ads-visurl cite').text(), - ads_link: $(element).find('a:first-child').attr('href'), - ads_link_target: $(element).find('a:nth-child(2)').attr('href'), - title: $(element).find('a h3').text(), - snippet: $(element).find('.ads-creative').text(), - }) - }); + // parse ads + let parseAds = (storage, selector) => { + $(selector).each((i, element) => { + let obj = { + visible_link: $(element).find('.ads-visurl cite').text(), + tracking_link: $(element).find('a:first-child').attr('href'), + link: $(element).find('a:nth-child(2)').attr('href'), + title: $(element).find('a h3').text(), + snippet: $(element).find('.ads-creative').text(), + links: [], + }; + $(element).find('ul li a').each((i, el) => { + obj.links.push({ + tracking_link: $(el).attr('data-arwt'), + link: $(el).attr('href'), + title: $(el).text(), + }) + }); + storage.push(obj); + }); + }; - // parse bottom ads + const top_ads = []; const bottomads = []; - $('#tadsb .ads-ad').each((i, element) => { - bottomads.push({ - ad_visible_url: $(element).find('.ads-visurl cite').text(), - ads_link: $(element).find('a:first-child').attr('href'), - ads_link_target: $(element).find('a:nth-child(2)').attr('href'), - title: $(element).find('a h3').text(), - snippet: $(element).find('.ads-creative').text(), - }) - }); + + parseAds(top_ads, '#tads .ads-ad'); + parseAds(bottomads, '#tadsb .ads-ad'); // parse google places const places = []; @@ -143,7 +147,7 @@ class GoogleScraper extends Scraper { } async wait_for_results() { - await this.page.waitForSelector('#fbarcnt', { timeout: this.STANDARD_TIMEOUT }); + await this.page.waitForSelector('#fbar', { timeout: this.STANDARD_TIMEOUT }); } async detected() { diff --git a/src/modules/se_scraper.js b/src/modules/se_scraper.js index 9a37a1b..4559643 100644 --- a/src/modules/se_scraper.js +++ b/src/modules/se_scraper.js @@ -116,6 +116,7 @@ module.exports = class Scraper { if (this.config.log_http_headers === true) { this.metadata.http_headers = await meta.get_http_headers(this.page); + log(this.config, 1, this.metadata.http_headers); } if (this.config.log_ip_address === true) { diff --git a/test/test_bing.js b/test/test_bing.js index 18ff0b0..56b1993 100644 --- a/test/test_bing.js +++ b/test/test_bing.js @@ -233,21 +233,21 @@ function test_case_ads_test(response) { assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars'); assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); - assert.isAtLeast(obj.ads.length, 2, 'ads must have at least 2 SERP object'); + assert.isAtLeast(obj.ads.length, 2, 'ads must have at least 2 SERP objects'); for (let res of obj.ads) { - assert.isOk(res.ads_link, 'link must be ok'); - assert.typeOf(res.ads_link, 'string', 'link must be string'); - assert.isAtLeast(res.ads_link.length, 5, 'link must have at least 5 chars'); + assert.isOk(res.tracking_link, 'link must be ok'); + assert.typeOf(res.tracking_link, 'string', 'link must be string'); + assert.isAtLeast(res.tracking_link.length, 5, 'link must have at least 5 chars'); - assert.isOk(res.ads_link_target, 'link must be ok'); - assert.typeOf(res.ads_link_target, 'string', 'link must be string'); - assert.isAtLeast(res.ads_link_target.length, 5, 'link must have at least 5 chars'); + // assert.isOk(res.link, 'link must be ok'); + // assert.typeOf(res.link, 'string', 'link must be string'); + // assert.isAtLeast(res.link.length, 5, 'link must have at least 5 chars'); - assert.isOk(res.ad_visible_url, 'visible_link must be ok'); - assert.typeOf(res.ad_visible_url, 'string', 'visible_link must be string'); - assert.isAtLeast(res.ad_visible_url.length, 5, 'visible_link must have at least 5 chars'); + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); assert.isOk(res.title, 'title must be ok'); assert.typeOf(res.title, 'string', 'title must be string'); @@ -257,14 +257,10 @@ function test_case_ads_test(response) { assert.typeOf(res.snippet, 'string', 'snippet must be string'); assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars'); } - } } } - - - describe('Bing', function(){ this.timeout(30000); it('normal search', normal_search_test); diff --git a/test/test_duckduckgo.js b/test/test_duckduckgo.js index 2199d59..4530faf 100644 --- a/test/test_duckduckgo.js +++ b/test/test_duckduckgo.js @@ -1,10 +1,7 @@ const se_scraper = require('./../index.js'); -var assert = require('chai').assert; - -/* - * Use chai and mocha for tests. - * https://mochajs.org/#installation - */ +const chai = require('chai'); +chai.use(require('chai-string')); +const assert = chai.assert; const normal_search_keywords = ['apple tree', 'weather tomorrow']; @@ -12,12 +9,9 @@ async function normal_search_test() { let config = { compress: false, debug_level: 1, - keyword_file: '', headless: true, - output_file: '', - block_assets: true, - user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', - random_user_agent: false, + block_assets: false, + random_user_agent: true, }; let scrape_config = { @@ -83,12 +77,9 @@ async function effective_query_test() { let config = { compress: false, debug_level: 1, - keyword_file: '', headless: true, - output_file: '', block_assets: true, - user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', - random_user_agent: false, + random_user_agent: true, }; let scrape_config = { @@ -129,7 +120,72 @@ function test_case_effective_query(response) { } } -(async () => { - await normal_search_test(); - await effective_query_test(); -})(); \ No newline at end of file +const ads_keywords = ['cloud services', 'buy shoes']; + +async function ads_test() { + let config = { + compress: false, + debug_level: 1, + headless: true, + block_assets: false, + random_user_agent: false, + }; + + let scrape_config = { + search_engine: 'duckduckgo', + keywords: ads_keywords, + num_pages: 1, + }; + + console.log('ads_test()'); + test_case_ads_test( await se_scraper.scrape(config, scrape_config) ); +} + +function test_case_ads_test(response) { + assert.equal(response.metadata.num_requests, 2); + + for (let query in response.results) { + + assert.containsAllKeys(response.results, ads_keywords, 'not all keywords were scraped.'); + + for (let page_number in response.results[query]) { + + assert.isNumber(parseInt(page_number), 'page_number must be numeric'); + + let obj = response.results[query][page_number]; + + assert.containsAllKeys(obj, ['results', 'time', 'effective_query', 'ads'], 'not all keys are in the object'); + + assert.isAtLeast(obj.results.length, 6, 'results must have at least 6 SERP objects'); + assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); + assert.isAtLeast(obj.ads.length, 2, 'ads must have at least 2 SERP object'); + + for (let res of obj.ads) { + + assert.isOk(res.tracking_link, 'link must be ok'); + assert.typeOf(res.tracking_link, 'string', 'link must be string'); + assert.isAtLeast(res.tracking_link.length, 5, 'link must have at least 5 chars'); + + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); + + assert.isOk(res.title, 'title must be ok'); + assert.typeOf(res.title, 'string', 'title must be string'); + assert.isAtLeast(res.title.length, 10, 'title must have at least 10 chars'); + + assert.isOk(res.snippet, 'snippet must be ok'); + assert.typeOf(res.snippet, 'string', 'snippet must be string'); + assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars'); + } + } + } +} + + +describe('Duckduckgo', function(){ + this.timeout(30000); + it('normal search', normal_search_test); + it('effective query', effective_query_test); + it('finds ads', ads_test); +}); \ No newline at end of file diff --git a/test/test_google.js b/test/test_google.js index 5b7fbc7..2099f3e 100644 --- a/test/test_google.js +++ b/test/test_google.js @@ -221,7 +221,7 @@ function check_html_output_test_case( response ) { } } -const ads_keywords = ['cloud services', 'buy shoes']; +const ads_keywords = ['cloud services', 'auto kaufen']; async function ads_test() { let config = { @@ -229,7 +229,7 @@ async function ads_test() { debug_level: 1, headless: true, block_assets: false, - random_user_agent: true, + random_user_agent: false, // dont try to trick google with ads }; let scrape_config = { @@ -263,22 +263,21 @@ function test_case_ads_test(response) { assert.isAtLeast(obj.num_results.length, 5, 'num_results should be a string of at least 5 chars'); assert.typeOf(Date.parse(obj.time), 'number', 'time should be a valid date'); - assert.isAtLeast(obj.top_ads.length, 1, 'top_ads must have at least 1 SERP object'); - assert.isAtLeast(obj.bottom_ads.length, 1, 'bottom_ads must have at least 1 SERP object'); + assert(obj.top_ads.length >= 1 || obj.bottom_ads.length >= 1, 'top_ads or bottom_ads must have at least 1 SERP object'); for (let res of obj.top_ads) { - assert.isOk(res.ads_link, 'link must be ok'); - assert.typeOf(res.ads_link, 'string', 'link must be string'); - assert.isAtLeast(res.ads_link.length, 5, 'link must have at least 5 chars'); + assert.isOk(res.tracking_link, 'link must be ok'); + assert.typeOf(res.tracking_link, 'string', 'link must be string'); + assert.isAtLeast(res.tracking_link.length, 5, 'link must have at least 5 chars'); - assert.isOk(res.ads_link_target, 'link must be ok'); - assert.typeOf(res.ads_link_target, 'string', 'link must be string'); - assert.isAtLeast(res.ads_link_target.length, 5, 'link must have at least 5 chars'); + assert.isOk(res.visible_link, 'link must be ok'); + assert.typeOf(res.visible_link, 'string', 'link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'link must have at least 5 chars'); - assert.isOk(res.ad_visible_url, 'visible_link must be ok'); - assert.typeOf(res.ad_visible_url, 'string', 'visible_link must be string'); - assert.isAtLeast(res.ad_visible_url.length, 5, 'visible_link must have at least 5 chars'); + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); assert.isOk(res.title, 'title must be ok'); assert.typeOf(res.title, 'string', 'title must be string'); @@ -287,21 +286,22 @@ function test_case_ads_test(response) { assert.isOk(res.snippet, 'snippet must be ok'); assert.typeOf(res.snippet, 'string', 'snippet must be string'); assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars'); + + assert.typeOf(res.links, 'array', 'snippet must be array'); } for (let res of obj.bottom_ads) { + assert.isOk(res.tracking_link, 'link must be ok'); + assert.typeOf(res.tracking_link, 'string', 'link must be string'); + assert.isAtLeast(res.tracking_link.length, 5, 'link must have at least 5 chars'); - assert.isOk(res.ads_link, 'link must be ok'); - assert.typeOf(res.ads_link, 'string', 'link must be string'); - assert.isAtLeast(res.ads_link.length, 5, 'link must have at least 5 chars'); + assert.isOk(res.visible_link, 'link must be ok'); + assert.typeOf(res.visible_link, 'string', 'link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'link must have at least 5 chars'); - assert.isOk(res.ads_link_target, 'link must be ok'); - assert.typeOf(res.ads_link_target, 'string', 'link must be string'); - assert.isAtLeast(res.ads_link_target.length, 5, 'link must have at least 5 chars'); - - assert.isOk(res.ad_visible_url, 'visible_link must be ok'); - assert.typeOf(res.ad_visible_url, 'string', 'visible_link must be string'); - assert.isAtLeast(res.ad_visible_url.length, 5, 'visible_link must have at least 5 chars'); + assert.isOk(res.visible_link, 'visible_link must be ok'); + assert.typeOf(res.visible_link, 'string', 'visible_link must be string'); + assert.isAtLeast(res.visible_link.length, 5, 'visible_link must have at least 5 chars'); assert.isOk(res.title, 'title must be ok'); assert.typeOf(res.title, 'string', 'title must be string'); @@ -310,6 +310,8 @@ function test_case_ads_test(response) { assert.isOk(res.snippet, 'snippet must be ok'); assert.typeOf(res.snippet, 'string', 'snippet must be string'); assert.isAtLeast(res.snippet.length, 10, 'snippet must have at least 10 chars'); + + assert.typeOf(res.links, 'array', 'snippet must be array'); } } @@ -322,5 +324,5 @@ describe('Google', function(){ it('no results', no_results_test); it('effective query', effective_query_test); it('html output query', html_output_query_test); - it('finds ads', ads_test); + it('ads', ads_test); });