se-scraper/src/modules/google.js

1386 lines
45 KiB
JavaScript

'use strict';
const cheerio = require('cheerio');
const Scraper = require('./se_scraper');
class GoogleScraper extends Scraper {
constructor(...args) {
super(...args);
}
async parse_async(html) {
const results = await this.page.evaluate(() => {
let _text = (el, s, onlyFirstTextNode) => {
let n = el.querySelector(s);
if (n) {
return (onlyFirstTextNode) ? n.childNodes[0].nodeValue : n.innerText;
} else {
return;
}
};
let _attr = (el, s, attr) => {
let n = el.querySelector(s);
if (n) {
return n.getAttribute(attr);
} else {
return;
}
};
let results = {
num_results: '',
no_results: false,
effective_query: '',
right_info: {},
results: [],
top_products: [],
right_products: [],
top_ads: [],
bottom_ads: [],
places: [],
};
let num_results_el = document.getElementById('resultStats');
if (num_results_el) {
results.num_results = num_results_el.innerText;
}
let organic_results = document.querySelectorAll('#center_col .g');
organic_results.forEach((el) => {
let serp_obj = {
link: _attr(el, '.r a', 'href'),
title: _text(el, '.r a h3'),
snippet: _text(el, 'span.st'),
visible_link: _text(el, '.r cite'),
date: _text(el, 'span.f'),
};
if (serp_obj.date) {
serp_obj.date = serp_obj.date.replace(' - ', '');
}
results.results.push(serp_obj);
});
// check if no results
results.no_results = (results.results.length === 0);
let parseAds = (container, selector) => {
document.querySelectorAll(selector).forEach((el) => {
let ad_obj = {
visible_link: _text(el, '.ads-visurl cite'),
tracking_link: _attr(el, 'a:first-child', 'href'),
link: _attr(el, 'a:nth-child(2)', 'href'),
title: _text(el, 'a h3'),
snippet: _text(el, '.ads-creative'),
links: [],
};
el.querySelectorAll('ul li a').forEach((node) => {
ad_obj.links.push({
tracking_link: node.getAttribute('data-arwt'),
link: node.getAttribute('href'),
title: node.innerText,
})
});
container.push(ad_obj);
});
};
parseAds(results.top_ads, '#tads .ads-ad');
parseAds(results.bottom_ads, '#tadsb .ads-ad');
// parse google places
document.querySelectorAll('.rllt__link').forEach((el) => {
results.places.push({
heading: _text(el, '[role="heading"] span'),
rating: _text(el, '.rllt__details div:first-child'),
contact: _text(el, '.rllt__details div:nth-child(2)'),
hours: _text(el, '.rllt__details div:nth-child(3)'),
})
});
// parse right side product information
results.right_info.review = _attr(document, '#rhs .cu-container g-review-stars span', 'aria-label');
let title_el = document.querySelector('#rhs .cu-container .Q7Oxbd');
if (title_el) {
results.right_info.title = title_el.innerText;
}
let num_reviews_el = document.querySelector('#rhs .cu-container .PGDKUd');
if (num_reviews_el) {
results.right_info.num_reviews = num_reviews_el.innerText;
}
results.right_info.vendors = [];
results.right_info.info = _text(document, '#rhs_block > div > div > div > div:nth-child(5) > div > div');
document.querySelectorAll('#rhs .cu-container .rhsvw > div > div:nth-child(4) > div > div:nth-child(3) > div').forEach((el) => {
results.right_info.vendors.push({
price: _text(el, 'span:nth-of-type(1)'),
merchant_name: _text(el, '.doUe3s0oL2B__jackpot-merchant a'),
merchant_ad_link: _attr(el, 'span:nth-child(3) a:first-child', 'href'),
merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'), // TODO this is not working anymore
source_name: _text(el, 'span:nth-child(4) a'),
source_link: _attr(el, 'span:nth-child(4) a', 'href'),
info: _text(el, '.SdBHnc.e2CF7c'),
shipping: _text(el, '.JfwJme'),
})
});
let right_side_info_el = document.getElementById('rhs');
if (right_side_info_el) {
let right_side_info_text = right_side_info_el.innerText;
if (right_side_info_text && right_side_info_text.length > 0) {
results.right_side_info_text = right_side_info_text;
}
}
// Parse Google Shopping top or left
document.querySelectorAll('.pla-unit').forEach((el) => {
let top_product = {
tracking_link: _attr(el, '.pla-unit-title a:first-child', 'href'),
link: _attr(el, '.pla-unit-title a:nth-child(2)', 'href'),
title: _text(el, '.pla-unit-title a:nth-child(2) span'),
price: _text(el, '.pla-unit-title + div', true),
originalPrice: _text(el, '.pla-unit-title + div > span'),
shipping: _text(el, '.pla-extensions-container .cYBBsb'),
vendor_link: _attr(el,'.pla-extensions-container a.FfKHB', 'href'),
merchant_name: _text(el,'.LbUacb span:nth-child(1)'),
};
results.top_products.push(top_product);
});
// parse top right product information
// #tvcap .pla-unit
document.querySelectorAll('#rhs_block .pla-unit').forEach((el) => {
let right_product = {
tracking_link: _attr(el, '.pla-unit-title a:first-child', 'href'),
link: _attr(el, '.pla-unit-title a:nth-child(2)', 'href'),
title: _text(el, '.pla-unit-title a:nth-child(2) span:first-child'),
price: _text(el,'.pla-unit-title + div'),
shipping: _text(el,'.pla-extensions-container > div'),
vendor_link: _text(el,'.pla-extensions-container div > a'),
vendor_name: _text(el,'.pla-extensions-container div > a > div'),
};
let merchant_node = el.querySelector('.pla-unit-title');
if (merchant_node) {
let node = merchant_node.parentNode.querySelector('div > span:first-child');
if (node) {
right_product.merchant_name = node.innerText;
}
}
results.right_products.push(right_product);
});
let effective_query_el = document.getElementById('fprsl');
if (effective_query_el) {
results.effective_query = effective_query_el.innerText;
if (!results.effective_query) {
let effective_query_el2 = document.querySelector('#fprs a');
if (effective_query_el2) {
results.effective_query = document.querySelector('#fprs a').innerText;
}
}
}
return results;
});
// clean some results
results.top_products = this.clean_results(results.top_products, ['title', 'link']);
results.right_products = this.clean_results(results.right_products, ['title', 'link']);
results.results = this.clean_results(results.results, ['title', 'link' , 'snippet']);
results.time = (new Date()).toUTCString();
return results;
}
async load_start_page() {
let startUrl = 'https://www.google.com';
if (this.config.google_settings) {
startUrl = `https://www.${this.config.google_settings.google_domain}/search?q=`;
if (this.config.google_settings.google_domain) {
startUrl = `https://www.${this.config.google_settings.google_domain}/search?`;
} else {
startUrl = `https://www.google.com/search?`;
}
for (var key in this.config.google_settings) {
if (key !== 'google_domain') {
startUrl += `${key}=${this.config.google_settings[key]}&`
}
}
}
this.logger.info('Using startUrl: ' + startUrl);
this.last_response = await this.page.goto(startUrl);
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
const buttonAccepted = await this.page.$('#L2AGLb');
if (buttonAccepted) {
await buttonAccepted.evaluate((e) =>
e.scrollIntoView({ block: "center", inline: "center" })
);
await buttonAccepted.click();
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async next_page() {
let next_page_link = await this.page.$('#pnnext', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
return true;
}
async wait_for_results() {
await this.page.waitForSelector('#fbar', { timeout: this.STANDARD_TIMEOUT });
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
class GoogleNewsOldScraper extends Scraper {
parse(html) {
const $ = cheerio.load(html);
// perform queries
const results = [];
$('g-card').each((i, result) => {
results.push({
link: $(result).find('a').attr('href'),
title: $(result).find('a div div:nth-child(2) div:nth-child(2)').text(),
snippet: $(result).find('a div div:nth-child(2) div:nth-child(3) div:nth-child(1)').text(),
date: $(result).find('a div div:nth-child(2) div:nth-child(3) div:nth-child(2)').text(),
})
});
let no_results = this.no_results(
['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
'No results found for', 'did not match any news results'],
$('#main').text()
);
let effective_query = $('#fprsl').text() || '';
if (!effective_query) {
effective_query = $('#fprs a').text()
}
const cleaned = this.clean_results(results, ['link']);
return {
time: (new Date()).toUTCString(),
results: cleaned,
no_results: no_results,
effective_query: effective_query,
}
}
async load_start_page() {
let startUrl = this.build_start_url('https://www.google.com/search?source=lnms&tbm=nws&') || 'https://www.google.com/search?source=lnms&tbm=nws';
await this.page.goto(startUrl);
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
return true;
}
async search_keyword(keyword) {
let url = this.build_start_url(`https://www.google.com/search?q=${keyword}&source=lnms&tbm=nws&`) ||
`https://www.google.com/search?q=${keyword}&hl=en&source=lnms&tbm=nws`;
this.last_response = await this.page.goto(url, {
referer: 'https://www.google.com/'
});
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
}
async next_page() {
let next_page_link = await this.page.$('#pnnext', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
return true;
}
async wait_for_results() {
await this.page.waitForSelector('#rso', { timeout: this.STANDARD_TIMEOUT });
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
class GoogleImageScraper extends Scraper {
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
// perform queries
const results = [];
$('.rg_bx').each((i, link) => {
let link_element = $(link).find('a.rg_l').attr('href');
let clean_link = clean_image_url(link_element);
results.push({
link: link_element,
clean_link: clean_link,
snippet: $(link).find('.a-no-hover-decoration').text(),
})
});
let no_results = this.no_results(
['stimmt mit keinem Bildergebnis', 'Keine Ergebnisse für', 'not match any image results', 'No results found for',],
$('#main').text()
);
let effective_query = $('#fprsl').text() || '';
if (!effective_query) {
effective_query = $('#fprs a').text();
}
const cleaned = this.clean_results(results, ['link']);
return {
time: (new Date()).toUTCString(),
no_results: no_results,
results: cleaned,
effective_query: effective_query
}
}
async load_start_page() {
try {
this.last_response = await this.page.goto(`https://www.google.com/imghp?tbm=isch`, {
referer: 'https://www.google.com/'
});
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
} catch (e) {
return false;
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
// this waitForNavigation makes hardcoded sleeps not necessary
this.last_response = await this.page.waitForNavigation();
}
async next_page() {
return false;
}
async wait_for_results() {
await this.page.waitForSelector('.rg_bx .a-no-hover-decoration div', {timeout: this.STANDARD_TIMEOUT});
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
class GoogleNewsScraper extends Scraper {
parse(html) {
const $ = cheerio.load(html);
// perform queries
const results = [];
$('article > h3').each((i, headline) => {
let title = $(headline).find('a').text();
try {
var snippet = $(headline).parent().find('p').text();
var link = $(headline).find('a').attr('href');
var date = $(headline).parent().parent().parent().find('time').text();
var ts = $(headline).parent().parent().parent().find('time').attr('datetime');
} catch(e) {
}
if (!this.all_results.has(title)) {
results.push({
rank: i+1,
title: title,
snippet: snippet,
link: link,
date: date,
ts: ts,
});
}
this.all_results.add(title);
});
let no_results = this.no_results(
['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
'No results found for', 'did not match any news results'],
$('body').text()
);
let effective_query = $('#fprsl').text() || '';
const cleaned = this.clean_results(results, ['title',]);
return {
time: (new Date()).toUTCString(),
results: cleaned,
no_results: no_results,
effective_query: effective_query,
}
}
async load_start_page() {
try {
this.all_results = new Set();
this.last_response = await this.page.goto(`https://news.google.com/?hl=en-US&gl=US&ceid=US:en`, {
referer: 'https://news.google.com'
});
await this.page.waitForSelector('div input:nth-child(2)', {timeout: this.STANDARD_TIMEOUT});
await this.sleep(1000);
// parse here front page results
let html = await this.page.content();
this.results['frontpage'] = this.parse(html);
this.result_rank = 1;
} catch(e) {
return false;
}
return true;
}
async search_keyword(keyword) {
await this.page.waitForSelector('div input:nth-child(2)', { timeout: this.STANDARD_TIMEOUT });
const input = await this.page.$('div input:nth-child(2)');
// overwrites last text in input
await input.click({ clickCount: 3 });
await input.type(keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async next_page() {
// google news app does not have next pages
return false;
}
async wait_for_results() {
await this.page.waitForSelector(`[data-n-q="${this.keyword}"]`, { timeout: this.STANDARD_TIMEOUT });
await this.sleep(1000);
// TODO: fix googlenewsscraper
// let nodes = await this.page.evaluate(() => {
// var res = [];
// document.querySelectorAll('article > h3').forEach((node) => {
// try {
// let title = node.querySelector('a span').innerHTML;
// var snippet = node.querySelector('p').innerHTML;
// var link = node.querySelector('a').getAttribute('href');
// res.push({
// title: tile,
// snippet: snippet,
// link: link
// });
// } catch(e) {
// }
// return res;
// });
// });
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
class GoogleMapsScraper extends Scraper {
constructor(...args) {
super(...args);
}
async parse_async(html) {
let results = await this.page.evaluate(() => {
var res = [];
document.querySelectorAll('.section-listbox-root .section-result').forEach((node) => {
try {
let score = node.querySelector('.cards-rating-score').innerHTML;
let num_ratings = node.querySelector('.section-result-num-ratings').innerHTML;
let type = node.querySelector('.section-result-details').innerHTML;
let title = node.querySelector('.section-result-title span').innerHTML;
let location = node.querySelector('.section-result-location').innerHTML;
let opening_hours = node.querySelector('.section-result-opening-hours').innerHTML;
res.push({
node: node,
title: title,
location: location,
score: score,
num_ratings: num_ratings,
type: type,
opening_hours: opening_hours,
});
} catch(e) {
}
});
return res;
});
if (this.scrape_in_detail) {
let profiles = await this.page.$$('.section-listbox-root .section-result');
console.log(`Profiles to visit: ${profiles.length}`);
for (var profile of profiles) {
try {
let additional_info = await this.visit_profile(profile);
console.log(additional_info);
} catch(e) {
console.error(e);
}
profiles = await this.page.$$('.section-listbox-root .section-result');
}
}
return {
time: (new Date()).toUTCString(),
results: results
}
}
/*
https://stackoverflow.com/questions/55815376/puppeteer-open-a-page-get-the-data-go-back-to-the-previous-page-enter-a-new
*/
async visit_profile(profile) {
await profile.click();
await this.page.waitForFunction('document.querySelectorAll(".section-info-line .section-info-text").length > 0', {timeout: 5000});
let results = await this.page.evaluate(() => {
let res = [];
document.querySelectorAll('.section-info-line .section-info-text .widget-pane-link').forEach((node) => {
try {
let info = node.innerHTML.trim();
if (info) {
res.push(info);
}
} catch(e) {
}
});
return res;
});
let back_button = await this.page.$('.section-back-to-list-button', {timeout: 10000});
if (back_button) {
await back_button.click();
}
return results;
}
async load_start_page() {
let startUrl = 'https://www.google.com/maps';
if (this.config.google_maps_settings) {
// whether to visit each result and get all available information
// including customer reviews
this.scrape_in_detail = this.config.google_maps_settings.scrape_in_detail || false;
}
this.logger.info('Using startUrl: ' + startUrl);
this.last_response = await this.page.goto(startUrl);
try {
await this.page.waitForSelector('#searchbox input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
} catch (e) {
return false;
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('#searchbox input[name="q"]');
await this.set_input_value(`#searchbox input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
// TODO: i cannot find a next page link right now
async next_page() {
// let s = "//span[substring(@class,string-length(@class) -string-length('__button-next-icon') +1) = '__button-next-icon']";
// const [next_page_link] = await this.page.$x(s, {timeout: 2000});
let next_page_link = await this.page.$('[jsaction="pane.paginationSection.nextPage"] span', {timeout: 10000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
// because google maps loads all location results dynamically, its hard to check when
// results have been updated
// as a quick hack, we will wait until the last title of the last search
// differs from the last result in the dom
let last_title_last_result = this.results[this.keyword][this.page_num-1].results.slice(-1)[0].title;
this.logger.info(`Waiting until new last serp title differs from: "${last_title_last_result}"`);
await this.page.waitForFunction((last_title) => {
const res = document.querySelectorAll('.section-result .section-result-title span');
return res[res.length-1].innerHTML !== last_title;
}, {timeout: 7000}, this.results[this.keyword][this.page_num-1].results.slice(-1)[0].title);
return true;
}
async wait_for_results() {
await this.page.waitForSelector('.section-listbox-root .section-result', { timeout: this.STANDARD_TIMEOUT });
// more than 1 result
await this.page.waitForFunction("document.querySelectorAll('.section-result').length > 0", { timeout: 5000 });
await this.page.waitForNavigation();
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
class GoogleShoppingScraper extends Scraper {
constructor(...args) {
super(...args);
}
parse(html) {
// load the page source into cheerio
const $ = cheerio.load(html);
const results = [];
$('.sh-dlr__list-result').each((i, link) => {
results.push({
price: $(link).find('.sh-dlr__content div:nth-child(2) span > span').text(),
link: $(link).find('.sh-dlr__thumbnail a').attr('href'),
title: $(link).find('div > div > a[data-what="1"]').text(),
info1: $(link).find('.sh-dlr__content div:nth-child(2)').text(),
info2: $(link).find('.sh-dlr__content div:nth-child(3)').text(),
info3: $(link).find('.sh-dlr__content div:nth-child(4)').text(),
})
});
const grid_results = [];
$('.sh-pr__product-results-grid .sh-dgr__grid-result').each((i, link) => {
grid_results.push({
price: $(link).find('.sh-dgr__content div:nth-child(2) span').text(),
link: $(link).find('.sh-dgr__content a').attr('href'),
title: $(link).find('.sh-dgr__content a').text(),
info: $(link).find('.sh-dgr__content').text(),
})
});
// 'Ergebnisse für', 'Showing results for'
let no_results = this.no_results(
['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
'No results found for'],
$('#main').text()
);
const cleaned = this.clean_results(results, ['title', 'link']);
return {
time: (new Date()).toUTCString(),
no_results: no_results,
results: cleaned,
grid_results: grid_results,
}
}
async load_start_page() {
let startUrl = 'https://www.google.com/shopping?';
if (this.config.google_settings) {
startUrl = `https://www.${this.config.google_settings.google_domain}/shopping?q=`;
if (this.config.google_settings.google_domain) {
startUrl = `https://www.${this.config.google_settings.google_domain}/shopping?`;
} else {
startUrl = `https://www.google.com/shopping?`;
}
for (var key in this.config.google_settings) {
if (key !== 'google_domain') {
startUrl += `${key}=${this.config.google_settings[key]}&`
}
}
}
this.logger.info('Using startUrl: ' + startUrl);
this.last_response = await this.page.goto(startUrl);
try {
await this.page.waitForSelector('input[name="q"]', { timeout: this.STANDARD_TIMEOUT });
} catch (e) {
return false;
}
return true;
}
async search_keyword(keyword) {
const input = await this.page.$('input[name="q"]');
await this.set_input_value(`input[name="q"]`, keyword);
await this.sleep(50);
await input.focus();
await this.page.keyboard.press("Enter");
}
async next_page() {
let next_page_link = await this.page.$('#pnnext', {timeout: 1000});
if (!next_page_link) {
return false;
}
await next_page_link.click();
return true;
}
async wait_for_results() {
await this.page.waitForSelector('#fbar', { timeout: this.STANDARD_TIMEOUT });
}
async detected() {
const title = await this.page.title();
let html = await this.page.content();
return html.indexOf('detected unusual traffic') !== -1 || title.indexOf('/sorry/') !== -1;
}
}
function clean_image_url(url) {
// Example:
// https://www.google.com/imgres?imgurl=https%3A%2F%2Fupload.wikimedia.org%2Fwikipedia%2Fen%2Fthumb%2Ff%2Ffd%2F1928_Edward_Campbell.jpg%2F220px-1928_Edward_Campbell.jpg&imgrefurl=https%3A%2F%2Fwww.revolvy.com%2Fpage%2FSir-Edward-Campbell%252C-1st-Baronet&docid=BMkW_GerTIY4GM&tbnid=TmQapIxDCQbQhM%3A&vet=10ahUKEwje_LLE_YXeAhXisaQKHVAEBSAQMwiNAShEMEQ..i&w=220&h=290&bih=1696&biw=1280&q=John%20MacLeod%20Breadalbane%20Councillor%20Prince%20Edward%20Island&ved=0ahUKEwje_LLE_YXeAhXisaQKHVAEBSAQMwiNAShEMEQ&iact=mrc&uact=8
const regex = /imgurl=(.*?)&/gm;
let match = regex.exec(url);
if (match !== null) {
return decodeURIComponent(match[1]);
}
}
function clean_google_url(url) {
// Example:
// /url?q=https://www.zeit.de/thema/donald-trump&sa=U&ved=0ahUKEwiL9-u-_ZLgAhVJsqQKHeITDAoQFgg0MAc&usg=AOvVaw3JV3UZjTXRwaS2I-sBbeXF
// /search?q=trump&hl=de&gbv=2&ie=UTF-8&prmd=ivns&source=univ&tbm=nws&tbo=u&sa=X&ved=0ahUKEwiL9-u-_ZLgAhVJsqQKHeITDAoQqAIIFA
const regex = /url\?q=(.*?)&/gm;
let match = regex.exec(url);
if (match !== null) {
return decodeURIComponent(match[1]);
} else {
return url;
}
}
module.exports = {
GoogleShoppingScraper: GoogleShoppingScraper,
GoogleNewsOldScraper: GoogleNewsOldScraper,
GoogleScraper: GoogleScraper,
GoogleImageScraper: GoogleImageScraper,
GoogleNewsScraper: GoogleNewsScraper,
GoogleMapsScraper: GoogleMapsScraper,
};
// https://developers.google.com/custom-search/v1/cse/list
const GOOGLE_DOMAINS = {
'Samoa': 'google.ws',
'Vanuatu': 'google.vu',
'British Virgin Islands': 'google.vg',
'Trinidad and Tobago': 'google.tt',
'Tonga': 'google.to',
'Tunisia': 'google.tn',
'Turkmenistan': 'google.tm',
'Timor-Leste': 'google.tl',
'Tokelau': 'google.tk',
'Togo': 'google.tg',
'Chad': 'google.td',
'São Tomé and Príncipe': 'google.st',
'Suriname': 'google.sr',
'Somalia': 'google.so',
'Senegal': 'google.sn',
'San Marino': 'google.sm',
'Slovakia': 'google.sk',
'Slovenia': 'google.si',
'Saint Helena, Ascension and Tristan da Cunha': 'google.sh',
'Sweden': 'google.se',
'Seychelles': 'google.sc',
'Rwanda': 'google.rw',
'Russia': 'google.ru',
'Serbia': 'google.rs',
'Romania': 'google.ro',
'Portugal': 'google.pt',
'Palestine[3]': 'google.ps',
'Pitcairn Islands': 'google.co.pn',
'Poland': 'google.pl',
'Niue': 'google.nu',
'Nauru': 'google.nr',
'Norway': 'google.no',
'Netherlands': 'google.nl',
'Niger': 'google.ne',
'Malawi': 'google.mw',
'Maldives': 'google.mv',
'Mauritius': 'google.mu',
'Montserrat': 'google.ms',
'Mongolia': 'google.mn',
'Mali': 'google.ml',
'Macedonia': 'google.mk',
'Madagascar': 'google.mg',
'Montenegro': 'google.me',
'Moldova': 'google.md',
'Latvia': 'google.lv',
'Luxembourg': 'google.lu',
'Lithuania': 'google.lt',
'Sri Lanka': 'google.lk',
'Liechtenstein': 'google.li',
'Laos': 'google.la',
'Kazakhstan': 'google.kz',
'Kiribati': 'google.ki',
'Kyrgyzstan': 'google.kg',
'Jordan': 'google.jo',
'Jersey': 'google.je',
'Italy': 'google.it',
'Iceland': 'google.is',
'Iraq': 'google.iq',
'British Indian Ocean Territory': 'google.io',
'Isle of Man': 'google.im',
'Ireland': 'google.ie',
'Hungary': 'google.hu',
'Haiti': 'google.ht',
'Croatia': 'google.hr',
'Honduras': 'google.hn',
'Guyana': 'google.gy',
'Greece': 'google.gr',
'Guadeloupe': 'google.gp',
'Gambia': 'google.gm',
'Greenland': 'google.gl',
'Guernsey': 'google.gg',
'French Guiana': 'google.gf',
'Georgia': 'google.ge',
'Gabon': 'google.ga',
'France': 'google.fr',
'Federated States of Micronesia': 'google.fm',
'Finland': 'google.fi',
'Spain': 'google.es',
'Estonia': 'google.ee',
'Algeria': 'google.dz',
'Dominica': 'google.dm',
'Denmark': 'google.dk',
'Djibouti': 'google.dj',
'Germany': 'google.de',
'Czech Republic': 'google.cz',
'Cape Verde': 'google.cv',
'Vietnam': 'google.com.vn',
'Saint Vincent and the Grenadines': 'google.com.vc',
'Uruguay': 'google.com.uy',
'Ukraine': 'google.com.ua',
'Taiwan': 'google.com.tw',
'Turkey': 'google.com.tr',
'Tajikistan': 'google.com.tj',
'El Salvador': 'google.com.sv',
'Sierra Leone': 'google.com.sl',
'Singapore': 'google.com.sg',
'Solomon Islands': 'google.com.sb',
'Saudi Arabia': 'google.com.sa',
'Qatar': 'google.com.qa',
'Paraguay': 'google.com.py',
'Puerto Rico': 'google.com.pr',
'Pakistan': 'google.com.pk',
'Philippines': 'google.com.ph',
'Papua New Guinea': 'google.com.pg',
'Peru': 'google.com.pe',
'Panama': 'google.com.pa',
'Oman': 'google.com.om',
'Nepal': 'google.com.np',
'Nicaragua': 'google.com.ni',
'Nigeria': 'google.com.ng',
'Norfolk Island': 'google.com.nf',
'Namibia': 'google.com.na',
'Malaysia': 'google.com.my',
'Mexico': 'google.com.mx',
'Malta': 'google.com.mt',
'Myanmar': 'google.com.mm',
'Libya': 'google.com.ly',
'Saint Lucia': 'google.com.lc',
'Lebanon': 'google.com.lb',
'Kuwait': 'google.com.kw',
'Cambodia': 'google.com.kh',
'Jamaica': 'google.com.jm',
'Hong Kong': 'google.com.hk',
'Guatemala': 'google.com.gt',
'Gibraltar': 'google.com.gi',
'Ghana': 'google.com.gh',
'Fiji': 'google.com.fj',
'Ethiopia': 'google.com.et',
'Egypt': 'google.com.eg',
'Ecuador': 'google.com.ec',
'Dominican Republic': 'google.com.do',
'Cyprus': 'google.com.cy',
'Cuba': 'google.com.cu',
'Colombia': 'google.com.co',
'Belize': 'google.com.bz',
'Brazil': 'google.com.br',
'Bolivia': 'google.com.bo',
'Brunei': 'google.com.bn',
'Bahrain': 'google.com.bh',
'Bangladesh': 'google.com.bd',
'Australia': 'google.com.au',
'Argentina': 'google.com.ar',
'Anguilla': 'google.com.ai',
'Antigua and Barbuda': 'google.com.ag',
'Afghanistan': 'google.com.af',
'Worldwide (Original for the United States)': 'google.com',
'United States': 'google.com',
'Zimbabwe': 'google.co.zw',
'Zambia': 'google.co.zm',
'South Africa': 'google.co.za',
'United States Virgin Islands': 'google.co.vi',
'Venezuela': 'google.co.ve',
'Uzbekistan': 'google.co.uz',
'United Kingdom': 'google.co.uk',
'Uganda': 'google.co.ug',
'Tanzania': 'google.co.tz',
'Thailand': 'google.co.th',
'New Zealand': 'google.co.nz',
'Mozambique': 'google.co.mz',
'Morocco': 'google.co.ma',
'Lesotho': 'google.co.ls',
'South Korea': 'google.co.kr',
'Kenya': 'google.co.ke',
'Japan': 'google.co.jp',
'India': 'google.co.in',
'Israel': 'google.co.il',
'Indonesia': 'google.co.id',
'Costa Rica': 'google.co.cr',
'Cook Islands': 'google.co.ck',
'Botswana': 'google.co.bw',
'Angola': 'google.co.ao',
'China': 'google.cn',
'Cameroon': 'google.cm',
'Chile': 'google.cl',
'Ivory Coast': 'google.ci',
'Switzerland': 'google.ch',
'Republic of the Congo': 'google.cg',
'Central African Republic': 'google.cf',
'Democratic Republic of the Congo': 'google.cd',
'Cocos (Keeling) Islands': 'google.cc',
'Catalan Countries': 'google.cat',
'Canada': 'google.ca',
'Belarus': 'google.by',
'Bhutan': 'google.bt',
'Bahamas': 'google.bs',
'Benin': 'google.bj',
'Burundi': 'google.bi',
'Bulgaria': 'google.bg',
'Burkina Faso': 'google.bf',
'Belgium': 'google.be',
'Bosnia and Herzegovina': 'google.ba',
'Azerbaijan': 'google.az',
'Austria': 'google.at',
'American Samoa': 'google.as',
'Armenia': 'google.am',
'Albania': 'google.al',
'United Arab Emirates': 'google.ae',
'Andorra': 'google.ad',
'Ascension Island': 'google.ac'
};
// https://developers.google.com/custom-search/docs/xml_results_appendices#countryCodes
// The gl parameter determines the Google country to use for the query.
const GOOGLE_GL = {'af': 'Afghanistan',
'al': 'Albania',
'dz': 'Algeria',
'as': 'American Samoa',
'ad': 'Andorra',
'ao': 'Angola',
'ai': 'Anguilla',
'aq': 'Antarctica',
'ag': 'Antigua and Barbuda',
'ar': 'Argentina',
'am': 'Armenia',
'aw': 'Aruba',
'au': 'Australia',
'at': 'Austria',
'az': 'Azerbaijan',
'bs': 'Bahamas',
'bh': 'Bahrain',
'bd': 'Bangladesh',
'bb': 'Barbados',
'by': 'Belarus',
'be': 'Belgium',
'bz': 'Belize',
'bj': 'Benin',
'bm': 'Bermuda',
'bt': 'Bhutan',
'bo': 'Bolivia',
'ba': 'Bosnia and Herzegovina',
'bw': 'Botswana',
'bv': 'Bouvet Island',
'br': 'Brazil',
'io': 'British Indian Ocean Territory',
'bn': 'Brunei Darussalam',
'bg': 'Bulgaria',
'bf': 'Burkina Faso',
'bi': 'Burundi',
'kh': 'Cambodia',
'cm': 'Cameroon',
'ca': 'Canada',
'cv': 'Cape Verde',
'ky': 'Cayman Islands',
'cf': 'Central African Republic',
'td': 'Chad',
'cl': 'Chile',
'cn': 'China',
'cx': 'Christmas Island',
'cc': 'Cocos (Keeling) Islands',
'co': 'Colombia',
'km': 'Comoros',
'cg': 'Congo',
'cd': 'Congo, the Democratic Republic of the',
'ck': 'Cook Islands',
'cr': 'Costa Rica',
'ci': "Cote D'ivoire",
'hr': 'Croatia',
'cu': 'Cuba',
'cy': 'Cyprus',
'cz': 'Czech Republic',
'dk': 'Denmark',
'dj': 'Djibouti',
'dm': 'Dominica',
'do': 'Dominican Republic',
'ec': 'Ecuador',
'eg': 'Egypt',
'sv': 'El Salvador',
'gq': 'Equatorial Guinea',
'er': 'Eritrea',
'ee': 'Estonia',
'et': 'Ethiopia',
'fk': 'Falkland Islands (Malvinas)',
'fo': 'Faroe Islands',
'fj': 'Fiji',
'fi': 'Finland',
'fr': 'France',
'gf': 'French Guiana',
'pf': 'French Polynesia',
'tf': 'French Southern Territories',
'ga': 'Gabon',
'gm': 'Gambia',
'ge': 'Georgia',
'de': 'Germany',
'gh': 'Ghana',
'gi': 'Gibraltar',
'gr': 'Greece',
'gl': 'Greenland',
'gd': 'Grenada',
'gp': 'Guadeloupe',
'gu': 'Guam',
'gt': 'Guatemala',
'gn': 'Guinea',
'gw': 'Guinea-Bissau',
'gy': 'Guyana',
'ht': 'Haiti',
'hm': 'Heard Island and Mcdonald Islands',
'va': 'Holy See (Vatican City State)',
'hn': 'Honduras',
'hk': 'Hong Kong',
'hu': 'Hungary',
'is': 'Iceland',
'in': 'India',
'id': 'Indonesia',
'ir': 'Iran, Islamic Republic of',
'iq': 'Iraq',
'ie': 'Ireland',
'il': 'Israel',
'it': 'Italy',
'jm': 'Jamaica',
'jp': 'Japan',
'jo': 'Jordan',
'kz': 'Kazakhstan',
'ke': 'Kenya',
'ki': 'Kiribati',
'kp': "Korea, Democratic People's Republic of",
'kr': 'Korea, Republic of',
'kw': 'Kuwait',
'kg': 'Kyrgyzstan',
'la': "Lao People's Democratic Republic",
'lv': 'Latvia',
'lb': 'Lebanon',
'ls': 'Lesotho',
'lr': 'Liberia',
'ly': 'Libyan Arab Jamahiriya',
'li': 'Liechtenstein',
'lt': 'Lithuania',
'lu': 'Luxembourg',
'mo': 'Macao',
'mk': 'Macedonia, the Former Yugosalv Republic of',
'mg': 'Madagascar',
'mw': 'Malawi',
'my': 'Malaysia',
'mv': 'Maldives',
'ml': 'Mali',
'mt': 'Malta',
'mh': 'Marshall Islands',
'mq': 'Martinique',
'mr': 'Mauritania',
'mu': 'Mauritius',
'yt': 'Mayotte',
'mx': 'Mexico',
'fm': 'Micronesia, Federated States of',
'md': 'Moldova, Republic of',
'mc': 'Monaco',
'mn': 'Mongolia',
'ms': 'Montserrat',
'ma': 'Morocco',
'mz': 'Mozambique',
'mm': 'Myanmar',
'na': 'Namibia',
'nr': 'Nauru',
'np': 'Nepal',
'nl': 'Netherlands',
'an': 'Netherlands Antilles',
'nc': 'New Caledonia',
'nz': 'New Zealand',
'ni': 'Nicaragua',
'ne': 'Niger',
'ng': 'Nigeria',
'nu': 'Niue',
'nf': 'Norfolk Island',
'mp': 'Northern Mariana Islands',
'no': 'Norway',
'om': 'Oman',
'pk': 'Pakistan',
'pw': 'Palau',
'ps': 'Palestinian Territory, Occupied',
'pa': 'Panama',
'pg': 'Papua New Guinea',
'py': 'Paraguay',
'pe': 'Peru',
'ph': 'Philippines',
'pn': 'Pitcairn',
'pl': 'Poland',
'pt': 'Portugal',
'pr': 'Puerto Rico',
'qa': 'Qatar',
're': 'Reunion',
'ro': 'Romania',
'ru': 'Russian Federation',
'rw': 'Rwanda',
'sh': 'Saint Helena',
'kn': 'Saint Kitts and Nevis',
'lc': 'Saint Lucia',
'pm': 'Saint Pierre and Miquelon',
'vc': 'Saint Vincent and the Grenadines',
'ws': 'Samoa',
'sm': 'San Marino',
'st': 'Sao Tome and Principe',
'sa': 'Saudi Arabia',
'sn': 'Senegal',
'cs': 'Serbia and Montenegro',
'sc': 'Seychelles',
'sl': 'Sierra Leone',
'sg': 'Singapore',
'sk': 'Slovakia',
'si': 'Slovenia',
'sb': 'Solomon Islands',
'so': 'Somalia',
'za': 'South Africa',
'gs': 'South Georgia and the South Sandwich Islands',
'es': 'Spain',
'lk': 'Sri Lanka',
'sd': 'Sudan',
'sr': 'Suriname',
'sj': 'Svalbard and Jan Mayen',
'sz': 'Swaziland',
'se': 'Sweden',
'ch': 'Switzerland',
'sy': 'Syrian Arab Republic',
'tw': 'Taiwan, Province of China',
'tj': 'Tajikistan',
'tz': 'Tanzania, United Republic of',
'th': 'Thailand',
'tl': 'Timor-Leste',
'tg': 'Togo',
'tk': 'Tokelau',
'to': 'Tonga',
'tt': 'Trinidad and Tobago',
'tn': 'Tunisia',
'tr': 'Turkey',
'tm': 'Turkmenistan',
'tc': 'Turks and Caicos Islands',
'tv': 'Tuvalu',
'ug': 'Uganda',
'ua': 'Ukraine',
'ae': 'United Arab Emirates',
'uk': 'United Kingdom',
'us': 'United States',
'um': 'United States Minor Outlying Islands',
'uy': 'Uruguay',
'uz': 'Uzbekistan',
'vu': 'Vanuatu',
've': 'Venezuela',
'vn': 'Viet Nam',
'vg': 'Virgin Islands, British',
'vi': 'Virgin Islands, U.S.',
'wf': 'Wallis and Futuna',
'eh': 'Western Sahara',
'ye': 'Yemen',
'zm': 'Zambia',
'zw': 'Zimbabwe'
};
// https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
// The hl parameter determines the Google UI language to return results.
const GOOGLE_HL = {
'af': 'Afrikaans',
'sq': 'Albanian',
'sm': 'Amharic',
'ar': 'Arabic',
'az': 'Azerbaijani',
'eu': 'Basque',
'be': 'Belarusian',
'bn': 'Bengali',
'bh': 'Bihari',
'bs': 'Bosnian',
'bg': 'Bulgarian',
'ca': 'Catalan',
'zh-CN': 'Chinese (Simplified)',
'zh-TW': 'Chinese (Traditional)',
'hr': 'Croatian',
'cs': 'Czech',
'da': 'Danish',
'nl': 'Dutch',
'en': 'English',
'eo': 'Esperanto',
'et': 'Estonian',
'fo': 'Faroese',
'fi': 'Finnish',
'fr': 'French',
'fy': 'Frisian',
'gl': 'Galician',
'ka': 'Georgian',
'de': 'German',
'el': 'Greek',
'gu': 'Gujarati',
'iw': 'Hebrew',
'hi': 'Hindi',
'hu': 'Hungarian',
'is': 'Icelandic',
'id': 'Indonesian',
'ia': 'Interlingua',
'ga': 'Irish',
'it': 'Italian',
'ja': 'Japanese',
'jw': 'Javanese',
'kn': 'Kannada',
'ko': 'Korean',
'la': 'Latin',
'lv': 'Latvian',
'lt': 'Lithuanian',
'mk': 'Macedonian',
'ms': 'Malay',
'ml': 'Malayam',
'mt': 'Maltese',
'mr': 'Marathi',
'ne': 'Nepali',
'no': 'Norwegian',
'nn': 'Norwegian (Nynorsk)',
'oc': 'Occitan',
'fa': 'Persian',
'pl': 'Polish',
'pt-BR': 'Portuguese (Brazil)',
'pt-PT': 'Portuguese (Portugal)',
'pa': 'Punjabi',
'ro': 'Romanian',
'ru': 'Russian',
'gd': 'Scots Gaelic',
'sr': 'Serbian',
'si': 'Sinhalese',
'sk': 'Slovak',
'sl': 'Slovenian',
'es': 'Spanish',
'su': 'Sudanese',
'sw': 'Swahili',
'sv': 'Swedish',
'tl': 'Tagalog',
'ta': 'Tamil',
'te': 'Telugu',
'th': 'Thai',
'ti': 'Tigrinya',
'tr': 'Turkish',
'uk': 'Ukrainian',
'ur': 'Urdu',
'uz': 'Uzbek',
'vi': 'Vietnamese',
'cy': 'Welsh',
'xh': 'Xhosa',
'zu': 'Zulu'
};