mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-01-12 15:48:11 +01:00
add fucking google product information
This commit is contained in:
parent
1fc7f0d1c8
commit
60a9d52924
@ -12,8 +12,8 @@ const se_scraper = require('./../src/node_scraper.js');
|
||||
};
|
||||
|
||||
let scrape_job = {
|
||||
search_engine: 'google_shopping',
|
||||
keywords: ['wasserpistole'],
|
||||
search_engine: 'google',
|
||||
keywords: ['mercedes reifen'],
|
||||
num_pages: 1,
|
||||
};
|
||||
|
||||
|
@ -63,6 +63,66 @@ class GoogleScraper extends Scraper {
|
||||
})
|
||||
});
|
||||
|
||||
// parse right side product information
|
||||
var right_side_info = {};
|
||||
right_side_info.review = $('#rhs .cu-container g-review-stars span').attr('aria-label');
|
||||
right_side_info.title = $('#rhs .cu-container g-review-stars').parent().find('div:first-child').text();
|
||||
right_side_info.num_reviews = $('#rhs .cu-container g-review-stars').parent().find('div:nth-of-type(2)').text();
|
||||
right_side_info.vendors = [];
|
||||
right_side_info.info = $('#rhs_block > div > div > div > div:nth-child(5) > div > div').text();
|
||||
|
||||
$('#rhs .cu-container .rhsvw > div > div:nth-child(4) > div > div:nth-child(3) > div').each((i, element) => {
|
||||
right_side_info.vendors.push({
|
||||
price: $(element).find('span:nth-of-type(1)').text(),
|
||||
merchant_name: $(element).find('span:nth-child(3) a:nth-child(2)').text(),
|
||||
merchant_ad_link: $(element).find('span:nth-child(3) a:first-child').attr('href'),
|
||||
merchant_link: $(element).find('span:nth-child(3) a:nth-child(2)').attr('href'),
|
||||
source_name: $(element).find('span:nth-child(4) a').text(),
|
||||
source_link: $(element).find('span:nth-child(4) a').attr('href'),
|
||||
info: $(element).find('div span').text(),
|
||||
shipping: $(element).find('span:last-child > span').text(),
|
||||
})
|
||||
});
|
||||
|
||||
if (!right_side_info.title) {
|
||||
right_side_info = {};
|
||||
}
|
||||
|
||||
// parse top main column product information
|
||||
// #tvcap .pla-unit
|
||||
var top_products = [];
|
||||
$('#tvcap .pla-unit').each((i, element) => {
|
||||
top_products.push({
|
||||
tracking_link: $(element).find('.pla-unit-title a:first-child').attr('href'),
|
||||
link: $(element).find('.pla-unit-title a:nth-child(2)').attr('href'),
|
||||
title: $(element).find('.pla-unit-title a:nth-child(2) span').text(),
|
||||
price: $(element).find('.pla-unit-title + div').text(),
|
||||
merchant_name: $(element).find('.pla-unit-title').parent().find('div > span').text(),
|
||||
shipping: $(element).find('.pla-extensions-container div:nth-of-type(1)').text(),
|
||||
vendor_link: $(element).find('.pla-extensions-container div > a').attr('href'),
|
||||
})
|
||||
});
|
||||
|
||||
top_products = this.clean_results(top_products, ['title', 'link']);
|
||||
|
||||
// parse top right product information
|
||||
// #rhs_block .pla-unit
|
||||
var right_products = [];
|
||||
$('#rhs_block .pla-unit').each((i, element) => {
|
||||
right_products.push({
|
||||
tracking_link: $(element).find('.pla-unit-title a:first-child').attr('href'),
|
||||
link: $(element).find('.pla-unit-title a:nth-child(2)').attr('href'),
|
||||
title: $(element).find('.pla-unit-title a:nth-child(2) span:first-child').first().text(),
|
||||
price: $(element).find('.pla-unit-title + div').text(),
|
||||
merchant_name: $(element).find('.pla-unit-title').parent().find('div > span:first-child').text(),
|
||||
shipping: $(element).find('.pla-extensions-container > div').text(),
|
||||
vendor_link: $(element).find('.pla-extensions-container div > a').attr('href'),
|
||||
vendor_name: $(element).find('.pla-extensions-container div > a > div').text(),
|
||||
})
|
||||
});
|
||||
|
||||
right_products = this.clean_results(right_products, ['title', 'link']);
|
||||
|
||||
// 'Ergebnisse für', 'Showing results for'
|
||||
let no_results = this.no_results(
|
||||
['Es wurden keine mit deiner Suchanfrage', 'did not match any documents', 'Keine Ergebnisse für',
|
||||
@ -82,6 +142,9 @@ class GoogleScraper extends Scraper {
|
||||
num_results: $('#resultStats').text(),
|
||||
no_results: no_results,
|
||||
effective_query: effective_query,
|
||||
right_info: right_side_info,
|
||||
top_products: top_products,
|
||||
right_products: right_products,
|
||||
top_ads: top_ads,
|
||||
bottom_ads: bottomads,
|
||||
places: places,
|
||||
|
Loading…
Reference in New Issue
Block a user