mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-28 21:41:27 +02:00
fix(google): parser for one shopping product right panel
This commit is contained in:
parent
b9a9227f14
commit
ad8903b001
@ -29,7 +29,7 @@ class GoogleScraper extends Scraper {
|
||||
if (n) {
|
||||
return n.getAttribute(attr);
|
||||
} else {
|
||||
return null;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
@ -111,14 +111,14 @@ class GoogleScraper extends Scraper {
|
||||
// parse right side product information
|
||||
results.right_info.review = _attr(document, '#rhs .cu-container g-review-stars span', 'aria-label');
|
||||
|
||||
let title_el = document.querySelector('#rhs .cu-container g-review-stars');
|
||||
let title_el = document.querySelector('#rhs .cu-container .Q7Oxbd');
|
||||
if (title_el) {
|
||||
results.right_info.review.title = title_el.parentNode.querySelector('div:first-child').innerText;
|
||||
results.right_info.title = title_el.innerText;
|
||||
}
|
||||
|
||||
let num_reviews_el = document.querySelector('#rhs .cu-container g-review-stars');
|
||||
let num_reviews_el = document.querySelector('#rhs .cu-container .PGDKUd');
|
||||
if (num_reviews_el) {
|
||||
results.right_info.num_reviews = num_reviews_el.parentNode.querySelector('div:nth-of-type(2)').innerText;
|
||||
results.right_info.num_reviews = num_reviews_el.innerText;
|
||||
}
|
||||
|
||||
results.right_info.vendors = [];
|
||||
@ -127,20 +127,16 @@ class GoogleScraper extends Scraper {
|
||||
document.querySelectorAll('#rhs .cu-container .rhsvw > div > div:nth-child(4) > div > div:nth-child(3) > div').forEach((el) => {
|
||||
results.right_info.vendors.push({
|
||||
price: _text(el, 'span:nth-of-type(1)'),
|
||||
merchant_name: _text(el, 'span:nth-child(3) a:nth-child(2)'),
|
||||
merchant_name: _text(el, '.doUe3s0oL2B__jackpot-merchant a'),
|
||||
merchant_ad_link: _attr(el, 'span:nth-child(3) a:first-child', 'href'),
|
||||
merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'),
|
||||
merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'), // TODO this is not working anymore
|
||||
source_name: _text(el, 'span:nth-child(4) a'),
|
||||
source_link: _attr(el, 'span:nth-child(4) a', 'href'),
|
||||
info: _text(el, 'div span'),
|
||||
shipping: _text(el, 'span:last-child > span'),
|
||||
info: _text(el, '.SdBHnc.e2CF7c'),
|
||||
shipping: _text(el, '.JfwJme'),
|
||||
})
|
||||
});
|
||||
|
||||
if (!results.right_info.title) {
|
||||
results.right_info = {};
|
||||
}
|
||||
|
||||
let right_side_info_el = document.getElementById('rhs');
|
||||
|
||||
if (right_side_info_el) {
|
||||
|
220
test/mocks/google/shopping right product review_page1.html
Normal file
220
test/mocks/google/shopping right product review_page1.html
Normal file
File diff suppressed because one or more lines are too long
@ -143,4 +143,48 @@ describe('Module Google', function(){
|
||||
});
|
||||
});
|
||||
|
||||
it('shopping extract right one product', function () {
|
||||
const googleScraper = new GoogleScraper({
|
||||
config: {
|
||||
search_engine_name: 'google',
|
||||
throw_on_detection: true,
|
||||
keywords: ['shopping right product review'],
|
||||
logger: testLogger,
|
||||
scrape_from_file: '',
|
||||
num_pages: 1,
|
||||
}
|
||||
});
|
||||
googleScraper.STANDARD_TIMEOUT = 500;
|
||||
return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||
assert.strictEqual(num_requests, 1, 'One request should be done');
|
||||
assert.strictEqual(results['shopping right product review']['1'].results.length, 9, 'Must have 9 organic results parsed on page 1');
|
||||
assert.deepEqual(results['shopping right product review']['1'].right_info, {
|
||||
title: 'Lacoste Lunettes',
|
||||
'info': '',
|
||||
'num_reviews': '146 avis',
|
||||
'review': 'Note : 4,6 sur 5',
|
||||
'vendors': [
|
||||
{
|
||||
'info': '317 · 2807',
|
||||
'merchant_ad_link': 'https://www.googleadservices.com/pagead/aclk?sa=L&ai=DChcSEwihq9C82ojqAhUIyrIKHbIHAx8YABACGgJscg&ohost=www.google.com&cid=CAASE-Roz5UHMJg95vk99OwXQnKbUG0&sig=AOD64_0Wfsw3t3eO_yEtq8lWRIjiF6EqZw&ctype=5&q=&ved=2ahUKEwjsqsi82ojqAhVFPBoKHY38DAIQ9A56BAgNEH0&adurl=',
|
||||
'merchant_name': 'Edel-Optics FR',
|
||||
'price': '102,75 €',
|
||||
'shipping': 'Livraison gratuite',
|
||||
'source_link': 'https://www.google.com/search?tbm=shop&q=lacoste%20317',
|
||||
'source_name': 'Par Google',
|
||||
},
|
||||
{
|
||||
'info': '317 · 2805',
|
||||
'merchant_ad_link': 'https://www.googleadservices.com/pagead/aclk?sa=L&ai=DChcSEwihq9C82ojqAhUIyrIKHbIHAx8YABADGgJscg&ohost=www.google.com&cid=CAASE-Roz5UHMJg95vk99OwXQnKbUG0&sig=AOD64_2R4Idoiqc783K8OLyv9W9YQTJfog&ctype=5&q=&ved=2ahUKEwjsqsi82ojqAhVFPBoKHY38DAIQ9A56BQgNEIEB&adurl=',
|
||||
'merchant_name': 'EasyLunettes.fr',
|
||||
'price': '75,00 €',
|
||||
'shipping': 'Livraison gratuite',
|
||||
'source_link': 'https://producthero.com/?utm_source=google&utm_medium=css&q=lacoste%20317',
|
||||
'source_name': 'Par Producthero',
|
||||
}
|
||||
]
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user