mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-06-28 21:41:27 +02:00
fix(google): parser for one shopping product right panel
This commit is contained in:
parent
b9a9227f14
commit
ad8903b001
@ -29,7 +29,7 @@ class GoogleScraper extends Scraper {
|
|||||||
if (n) {
|
if (n) {
|
||||||
return n.getAttribute(attr);
|
return n.getAttribute(attr);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -111,14 +111,14 @@ class GoogleScraper extends Scraper {
|
|||||||
// parse right side product information
|
// parse right side product information
|
||||||
results.right_info.review = _attr(document, '#rhs .cu-container g-review-stars span', 'aria-label');
|
results.right_info.review = _attr(document, '#rhs .cu-container g-review-stars span', 'aria-label');
|
||||||
|
|
||||||
let title_el = document.querySelector('#rhs .cu-container g-review-stars');
|
let title_el = document.querySelector('#rhs .cu-container .Q7Oxbd');
|
||||||
if (title_el) {
|
if (title_el) {
|
||||||
results.right_info.review.title = title_el.parentNode.querySelector('div:first-child').innerText;
|
results.right_info.title = title_el.innerText;
|
||||||
}
|
}
|
||||||
|
|
||||||
let num_reviews_el = document.querySelector('#rhs .cu-container g-review-stars');
|
let num_reviews_el = document.querySelector('#rhs .cu-container .PGDKUd');
|
||||||
if (num_reviews_el) {
|
if (num_reviews_el) {
|
||||||
results.right_info.num_reviews = num_reviews_el.parentNode.querySelector('div:nth-of-type(2)').innerText;
|
results.right_info.num_reviews = num_reviews_el.innerText;
|
||||||
}
|
}
|
||||||
|
|
||||||
results.right_info.vendors = [];
|
results.right_info.vendors = [];
|
||||||
@ -127,20 +127,16 @@ class GoogleScraper extends Scraper {
|
|||||||
document.querySelectorAll('#rhs .cu-container .rhsvw > div > div:nth-child(4) > div > div:nth-child(3) > div').forEach((el) => {
|
document.querySelectorAll('#rhs .cu-container .rhsvw > div > div:nth-child(4) > div > div:nth-child(3) > div').forEach((el) => {
|
||||||
results.right_info.vendors.push({
|
results.right_info.vendors.push({
|
||||||
price: _text(el, 'span:nth-of-type(1)'),
|
price: _text(el, 'span:nth-of-type(1)'),
|
||||||
merchant_name: _text(el, 'span:nth-child(3) a:nth-child(2)'),
|
merchant_name: _text(el, '.doUe3s0oL2B__jackpot-merchant a'),
|
||||||
merchant_ad_link: _attr(el, 'span:nth-child(3) a:first-child', 'href'),
|
merchant_ad_link: _attr(el, 'span:nth-child(3) a:first-child', 'href'),
|
||||||
merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'),
|
merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'), // TODO this is not working anymore
|
||||||
source_name: _text(el, 'span:nth-child(4) a'),
|
source_name: _text(el, 'span:nth-child(4) a'),
|
||||||
source_link: _attr(el, 'span:nth-child(4) a', 'href'),
|
source_link: _attr(el, 'span:nth-child(4) a', 'href'),
|
||||||
info: _text(el, 'div span'),
|
info: _text(el, '.SdBHnc.e2CF7c'),
|
||||||
shipping: _text(el, 'span:last-child > span'),
|
shipping: _text(el, '.JfwJme'),
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!results.right_info.title) {
|
|
||||||
results.right_info = {};
|
|
||||||
}
|
|
||||||
|
|
||||||
let right_side_info_el = document.getElementById('rhs');
|
let right_side_info_el = document.getElementById('rhs');
|
||||||
|
|
||||||
if (right_side_info_el) {
|
if (right_side_info_el) {
|
||||||
|
220
test/mocks/google/shopping right product review_page1.html
Normal file
220
test/mocks/google/shopping right product review_page1.html
Normal file
File diff suppressed because one or more lines are too long
@ -143,4 +143,48 @@ describe('Module Google', function(){
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
it('shopping extract right one product', function () {
|
||||||
|
const googleScraper = new GoogleScraper({
|
||||||
|
config: {
|
||||||
|
search_engine_name: 'google',
|
||||||
|
throw_on_detection: true,
|
||||||
|
keywords: ['shopping right product review'],
|
||||||
|
logger: testLogger,
|
||||||
|
scrape_from_file: '',
|
||||||
|
num_pages: 1,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
googleScraper.STANDARD_TIMEOUT = 500;
|
||||||
|
return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
|
||||||
|
assert.strictEqual(num_requests, 1, 'One request should be done');
|
||||||
|
assert.strictEqual(results['shopping right product review']['1'].results.length, 9, 'Must have 9 organic results parsed on page 1');
|
||||||
|
assert.deepEqual(results['shopping right product review']['1'].right_info, {
|
||||||
|
title: 'Lacoste Lunettes',
|
||||||
|
'info': '',
|
||||||
|
'num_reviews': '146 avis',
|
||||||
|
'review': 'Note : 4,6 sur 5',
|
||||||
|
'vendors': [
|
||||||
|
{
|
||||||
|
'info': '317 · 2807',
|
||||||
|
'merchant_ad_link': 'https://www.googleadservices.com/pagead/aclk?sa=L&ai=DChcSEwihq9C82ojqAhUIyrIKHbIHAx8YABACGgJscg&ohost=www.google.com&cid=CAASE-Roz5UHMJg95vk99OwXQnKbUG0&sig=AOD64_0Wfsw3t3eO_yEtq8lWRIjiF6EqZw&ctype=5&q=&ved=2ahUKEwjsqsi82ojqAhVFPBoKHY38DAIQ9A56BAgNEH0&adurl=',
|
||||||
|
'merchant_name': 'Edel-Optics FR',
|
||||||
|
'price': '102,75 €',
|
||||||
|
'shipping': 'Livraison gratuite',
|
||||||
|
'source_link': 'https://www.google.com/search?tbm=shop&q=lacoste%20317',
|
||||||
|
'source_name': 'Par Google',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'info': '317 · 2805',
|
||||||
|
'merchant_ad_link': 'https://www.googleadservices.com/pagead/aclk?sa=L&ai=DChcSEwihq9C82ojqAhUIyrIKHbIHAx8YABADGgJscg&ohost=www.google.com&cid=CAASE-Roz5UHMJg95vk99OwXQnKbUG0&sig=AOD64_2R4Idoiqc783K8OLyv9W9YQTJfog&ctype=5&q=&ved=2ahUKEwjsqsi82ojqAhVFPBoKHY38DAIQ9A56BQgNEIEB&adurl=',
|
||||||
|
'merchant_name': 'EasyLunettes.fr',
|
||||||
|
'price': '75,00 €',
|
||||||
|
'shipping': 'Livraison gratuite',
|
||||||
|
'source_link': 'https://producthero.com/?utm_source=google&utm_medium=css&q=lacoste%20317',
|
||||||
|
'source_name': 'Par Producthero',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user