mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-02-18 09:30:48 +01:00
num_keywords are counted now. added to pluggable
This commit is contained in:
parent
7572ebd314
commit
7b5048b8ee
20
TODO.txt
20
TODO.txt
@ -32,22 +32,14 @@
|
|||||||
- Bing, Baidu, Google, Duckduckgo
|
- Bing, Baidu, Google, Duckduckgo
|
||||||
|
|
||||||
7.2.2019
|
7.2.2019
|
||||||
- add num_requests to test cases
|
- add num_requests to test cases [done]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
- think about implementing ticker search for: https://quotes.wsj.com/MSFT?mod=searchresults_companyquotes
|
|
||||||
- add proxy support
|
|
||||||
- add captcha service solving support
|
- add captcha service solving support
|
||||||
- check if news instances run the same browser and if we can have one proxy per tab workers
|
- check if news instances run the same browser and if we can have one proxy per tab workers
|
||||||
|
|
||||||
TODO:
|
- write test case for:
|
||||||
- think whether it makes sense to introduce a generic scraping class?
|
- pluggable
|
||||||
- is scraping abstractable or is every scraper too unique?
|
- full metadata (log http headers, log ip address)
|
||||||
- don't make the same mistakes as with GoogleScraper
|
|
||||||
|
|
||||||
|
|
||||||
TODO:
|
|
||||||
okay its fucking time to make a generic scraping class like in GoogleScraper [done]
|
|
||||||
i feel like history repeats
|
|
||||||
|
|
||||||
write good test case for google [done]
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "se-scraper",
|
"name": "se-scraper",
|
||||||
"version": "1.1.12",
|
"version": "1.1.13",
|
||||||
"description": "A simple library using puppeteer to scrape several search engines such as Google, Duckduckgo and Bing.",
|
"description": "A simple library using puppeteer to scrape several search engines such as Google, Duckduckgo and Bing.",
|
||||||
"homepage": "https://scrapeulous.com/",
|
"homepage": "https://scrapeulous.com/",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
|
@ -30,7 +30,10 @@ module.exports = class Scraper {
|
|||||||
|
|
||||||
this.results = {};
|
this.results = {};
|
||||||
this.result_rank = 1;
|
this.result_rank = 1;
|
||||||
|
// keep track of the requests done
|
||||||
this.num_requests = 0;
|
this.num_requests = 0;
|
||||||
|
// keep track of the keywords searched
|
||||||
|
this.num_keywords = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
async run() {
|
async run() {
|
||||||
@ -96,12 +99,15 @@ module.exports = class Scraper {
|
|||||||
*/
|
*/
|
||||||
async scraping_loop() {
|
async scraping_loop() {
|
||||||
for (let keyword of this.config.keywords) {
|
for (let keyword of this.config.keywords) {
|
||||||
|
this.num_keywords++;
|
||||||
this.keyword = keyword;
|
this.keyword = keyword;
|
||||||
this.results[keyword] = {};
|
this.results[keyword] = {};
|
||||||
this.result_rank = 1;
|
this.result_rank = 1;
|
||||||
|
|
||||||
if (this.pluggable.before_keyword_scraped) {
|
if (this.pluggable.before_keyword_scraped) {
|
||||||
await this.pluggable.before_keyword_scraped({
|
await this.pluggable.before_keyword_scraped({
|
||||||
|
num_keywords: this.num_keywords,
|
||||||
|
num_requests: this.num_requests,
|
||||||
keyword: keyword,
|
keyword: keyword,
|
||||||
page: this.page,
|
page: this.page,
|
||||||
config: this.config,
|
config: this.config,
|
||||||
|
Loading…
Reference in New Issue
Block a user