mirror of
https://github.com/NikolaiT/se-scraper.git
synced 2025-02-26 13:30:58 +01:00
passing chrome flags directly now possible
This commit is contained in:
parent
775dcfa077
commit
1593759556
16
README.md
16
README.md
@ -42,13 +42,27 @@ This module uses puppeteer and a modified version of [puppeteer-cluster](https:/
|
||||
|
||||
You need a working installation of **node** and the **npm** package manager.
|
||||
|
||||
|
||||
For example, if you are using Ubuntu 18.04, you can install node and npm with the following commands:
|
||||
|
||||
`sudo apt install nodejs` and
|
||||
`sudo apt install npm`
|
||||
|
||||
Chrome and puppeteer [need some additional libraries to run on Ubuntu](https://techoverflow.net/2018/06/05/how-to-fix-puppetteer-error-).
|
||||
|
||||
The following command installs the required dependencies:
|
||||
|
||||
```
|
||||
sudo apt-get install gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget
|
||||
```
|
||||
|
||||
Install **se-scraper** by entering the following command in your terminal:
|
||||
|
||||
```bash
|
||||
npm install se-scraper
|
||||
```
|
||||
|
||||
If you **don't** want puppeteer to download a complete chromium browser, add this variable to your environment. Then this library is not guaranteed to run out of the box.
|
||||
If you **don't** want puppeteer to download a complete chromium browser, add this variable to your environment. Note that this module is then not guaranteed to run out of the box.
|
||||
|
||||
```bash
|
||||
export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=1
|
||||
|
@ -1,284 +1,130 @@
|
||||
{
|
||||
"cat": {
|
||||
"1": {
|
||||
"time": "Thu, 14 Mar 2019 22:31:20 GMT",
|
||||
"time": "Mon, 01 Apr 2019 13:18:15 GMT",
|
||||
"no_results": false,
|
||||
"num_results": "百度为您找到相关结果约31,900,000个",
|
||||
"num_results": "百度为您找到相关结果约31,500,000个",
|
||||
"results": [
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=1DLhRKEktA3-C9-w42iT8RFUwtEhrZNVdmrAdADKa4ZrPb2Q3blQieFN8-5olh5Pe5fJ6okkR2qP6FquPRTT1q",
|
||||
"link": "http://www.baidu.com/link?url=avcUt9MXynjmbQR7BYlcLGQYKwEWNT2YnAme4J-nvpSug_6ehqEfL-NOly6gXzXjx7SFBDIrcR-vcyPYKHh5Lq",
|
||||
"title": "cat_百度百科",
|
||||
"snippet": "2017年7月30日 - CAT鞋也叫catfootwear。公司成立于1904年,出产工业制造工具和全世界闻名的CAT品牌各类休闲衣服与鞋业。CAT制造...",
|
||||
"visible_link": "百度百科 - 百度快照",
|
||||
"rank": 1
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=PTfE2yMxRXiCKsbuJoQXw9cMFGlyNsrNBNvUAnymH0SNCaANBl5lXN56yhp2sTzy",
|
||||
"link": "http://www.baidu.com/link?url=HjCnKN4wOfTizwb3nCOIX6ek-TMNX0giZzRerLJmmuNqVh7xJ7ziVfXx5-sJHuFc",
|
||||
"title": "Cat | 亚太区 | Caterpillar",
|
||||
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是...",
|
||||
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是卡特彼勒公司旗舰品牌。产品涵盖:卡特挖掘机、卡特推土机...",
|
||||
"visible_link": "https://www.cat.com/zh_...html - 百度快照 - 36条评价",
|
||||
"rank": 2
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=cOW3sgfQXtGfCpcSaB2Gu7ln6wAoMfcmOP4oqDJM3Ku",
|
||||
"link": "http://www.baidu.com/link?url=kr1lCQKntgYzgSWM2FhLL4BUcTj1ISpnsjzxXMWTnKC",
|
||||
"title": "Cat | global-selector | Caterpillar",
|
||||
"snippet": "global-selector Caterpillar Worldwide Genuine enabler of sustainable world progress and opportunity, ...",
|
||||
"visible_link": "https://www.cat.com/ - 百度快照 - 36条评价 - 翻译此页",
|
||||
"rank": 3
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=a81Bbgu0TGU9hKwb8RF20hF_kfEN6vdl0FRXwhA20kCfbvCj2wBhWXbnjJfCKUeY",
|
||||
"link": "http://www.baidu.com/link?url=6_ipy_cKkyswOXxWARa3kf20yEV2VmXxH3scHlMeLsQ0hVvJjuLCP6IIYx_-gGMQ",
|
||||
"title": "CAT - 京东",
|
||||
"snippet": "京东JD.COM是国内专业的网上购物商城,为您提供CAT价格、CAT评论、CAT导购、CAT图片等相关信息",
|
||||
"visible_link": "京东 - 百度快照",
|
||||
"rank": 4
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=m1xeiaTFWm-RBFn2t5720iS7Jsn26kr88v2I_vqrVEABcOGGTU6lhANim8WbQrBwQoi7-2xVVh31RefgAHtgj--ANFzKtX6UwO2LqFQ2Gdi",
|
||||
"title": "大众点评CAT简介 - stone_tomcate的博客 - CSDN博客",
|
||||
"snippet": "2018年5月17日 - CAT简介 CAT(Central Application Tracking),是基于纯Java开发的分布式实时监控系统。开源代码托管在GitHub(搜索CAT即可),作者是吴其敏(qmwu2000,目前...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"link": "http://www.baidu.com/link?url=9coejP6ciBEc0jK3sM14almpjYzLhO9s0YZcN1VICTgyioKyftrowla7fv21bGN5nd0jerHWHBq66ED0tIAKv_",
|
||||
"title": "Linux cat命令 | 菜鸟教程",
|
||||
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
|
||||
"visible_link": "www.runoob.com/linux/l... - 百度快照",
|
||||
"rank": 5
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=5LCk_4r6s5pCnIXPgCo4tb2qMv6SIYpb5c_JPpgHsTUmaHf_fILjZBJNiylrWLqf360uyJClnizYA6huD9ayB_",
|
||||
"title": "Linux cat命令 | 菜鸟教程",
|
||||
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
|
||||
"visible_link": "www.runoob.com/linux/l... - 百度快照",
|
||||
"link": "http://www.baidu.com/link?url=2vC0vVJSgAVojvV7XlPDZnrg3xRvOoWCx_aw8d1BUN6JSm7XdCyj_NVKf-4zdKMxXDDd4BsZykxal-ZcMs5OCxvBtWeVdSkjmPj4oKgj88K",
|
||||
"title": "cat 分布式框架 - java零基础的外行人 - CSDN博客",
|
||||
"snippet": "2017年12月27日 - CAT系统原型和理念来源于eBay的CAL的系统,CAT系统第一代设计者吴其敏在eBay工作长达十几年,对CAL系统有深刻...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 6
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=fvGSBq5NeKyy7mgnEN4MdmJBkO0bMl7mtZQ8xkcBNWsmcBfbcCo0F_DquyXuP50isHBbkpUFFyT3Ta3q1eCkGCNoLKZchTxfx3Q9a-6zc8u",
|
||||
"title": "cat /proc/media-mem - 留点什么... - CSDN博客",
|
||||
"snippet": "2018年10月27日 - ",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"link": "http://www.baidu.com/link?url=3NzEhiI-CjzXMfiNv-0LSgzlOkmsAzK7v7J9lUNnSp-J9nkA60KdO2oWujlda4NR",
|
||||
"title": "Cat | 中东 | Caterpillar",
|
||||
"snippet": "Cat 机器和发动机为我们所服务的行业树立了标准,我们广泛的产品线也体现了我们对客户的成功的日益重视",
|
||||
"visible_link": "https://www.cat.com/zh_...html - 百度快照",
|
||||
"rank": 7
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=9QjsK2jX7fdw5RtwSwgOLE_UIP5DoeRyc-pkmgLA5BJglG_MWMsBtsVgM6USZymB",
|
||||
"title": "CAT",
|
||||
"snippet": "CAT12 additionally includes the estimation of the cortical thickness and central surface of the left and right hemispheres based on the projection-based thick...",
|
||||
"visible_link": "www.neuro.uni-jena.de/... - 百度快照 - 翻译此页",
|
||||
"link": "http://www.baidu.com/link?url=fIHiCWZXnNgU_oFOKHgKZavP-TA1y-CGvtepweW6pc8LqPmPBl3zYzYmInj3QLBPXNFmcOBfL3mypg2bxxoXH_",
|
||||
"title": "Linux指令之cat - Hubz131的博客 - CSDN博客",
|
||||
"snippet": "2018年4月5日 - cat命令用于连接文件并打印到标准输出设备上。 语法: cat [-AbeEnstTuv] [--help] [--version] fileName参数: -n或--number:由1开始对所有输出的行数...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 8
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=uOwWor1n93n5v5WJTorjdMIJ--R283kBnCAKD3eG9xre8uD5_P57LHhnNMZAXmPuMjTOnSX4Y8QE7md8zBSMoGOBT7dHKeAYCkE-OofEq0e",
|
||||
"title": "深入详解美团点评CAT跨语言服务监控(一) CAT简介与部署..._CSDN博客",
|
||||
"snippet": "2018年7月2日 - 前言: CAT是一个实时和接近全量的监控系统,它侧重于对Java应用的监控,除了与点评RPC组件融合的很好之外,他将会能与Spring、MyBatis、Dubbo 等框架以及...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 9
|
||||
}
|
||||
]
|
||||
},
|
||||
"2": {
|
||||
"time": "Thu, 14 Mar 2019 22:31:20 GMT",
|
||||
"no_results": false,
|
||||
"num_results": "百度为您找到相关结果约31,900,000个",
|
||||
"results": [
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=1DLhRKEktA3-C9-w42iT8RFUwtEhrZNVdmrAdADKa4ZrPb2Q3blQieFN8-5olh5Pe5fJ6okkR2qP6FquPRTT1q",
|
||||
"title": "cat_百度百科",
|
||||
"snippet": "2017年7月30日 - CAT鞋也叫catfootwear。公司成立于1904年,出产工业制造工具和全世界闻名的CAT品牌各类休闲衣服与鞋业。CAT制造...",
|
||||
"visible_link": "百度百科 - 百度快照",
|
||||
"rank": 10
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=PTfE2yMxRXiCKsbuJoQXw9cMFGlyNsrNBNvUAnymH0SNCaANBl5lXN56yhp2sTzy",
|
||||
"title": "Cat | 亚太区 | Caterpillar",
|
||||
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是...",
|
||||
"visible_link": "https://www.cat.com/zh_...html - 百度快照 - 36条评价",
|
||||
"rank": 11
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=cOW3sgfQXtGfCpcSaB2Gu7ln6wAoMfcmOP4oqDJM3Ku",
|
||||
"title": "Cat | global-selector | Caterpillar",
|
||||
"snippet": "global-selector Caterpillar Worldwide Genuine enabler of sustainable world progress and opportunity, ...",
|
||||
"visible_link": "https://www.cat.com/ - 百度快照 - 36条评价 - 翻译此页",
|
||||
"rank": 12
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=a81Bbgu0TGU9hKwb8RF20hF_kfEN6vdl0FRXwhA20kCfbvCj2wBhWXbnjJfCKUeY",
|
||||
"title": "CAT - 京东",
|
||||
"snippet": "京东JD.COM是国内专业的网上购物商城,为您提供CAT价格、CAT评论、CAT导购、CAT图片等相关信息",
|
||||
"visible_link": "京东 - 百度快照",
|
||||
"rank": 13
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=m1xeiaTFWm-RBFn2t5720iS7Jsn26kr88v2I_vqrVEABcOGGTU6lhANim8WbQrBwQoi7-2xVVh31RefgAHtgj--ANFzKtX6UwO2LqFQ2Gdi",
|
||||
"title": "大众点评CAT简介 - stone_tomcate的博客 - CSDN博客",
|
||||
"snippet": "2018年5月17日 - CAT简介 CAT(Central Application Tracking),是基于纯Java开发的分布式实时监控系统。开源代码托管在GitHub(搜索CAT即可),作者是吴其敏(qmwu2000,目前...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 14
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=5LCk_4r6s5pCnIXPgCo4tb2qMv6SIYpb5c_JPpgHsTUmaHf_fILjZBJNiylrWLqf360uyJClnizYA6huD9ayB_",
|
||||
"title": "Linux cat命令 | 菜鸟教程",
|
||||
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
|
||||
"visible_link": "www.runoob.com/linux/l... - 百度快照",
|
||||
"rank": 15
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=fvGSBq5NeKyy7mgnEN4MdmJBkO0bMl7mtZQ8xkcBNWsmcBfbcCo0F_DquyXuP50isHBbkpUFFyT3Ta3q1eCkGCNoLKZchTxfx3Q9a-6zc8u",
|
||||
"title": "cat /proc/media-mem - 留点什么... - CSDN博客",
|
||||
"snippet": "2018年10月27日 - ",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 16
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=9QjsK2jX7fdw5RtwSwgOLE_UIP5DoeRyc-pkmgLA5BJglG_MWMsBtsVgM6USZymB",
|
||||
"title": "CAT",
|
||||
"snippet": "CAT12 additionally includes the estimation of the cortical thickness and central surface of the left and right hemispheres based on the projection-based thick...",
|
||||
"visible_link": "www.neuro.uni-jena.de/... - 百度快照 - 翻译此页",
|
||||
"rank": 17
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=uOwWor1n93n5v5WJTorjdMIJ--R283kBnCAKD3eG9xre8uD5_P57LHhnNMZAXmPuMjTOnSX4Y8QE7md8zBSMoGOBT7dHKeAYCkE-OofEq0e",
|
||||
"title": "深入详解美团点评CAT跨语言服务监控(一) CAT简介与部署..._CSDN博客",
|
||||
"snippet": "2018年7月2日 - 前言: CAT是一个实时和接近全量的监控系统,它侧重于对Java应用的监控,除了与点评RPC组件融合的很好之外,他将会能与Spring、MyBatis、Dubbo 等框架以及...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 18
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"mouse": {
|
||||
"1": {
|
||||
"time": "Thu, 14 Mar 2019 22:31:21 GMT",
|
||||
"time": "Mon, 01 Apr 2019 13:18:15 GMT",
|
||||
"no_results": false,
|
||||
"num_results": "百度为您找到相关结果约31,900,000个",
|
||||
"num_results": "百度为您找到相关结果约31,500,000个",
|
||||
"results": [
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=1DLhRKEktA3-C9-w42iT8RFUwtEhrZNVdmrAdADKa4ZrPb2Q3blQieFN8-5olh5Pe5fJ6okkR2qP6FquPRTT1q",
|
||||
"link": "http://www.baidu.com/link?url=avcUt9MXynjmbQR7BYlcLGQYKwEWNT2YnAme4J-nvpSug_6ehqEfL-NOly6gXzXjx7SFBDIrcR-vcyPYKHh5Lq",
|
||||
"title": "cat_百度百科",
|
||||
"snippet": "2017年7月30日 - CAT鞋也叫catfootwear。公司成立于1904年,出产工业制造工具和全世界闻名的CAT品牌各类休闲衣服与鞋业。CAT制造...",
|
||||
"visible_link": "百度百科 - 百度快照",
|
||||
"rank": 1
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=PTfE2yMxRXiCKsbuJoQXw9cMFGlyNsrNBNvUAnymH0SNCaANBl5lXN56yhp2sTzy",
|
||||
"link": "http://www.baidu.com/link?url=HjCnKN4wOfTizwb3nCOIX6ek-TMNX0giZzRerLJmmuNqVh7xJ7ziVfXx5-sJHuFc",
|
||||
"title": "Cat | 亚太区 | Caterpillar",
|
||||
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是...",
|
||||
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是卡特彼勒公司旗舰品牌。产品涵盖:卡特挖掘机、卡特推土机...",
|
||||
"visible_link": "https://www.cat.com/zh_...html - 百度快照 - 36条评价",
|
||||
"rank": 2
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=cOW3sgfQXtGfCpcSaB2Gu7ln6wAoMfcmOP4oqDJM3Ku",
|
||||
"link": "http://www.baidu.com/link?url=kr1lCQKntgYzgSWM2FhLL4BUcTj1ISpnsjzxXMWTnKC",
|
||||
"title": "Cat | global-selector | Caterpillar",
|
||||
"snippet": "global-selector Caterpillar Worldwide Genuine enabler of sustainable world progress and opportunity, ...",
|
||||
"visible_link": "https://www.cat.com/ - 百度快照 - 36条评价 - 翻译此页",
|
||||
"rank": 3
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=a81Bbgu0TGU9hKwb8RF20hF_kfEN6vdl0FRXwhA20kCfbvCj2wBhWXbnjJfCKUeY",
|
||||
"link": "http://www.baidu.com/link?url=6_ipy_cKkyswOXxWARa3kf20yEV2VmXxH3scHlMeLsQ0hVvJjuLCP6IIYx_-gGMQ",
|
||||
"title": "CAT - 京东",
|
||||
"snippet": "京东JD.COM是国内专业的网上购物商城,为您提供CAT价格、CAT评论、CAT导购、CAT图片等相关信息",
|
||||
"visible_link": "京东 - 百度快照",
|
||||
"rank": 4
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=m1xeiaTFWm-RBFn2t5720iS7Jsn26kr88v2I_vqrVEABcOGGTU6lhANim8WbQrBwQoi7-2xVVh31RefgAHtgj--ANFzKtX6UwO2LqFQ2Gdi",
|
||||
"title": "大众点评CAT简介 - stone_tomcate的博客 - CSDN博客",
|
||||
"snippet": "2018年5月17日 - CAT简介 CAT(Central Application Tracking),是基于纯Java开发的分布式实时监控系统。开源代码托管在GitHub(搜索CAT即可),作者是吴其敏(qmwu2000,目前...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"link": "http://www.baidu.com/link?url=9coejP6ciBEc0jK3sM14almpjYzLhO9s0YZcN1VICTgyioKyftrowla7fv21bGN5nd0jerHWHBq66ED0tIAKv_",
|
||||
"title": "Linux cat命令 | 菜鸟教程",
|
||||
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
|
||||
"visible_link": "www.runoob.com/linux/l... - 百度快照",
|
||||
"rank": 5
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=5LCk_4r6s5pCnIXPgCo4tb2qMv6SIYpb5c_JPpgHsTUmaHf_fILjZBJNiylrWLqf360uyJClnizYA6huD9ayB_",
|
||||
"title": "Linux cat命令 | 菜鸟教程",
|
||||
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
|
||||
"visible_link": "www.runoob.com/linux/l... - 百度快照",
|
||||
"link": "http://www.baidu.com/link?url=2vC0vVJSgAVojvV7XlPDZnrg3xRvOoWCx_aw8d1BUN6JSm7XdCyj_NVKf-4zdKMxXDDd4BsZykxal-ZcMs5OCxvBtWeVdSkjmPj4oKgj88K",
|
||||
"title": "cat 分布式框架 - java零基础的外行人 - CSDN博客",
|
||||
"snippet": "2017年12月27日 - CAT系统原型和理念来源于eBay的CAL的系统,CAT系统第一代设计者吴其敏在eBay工作长达十几年,对CAL系统有深刻...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 6
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=fvGSBq5NeKyy7mgnEN4MdmJBkO0bMl7mtZQ8xkcBNWsmcBfbcCo0F_DquyXuP50isHBbkpUFFyT3Ta3q1eCkGCNoLKZchTxfx3Q9a-6zc8u",
|
||||
"title": "cat /proc/media-mem - 留点什么... - CSDN博客",
|
||||
"snippet": "2018年10月27日 - ",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"link": "http://www.baidu.com/link?url=3NzEhiI-CjzXMfiNv-0LSgzlOkmsAzK7v7J9lUNnSp-J9nkA60KdO2oWujlda4NR",
|
||||
"title": "Cat | 中东 | Caterpillar",
|
||||
"snippet": "Cat 机器和发动机为我们所服务的行业树立了标准,我们广泛的产品线也体现了我们对客户的成功的日益重视",
|
||||
"visible_link": "https://www.cat.com/zh_...html - 百度快照",
|
||||
"rank": 7
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=9QjsK2jX7fdw5RtwSwgOLE_UIP5DoeRyc-pkmgLA5BJglG_MWMsBtsVgM6USZymB",
|
||||
"title": "CAT",
|
||||
"snippet": "CAT12 additionally includes the estimation of the cortical thickness and central surface of the left and right hemispheres based on the projection-based thick...",
|
||||
"visible_link": "www.neuro.uni-jena.de/... - 百度快照 - 翻译此页",
|
||||
"link": "http://www.baidu.com/link?url=fIHiCWZXnNgU_oFOKHgKZavP-TA1y-CGvtepweW6pc8LqPmPBl3zYzYmInj3QLBPXNFmcOBfL3mypg2bxxoXH_",
|
||||
"title": "Linux指令之cat - Hubz131的博客 - CSDN博客",
|
||||
"snippet": "2018年4月5日 - cat命令用于连接文件并打印到标准输出设备上。 语法: cat [-AbeEnstTuv] [--help] [--version] fileName参数: -n或--number:由1开始对所有输出的行数...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 8
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=uOwWor1n93n5v5WJTorjdMIJ--R283kBnCAKD3eG9xre8uD5_P57LHhnNMZAXmPuMjTOnSX4Y8QE7md8zBSMoGOBT7dHKeAYCkE-OofEq0e",
|
||||
"title": "深入详解美团点评CAT跨语言服务监控(一) CAT简介与部署..._CSDN博客",
|
||||
"snippet": "2018年7月2日 - 前言: CAT是一个实时和接近全量的监控系统,它侧重于对Java应用的监控,除了与点评RPC组件融合的很好之外,他将会能与Spring、MyBatis、Dubbo 等框架以及...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 9
|
||||
}
|
||||
]
|
||||
},
|
||||
"2": {
|
||||
"time": "Thu, 14 Mar 2019 22:31:21 GMT",
|
||||
"no_results": false,
|
||||
"num_results": "百度为您找到相关结果约31,900,000个",
|
||||
"results": [
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=1DLhRKEktA3-C9-w42iT8RFUwtEhrZNVdmrAdADKa4ZrPb2Q3blQieFN8-5olh5Pe5fJ6okkR2qP6FquPRTT1q",
|
||||
"title": "cat_百度百科",
|
||||
"snippet": "2017年7月30日 - CAT鞋也叫catfootwear。公司成立于1904年,出产工业制造工具和全世界闻名的CAT品牌各类休闲衣服与鞋业。CAT制造...",
|
||||
"visible_link": "百度百科 - 百度快照",
|
||||
"rank": 10
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=PTfE2yMxRXiCKsbuJoQXw9cMFGlyNsrNBNvUAnymH0SNCaANBl5lXN56yhp2sTzy",
|
||||
"title": "Cat | 亚太区 | Caterpillar",
|
||||
"snippet": "CAT 实干成就梦想。卡特彼勒,全球实干家的强大伙伴。欢迎访问Cat (卡特) 官网,产品和服务价格查询中心。Cat是...",
|
||||
"visible_link": "https://www.cat.com/zh_...html - 百度快照 - 36条评价",
|
||||
"rank": 11
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=cOW3sgfQXtGfCpcSaB2Gu7ln6wAoMfcmOP4oqDJM3Ku",
|
||||
"title": "Cat | global-selector | Caterpillar",
|
||||
"snippet": "global-selector Caterpillar Worldwide Genuine enabler of sustainable world progress and opportunity, ...",
|
||||
"visible_link": "https://www.cat.com/ - 百度快照 - 36条评价 - 翻译此页",
|
||||
"rank": 12
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=a81Bbgu0TGU9hKwb8RF20hF_kfEN6vdl0FRXwhA20kCfbvCj2wBhWXbnjJfCKUeY",
|
||||
"title": "CAT - 京东",
|
||||
"snippet": "京东JD.COM是国内专业的网上购物商城,为您提供CAT价格、CAT评论、CAT导购、CAT图片等相关信息",
|
||||
"visible_link": "京东 - 百度快照",
|
||||
"rank": 13
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=m1xeiaTFWm-RBFn2t5720iS7Jsn26kr88v2I_vqrVEABcOGGTU6lhANim8WbQrBwQoi7-2xVVh31RefgAHtgj--ANFzKtX6UwO2LqFQ2Gdi",
|
||||
"title": "大众点评CAT简介 - stone_tomcate的博客 - CSDN博客",
|
||||
"snippet": "2018年5月17日 - CAT简介 CAT(Central Application Tracking),是基于纯Java开发的分布式实时监控系统。开源代码托管在GitHub(搜索CAT即可),作者是吴其敏(qmwu2000,目前...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 14
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=5LCk_4r6s5pCnIXPgCo4tb2qMv6SIYpb5c_JPpgHsTUmaHf_fILjZBJNiylrWLqf360uyJClnizYA6huD9ayB_",
|
||||
"title": "Linux cat命令 | 菜鸟教程",
|
||||
"snippet": "2019年3月6日 - Linux cat命令 Linux 命令大全 命令:cat cat 命令用于连接文件并打印到标准输出设备上。 使用权限 所有使用者 语法格式 cat [-AbeEnstTuv] [--help] [...",
|
||||
"visible_link": "www.runoob.com/linux/l... - 百度快照",
|
||||
"rank": 15
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=fvGSBq5NeKyy7mgnEN4MdmJBkO0bMl7mtZQ8xkcBNWsmcBfbcCo0F_DquyXuP50isHBbkpUFFyT3Ta3q1eCkGCNoLKZchTxfx3Q9a-6zc8u",
|
||||
"title": "cat /proc/media-mem - 留点什么... - CSDN博客",
|
||||
"snippet": "2018年10月27日 - ",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 16
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=9QjsK2jX7fdw5RtwSwgOLE_UIP5DoeRyc-pkmgLA5BJglG_MWMsBtsVgM6USZymB",
|
||||
"title": "CAT",
|
||||
"snippet": "CAT12 additionally includes the estimation of the cortical thickness and central surface of the left and right hemispheres based on the projection-based thick...",
|
||||
"visible_link": "www.neuro.uni-jena.de/... - 百度快照 - 翻译此页",
|
||||
"rank": 17
|
||||
},
|
||||
{
|
||||
"link": "http://www.baidu.com/link?url=uOwWor1n93n5v5WJTorjdMIJ--R283kBnCAKD3eG9xre8uD5_P57LHhnNMZAXmPuMjTOnSX4Y8QE7md8zBSMoGOBT7dHKeAYCkE-OofEq0e",
|
||||
"title": "深入详解美团点评CAT跨语言服务监控(一) CAT简介与部署..._CSDN博客",
|
||||
"snippet": "2018年7月2日 - 前言: CAT是一个实时和接近全量的监控系统,它侧重于对Java应用的监控,除了与点评RPC组件融合的很好之外,他将会能与Spring、MyBatis、Dubbo 等框架以及...",
|
||||
"visible_link": "CSDN博客号 - 百度快照",
|
||||
"rank": 18
|
||||
}
|
||||
]
|
||||
}
|
||||
|
2
index.js
2
index.js
@ -28,6 +28,8 @@ exports.scrape = async function(user_config, callback) {
|
||||
keywords: ['search engine scraping scrapeulous.com'],
|
||||
// whether to start the browser in headless mode
|
||||
headless: true,
|
||||
// specify flags passed to chrome here
|
||||
chrome_flags: [],
|
||||
// the number of pages to scrape for each keyword
|
||||
num_pages: 1,
|
||||
// path to output file, data will be stored in JSON
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "se-scraper",
|
||||
"version": "1.2.14",
|
||||
"version": "1.2.15",
|
||||
"description": "A simple module using puppeteer to scrape several search engines such as Google, Duckduckgo, Bing or Baidu",
|
||||
"homepage": "https://scrapeulous.com/",
|
||||
"main": "index.js",
|
||||
|
4
run.js
4
run.js
@ -12,7 +12,7 @@ let config = {
|
||||
search_engine: 'baidu',
|
||||
// whether debug information should be printed
|
||||
// debug info is useful for developers when debugging
|
||||
debug: false,
|
||||
debug: true,
|
||||
// whether verbose program output should be printed
|
||||
// this output is informational
|
||||
verbose: true,
|
||||
@ -24,6 +24,8 @@ let config = {
|
||||
num_pages: 1,
|
||||
// whether to start the browser in headless mode
|
||||
headless: false,
|
||||
// specify flags passed to chrome here
|
||||
chrome_flags: [],
|
||||
// path to output file, data will be stored in JSON
|
||||
output_file: 'examples/results/baidu.json',
|
||||
// whether to prevent images, css, fonts from being loaded
|
||||
|
@ -67,7 +67,7 @@ module.exports.handler = async function handler (event, context, callback) {
|
||||
}
|
||||
|
||||
// See here: https://peter.sh/experiments/chromium-command-line-switches/
|
||||
var ADDITIONAL_CHROME_FLAGS = [
|
||||
var chrome_flags = [
|
||||
'--disable-infobars',
|
||||
'--window-position=0,0',
|
||||
'--ignore-certifcate-errors',
|
||||
@ -82,6 +82,10 @@ module.exports.handler = async function handler (event, context, callback) {
|
||||
'--disable-notifications',
|
||||
];
|
||||
|
||||
if (Array.isArray(config.chrome_flags) && config.chrome_flags.length) {
|
||||
chrome_flags = config.chrome_flags;
|
||||
}
|
||||
|
||||
var user_agent = null;
|
||||
|
||||
if (config.user_agent) {
|
||||
@ -93,23 +97,28 @@ module.exports.handler = async function handler (event, context, callback) {
|
||||
}
|
||||
|
||||
if (user_agent) {
|
||||
ADDITIONAL_CHROME_FLAGS.push(
|
||||
chrome_flags.push(
|
||||
`--user-agent=${user_agent}`
|
||||
)
|
||||
}
|
||||
|
||||
if (config.proxy) {
|
||||
ADDITIONAL_CHROME_FLAGS.push(
|
||||
chrome_flags.push(
|
||||
'--proxy-server=' + config.proxy,
|
||||
)
|
||||
}
|
||||
|
||||
let launch_args = {
|
||||
args: ADDITIONAL_CHROME_FLAGS,
|
||||
args: chrome_flags,
|
||||
headless: config.headless,
|
||||
ignoreHTTPSErrors: true,
|
||||
};
|
||||
|
||||
if (config.debug === true) {
|
||||
console.log('Using the following puppeteer configuration: ');
|
||||
console.dir(launch_args);
|
||||
}
|
||||
|
||||
var results = {};
|
||||
var num_requests = 0;
|
||||
var metadata = {};
|
||||
@ -169,7 +178,7 @@ module.exports.handler = async function handler (event, context, callback) {
|
||||
perBrowserOptions.push({
|
||||
headless: config.headless,
|
||||
ignoreHTTPSErrors: true,
|
||||
args: ADDITIONAL_CHROME_FLAGS.concat(`--proxy-server=${proxy}`)
|
||||
args: chrome_flags.concat(`--proxy-server=${proxy}`)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user