最近听说用 Puppeteer 写爬虫很不错,来尝试一下。
参考文档: https://zhaoqize.github.io/puppeteer-api-zh_CN/
const puppeteer = require('puppeteer-core');
// Entry point: an async IIFE so `await` can be used for the whole scrape.
// Flow: launch local Chrome -> open Baidu mobile -> submit a search ->
// wait for the results document -> print the text of every <h3> -> close Chrome.
(async () => {
  // Substring that identifies a Baidu mobile search-results URL.
  const resultsUrlMarker = 'https://m.baidu.com/s?word=';

  const browser = await puppeteer.launch({
    headless: true, // run Chrome in headless mode (no visible window)
    ignoreHTTPSErrors: true,
    executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
    // For debugging, add `devtools: true` here.
  });

  try {
    const page = await browser.newPage();

    // Present the page with a mobile (Android / QQ Browser) user agent.
    await page.setUserAgent("Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36");
    await page.setViewport({
      width: 1280,
      height: 960,
      deviceScaleFactor: 1,
      hasTouch: true,
      isLandscape: false,
      isMobile: true,
    });

    // Adapt the 'domcontentloaded' event into a promise that settles once a
    // search-results document has loaded. The listener is synchronous, so it
    // cannot produce an unobserved rejection; every loaded URL is still logged.
    // It must be registered before navigation starts so no event is missed.
    const resultsLoaded = new Promise((resolve) => {
      page.on('domcontentloaded', () => {
        const currentUrl = page.url();
        console.log(currentUrl);
        if (currentUrl.includes(resultsUrlMarker)) {
          resolve();
        }
      });
    });

    await page.goto('https://m.baidu.com', { waitUntil: 'domcontentloaded' });
    await page.type('#index-kw', '美女图片');
    await page.click('#index-bn');

    // Scrape only after the click has resolved AND the results page is ready,
    // so the browser is never torn down while an action is still in flight.
    await resultsLoaded;

    // Runs inside the page: collect the visible text of every <h3> element.
    const headings = await page.evaluate(() =>
      Array.from(document.querySelectorAll("h3"), (heading) => heading.innerText)
    );
    console.log(headings);
  } finally {
    // Always shut Chrome down, even when a step above throws; closing the
    // browser also closes the pages it opened.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});