所用到的 node 模块及依赖包
项目建立
引入模块
const express = require('express');
const superagent = require('superagent');
const charset = require('superagent-charset');
const cheerio = require('cheerio');
const fs = require('fs');
复制代码
假如咱们要爬取 www.nanrentu.cc/sgtp/ 链接的图片,首先分析此页面的结构:
// Selector for the <li> items nested under the .h-piclist picture list.
$('.wrap .h-sgtp-list .h-sgtp-box-m .h-piclist li')
复制代码
下面直接看源码:
const express = require('express');
const superagent = require('superagent');
const charset = require('superagent-charset');
const cheerio = require('cheerio');
const fs = require('fs');
// Listing page that the scraper fetches: https://www.nanrentu.cc/sgtp/
const baseUrl = 'https://www.nanrentu.cc/sgtp/';

// Patch superagent so that requests expose the .charset() method used below.
charset(superagent);

// Express application that serves the scraper endpoint.
const app = express();
/**
 * Download one scraped image into ./image/<index>.png.
 *
 * Failures are logged rather than thrown so that one bad image does not
 * affect the others or the HTTP response.
 *
 * @param {string} imgSrc - Value of the <img> src attribute. It may be
 *   relative or protocol-relative, so it is resolved against baseUrl.
 * @param {number} index - Position of the image in the list; used as the
 *   file name.
 */
const downloadImage = (imgSrc, index) => {
  let imgUrl;
  try {
    imgUrl = new URL(imgSrc, baseUrl).href;
  } catch (urlErr) {
    console.log(`图片地址无效:${imgSrc}`);
    return;
  }

  const file = fs.createWriteStream(`./image/${index}.png`);
  // Without a listener, a stream 'error' event would crash the process.
  file.on('error', (fileErr) => {
    console.log(`图片保存失败:${fileErr}`);
  });

  const request = superagent.get(imgUrl);
  // When a request is piped there is no .end() callback, so errors are only
  // reported through the 'error' event.
  request.on('error', (reqErr) => {
    console.log(`图片下载失败:${reqErr}`);
    // Release the file handle; the pipe will never finish the stream.
    file.destroy();
  });
  request.pipe(file);
};

/**
 * GET / — fetch the listing page at baseUrl, collect the src of every image
 * in the picture list, start downloading each image into ./image, and
 * respond with the collected list.
 *
 * Response body:
 *   { code: 200, data: [{ imgSrc }] } when the listing page was fetched,
 *   { code: 400, msg: err } when the listing page request failed.
 *
 * The response is sent without waiting for the downloads to finish.
 */
app.get('/', (req, res) => {
  // CORS headers. The allowed request headers go into ONE value: setting the
  // same header a second time replaces the first value instead of adding to it.
  res.header('Access-Control-Allow-Origin', '*');
  res.header('Access-Control-Allow-Methods', 'PUT, GET, POST, DELETE, OPTIONS');
  res.header('Access-Control-Allow-Headers', 'X-Requested-With, Content-Type');

  superagent.get(baseUrl)
    .charset('utf-8')
    .end((err, data) => {
      if (err) {
        console.log(`请求错误:${err}`);
        res.json({ code: 400, msg: err });
        return;
      }

      // createWriteStream does not create missing folders, so make sure the
      // target folder exists before any download starts.
      try {
        fs.mkdirSync('./image', { recursive: true });
      } catch (dirErr) {
        console.log(`图片目录创建失败:${dirErr}`);
      }

      const $ = cheerio.load(data.text);
      const item = [];
      $('.wrap .h-sgtp-list .h-sgtp-box-m .h-piclist li').each((index, element) => {
        const imgSrc = $(element).find('img').attr('src');
        item.push({ imgSrc });
        // An <li> without an <img src> has nothing to download.
        if (imgSrc) {
          downloadImage(imgSrc, index);
        }
      });
      res.json({ code: 200, data: item });
    });
});
// Start the HTTP server on port 3333 and log a message once it is listening.
const onListening = () => {
  console.log('启动成功:localhost:3333');
};
app.listen(3333, onListening);
复制代码
这样咱们就爬取了不少帅哥的图片,保存在咱们当前项目目录的 image 文件夹下面。