现在遇到的乱码问题是:
res.write(爬到的内容),在页面上显示是正常的html
可是直接在node端打印console.log(爬到的内容)是乱码
调整乱码问题的关键属性是cheerio.load中的{decodeEntities: false}属性
代码一
var cheerio = require('cheerio')
, superagent = require('superagent')
, express = require('express');
// Page to scrape.
var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

/**
 * GET / — fetches `url`, extracts the inner HTML of the first
 * `.r_search_item` element and sends it back as the response body.
 *
 * Upstream request errors are forwarded to the Express error handler, and a
 * missing element yields an empty 404 instead of throwing inside the callback.
 */
app.get('/', function (req, res, next) {
  superagent.get(url)
    .end(function (err, pres) {
      // On failure `pres` may be missing or carry an error page; do not parse it.
      if (err) {
        return next(err);
      }

      // decodeEntities: false asks cheerio to leave the text as-is, so .html()
      // returns the characters themselves rather than escaped entities
      // (NOTE(review): behaviour depends on the cheerio version — confirm).
      var $ = cheerio.load(pres.text, {decodeEntities: false});
      var ans = $('.r_search_item').eq(0).html();

      // .html() returns null when the selector matches nothing; writing null
      // to the response would throw.
      if (ans === null) {
        return res.status(404).end();
      }

      res.write(ans);
      // Without end() the response is never completed and the client hangs.
      res.end();
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
复制代码
代码二
var cheerio = require('cheerio')
, superagent = require('superagent')
, express = require('express');
// Page to scrape.
var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

/**
 * GET / — fetches `url`, extracts the inner HTML of the first
 * `.r_search_item` element and sends it back as the response body.
 *
 * Upstream request errors are forwarded to the Express error handler, and a
 * missing element yields an empty 404 instead of throwing inside the callback.
 */
app.get('/', function (req, res, next) {
  superagent.get(url)
    .end(function (err, pres) {
      // On failure `pres` may be missing or carry an error page; do not parse it.
      if (err) {
        return next(err);
      }

      // Default cheerio options: .html() may return non-ASCII text escaped as
      // character entities (NOTE(review): depends on the cheerio version — confirm).
      var $ = cheerio.load(pres.text);
      var ans = $('.r_search_item').eq(0).html();

      // .html() returns null when the selector matches nothing; writing null
      // to the response would throw.
      if (ans === null) {
        return res.status(404).end();
      }

      res.write(ans);
      // Without end() the response is never completed and the client hangs.
      res.end();
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
复制代码
当为'代码一'时,添加了{decodeEntities: false}属性,node端正常,页面乱码;当为'代码二'时,没有{decodeEntities: false}属性,node端乱码,页面正常。