爬虫cheerio乱码问题

现在遇到的乱码问题是:

res.write(爬到的内容),在页面上显示是正常的html

可是直接在node端打印console.log(爬到的内容)却是乱码

调整乱码问题的关键是cheerio.load中的{decodeEntities: false}属性

代码一

var cheerio = require('cheerio')
  , superagent = require('superagent')
  , express = require('express');

// Page to scrape; the route below extracts its first `.r_search_item` element.
var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

/**
 * GET / -- fetches `url` with superagent, parses the body with cheerio
 * (entity decoding disabled via `decodeEntities: false`) and responds with
 * the inner HTML of the first `.r_search_item` element.
 *
 * Failures are forwarded to Express through `next(err)` instead of throwing
 * inside the asynchronous callback.
 */
app.get('/', function (req, res, next) {
  superagent.get(url).end(function (err, pres) {
    // NOTE(review): when `err` is set, `pres` is presumably not usable
    // (e.g. network failure) -- bail out before touching `pres.text`.
    if (err) {
      return next(err);
    }

    var html = pres.text;
    var $ = cheerio.load(html, {decodeEntities: false});
    var ans = $('.r_search_item').eq(0).html();

    // `res.write` rejects non-string/non-buffer chunks, so guard against an
    // empty selection rather than throwing inside this callback.
    if (ans == null) {
      return next(new Error('No .r_search_item element found at ' + url));
    }

    // No headers are added here; the response is only written and then
    // finished, since without `res.end()` the request never completes.
    res.write(ans);
    res.end();
  });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
复制代码

代码二

var cheerio = require('cheerio')
  , superagent = require('superagent')
  , express = require('express');

// Page to scrape; the route below extracts its first `.r_search_item` element.
var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

/**
 * GET / -- fetches `url` with superagent, parses the body with cheerio using
 * its default options (no `decodeEntities` override) and responds with the
 * inner HTML of the first `.r_search_item` element.
 *
 * Failures are forwarded to Express through `next(err)` instead of throwing
 * inside the asynchronous callback.
 */
app.get('/', function (req, res, next) {
  superagent.get(url).end(function (err, pres) {
    // NOTE(review): when `err` is set, `pres` is presumably not usable
    // (e.g. network failure) -- bail out before touching `pres.text`.
    if (err) {
      return next(err);
    }

    var html = pres.text;
    var $ = cheerio.load(html);
    var ans = $('.r_search_item').eq(0).html();

    // `res.write` rejects non-string/non-buffer chunks, so guard against an
    // empty selection rather than throwing inside this callback.
    if (ans == null) {
      return next(new Error('No .r_search_item element found at ' + url));
    }

    // No headers are added here; the response is only written and then
    // finished, since without `res.end()` the request never completes.
    res.write(ans);
    res.end();
  });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
复制代码

总结

当为'代码一'时,添加了{decodeEntities: false}属性,node端正常,页面乱码;当为'代码二'时,没有{decodeEntities: false}属性,node端乱码,页面正常。

解决方案

www.tuicool.com/articles/6R…