判断了字符集之后,如果要显示中文,需要用 print。示例如下:
import urllib2url
import respa
page = 1it
url = 'http://www.qiushibaike.com/hot/page/' + str(page)class
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'import
headers = { 'User-Agent' : user_agent }file
out_file = open ("qiushibaike.txt", "w")request
request = urllib2.Request(url,headers = headers)im
response = urllib2.urlopen(request)img
buf=response.read()
out_file.write(buf)
out_file.close()
list_jpg=re.findall(r'http://.+\.jpg', buf)
list_joketxt=re.findall(r'<span>.+</span>', buf)
print buf #输出网页源文件,格式正确,中文显示正常
# list_jpg=re.findall(r'<img.+src=.+\.jpg', buf)
list_jpg=re.findall(r'http://.+\.jpg', buf)
list_joketxt=re.findall(r'<span>.+</span>', buf)
print list_joketxt #显示不正确,中文显示不正常
print list_joketxt[0] #输出正确,中文显示正常
for jok in list_joketxt:
print jok #输出正确,中文显示正常