我分析王者荣耀网站上的英雄资料库,发现全部英雄的页面基本上都是连贯的,而且还是偏向于静态网页,没有过多的JavaScript渲染,因此比较适合来一波穷举遍历爬虫,不过速度也很快。利用Python的字符串操作、列表操作、字典操作等循环遍历英雄页面,再利用列表切片索引和字符串拼接等方法来解析HTML,获得皮肤图片链接和背景故事文本,将皮肤图片二进制文件保存为png高清大图,将全部英雄背景故事合并保存为文本文档。
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Download hero skin images and record the URL of every image saved.

Hero ids are enumerated by brute force over the ranges in ``HERO_ID_RANGES``.
For each id the skin images numbered by ``SKIN_NUMBERS`` are requested, and
every image answered with HTTP 200 is written into ``SKIN_DIR``.  When the
crawl finishes, the URLs of all saved images are written to ``LINK_FILE``,
one per line.
"""
import os

import requests

SKIN_DIR = './wzry-jpg/'
LINK_FILE = './wzry-jpg/wzry-pf.txt'
SKIN_URL = ("http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
            "{0}/{0}-bigskin-{1}.jpg")
HERO_ID_RANGES = (range(105, 200), range(501, 516))
SKIN_NUMBERS = range(1, 9)
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever


def _fetch_image(url):
    """Return the response for *url*, or None unless it answered HTTP 200.

    Network errors (including timeouts) are reported and treated like a
    missing image so that one bad request does not abort the whole crawl.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print("request failed: {} ({})".format(url, err))
        return None
    if response.status_code != 200:
        return None
    return response


def _download_hero_skins(hero_id):
    """Save every available skin image of *hero_id*; return the saved URLs.

    If skin number 1 is unavailable the id is skipped entirely, without
    requesting the remaining skin numbers.  Any later unavailable skin
    number is skipped individually.
    """
    saved_urls = []
    for skin_no in SKIN_NUMBERS:
        img_url = SKIN_URL.format(hero_id, skin_no)
        response = _fetch_image(img_url)
        if response is None:
            if skin_no == 1:
                # The first skin doubles as the existence probe for the id,
                # so it is fetched once instead of probed and re-downloaded.
                break
            continue
        saved_urls.append(img_url)
        print("开始下载第{}-{}个英雄皮肤图片>>>".format(hero_id, skin_no), end='')
        # NOTE: the bytes come from a ``.jpg`` URL; the ``.png`` file name is
        # kept so the output matches files produced by earlier runs.
        target = SKIN_DIR + str(hero_id) + '-' + str(skin_no) + '.png'
        with open(target, 'wb') as img_file:
            img_file.write(response.content)
        print('======下载完成======')
    return saved_urls


def main():
    """Crawl all hero id ranges, then write the saved image URLs to disk."""
    # Create the output directory up front; opening files inside a missing
    # directory would raise FileNotFoundError.
    os.makedirs(SKIN_DIR, exist_ok=True)

    saved_urls = []
    for id_range in HERO_ID_RANGES:
        for hero_id in id_range:
            saved_urls.extend(_download_hero_skins(hero_id))

    # Write the collected links, one per line.
    with open(LINK_FILE, 'w', encoding='utf-8') as link_file:
        link_file.writelines(url + '\n' for url in saved_urls)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Collect the background story of every hero into one text file.

Hero ids are enumerated by brute force over the ranges in ``HERO_ID_RANGES``.
Each hero detail page that answers HTTP 200 is decoded as GBK, the hero name
and story text are cut out of the HTML with plain string splitting, and one
record per hero is appended to ``STORY_FILE``.
"""
import requests

# NOTE(review): absolute, machine-specific path kept from the original
# script; its directory must already exist.
STORY_FILE = 'R:/python123全国等考/wzry-jpg/pop-bd.txt'
HERO_PAGE_URL = "https://pvp.qq.com/web201605/herodetail/{}.shtml"
HERO_ID_RANGES = (range(105, 200), range(501, 516))
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever
SEPARATOR = '-' * 30


def _fetch_hero_page(hero_id):
    """Return the decoded HTML of the hero page, or None if unavailable.

    A page is unavailable when the request fails or the status is not 200.
    """
    url = HERO_PAGE_URL.format(hero_id)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print("request failed: {} ({})".format(url, err))
        return None
    if response.status_code != 200:
        return None
    response.encoding = 'gbk'
    return response.text


def _parse_hero_page(html):
    """Return ``(name, story)`` cut out of *html*, or None if markers are missing.

    The name is the first single-quoted string found after the first
    ``cname`` occurrence and before the next comma.  The story is the text
    between the first and second ``p>`` after the first ``pop-bd``
    occurrence, minus its last two characters (presumably the ``</`` of the
    closing tag -- confirm against the live page markup).
    """
    try:
        name = html.split('cname')[1].split(',')[0].split("'")[1]
        story = html.split('pop-bd')[1].split('p>')[1][:-2]
    except IndexError:
        # A marker is absent; report "no data" instead of aborting the crawl.
        return None
    return name, story


def main():
    """Crawl all hero id ranges and write one story record per hero."""
    # Explicit UTF-8 so writing Chinese text does not depend on the locale's
    # default codec; ``with`` closes the file even if the crawl fails midway.
    with open(STORY_FILE, 'w', encoding='utf-8') as story_file:
        for id_range in HERO_ID_RANGES:
            for hero_id in id_range:
                html = _fetch_hero_page(hero_id)
                if html is None:
                    continue
                parsed = _parse_hero_page(html)
                if parsed is None:
                    print("skipping hero {}: page markers not found".format(hero_id))
                    continue
                name, story = parsed
                print("开始保存第{}个英雄背景故事>>>".format(hero_id))
                # The trailing newline keeps the next hero's header off the
                # separator line.
                story_file.write(
                    '【' + name + '】' + '\n' + story + '\n' + SEPARATOR + '\n'
                )


if __name__ == '__main__':
    main()