### A fuzzy beginning ~~ 1. A small journey: scraping Baidu Tieba HTML pages
from urllib.request import Request, urlopen
from urllib import parse


def save_page(filename, data):
    """Write raw page bytes *data* to *filename*."""
    print("正在保存:", filename)
    with open(filename, 'wb') as f:
        f.write(data)


def download_page(full_pn):
    """Fetch *full_pn* with a browser-like User-Agent and return the body as bytes."""
    print("正在下载:", full_pn)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36"
    }
    request = Request(full_pn, headers=headers)
    # FIX: close the HTTP response (original leaked the connection object).
    with urlopen(request) as res:
        return res.read()  # bytes


def baidu_spider(url, start_page, end_page):
    """Download pages start_page..end_page (inclusive) of *url* and save each
    one to a local "第N页.html" file."""
    for page in range(start_page, end_page + 1):
        print(page)
        # Tieba paginates 50... NOTE(review): offset step of 20 per page is
        # what the original used — confirm against the site's pagination.
        pn = (page - 1) * 20
        full_url = url + "&pn=" + str(pn)
        print(full_url)  # the request URL actually fetched
        html = download_page(full_url)
        filename = "第" + str(page) + "页.html"
        save_page(filename, html)


def main():
    """Prompt for a keyword and a page range, then crawl those Tieba pages."""
    # FIX: the search keyword is text — the original wrapped input() in int(),
    # which crashed with ValueError on any non-numeric keyword.
    kw = input("请输入要爬去的内容:")
    start_page = int(input("请输入首页:"))
    end_page = int(input("请输入末尾页:"))
    query = parse.urlencode({"kw": kw})  # percent-encode the keyword for the URL
    url = "https://tieba.baidu.com/f?" + query + "&ie=utf-8"
    print(url)
    # FIX: the original wrote `baidu_spider = (url, start_page, end_page)`,
    # binding a tuple to the name instead of calling the function — the
    # crawl never ran. Call it properly.
    baidu_spider(url, start_page, end_page)


if __name__ == "__main__":
    main()

# 原文连接: https://mp.csdn.net/mdeditor/80897296