Python爬取百度贴吧

# Download a range of Baidu Tieba listing pages and save each page as
# "<tieba name>/<page number>.html" under the current working directory.
# Run the script and answer the three prompts; the saved HTML files can then
# be browsed offline.
import os
from urllib import parse, request

# Listing endpoint; the query string (kw=<tieba name>&pn=<offset>) is appended.
BASE_URL = 'http://tieba.baidu.com/f?'
# The "pn" query parameter grows by this amount for each successive page.
PN_STEP = 50


def build_page_url(keyword, page):
    """Return the listing URL for the 1-based ``page`` of tieba ``keyword``.

    Args:
        keyword: Tieba name; it is percent-encoded so that non-ASCII names
            produce a valid URL.
        page: 1-based page number; page 1 maps to ``pn=0``.

    Returns:
        The full URL string, e.g. ``http://tieba.baidu.com/f?kw=python&pn=0``.
    """
    query = parse.urlencode({"kw": keyword})
    offset = (page - 1) * PN_STEP
    return BASE_URL + query + '&pn=' + str(offset)


def download_pages(keyword, start, end):
    """Fetch pages ``start``..``end`` (inclusive) and write each one to disk.

    Each page is saved as ``<keyword>/<page>.html`` encoded as UTF-8.

    Args:
        keyword: Tieba name; also used as the output directory name.
        start: First page number to download (1-based).
        end: Last page number to download (inclusive).

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        UnicodeDecodeError: If a response body is not valid UTF-8.
        OSError: If the directory or a file cannot be created.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(keyword, exist_ok=True)
    for page in range(start, end + 1):
        url = build_page_url(keyword, page)
        print("downing...第%d页" % page)
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(url) as response:
            html = response.read().decode("utf-8")
        target = os.path.join(keyword, str(page) + '.html')
        with open(target, 'w', encoding="utf-8") as f:
            f.write(html)


def main():
    """Prompt for the tieba name and page range, then download the pages."""
    a = input("贴吧")
    start = input("开始页")
    end = input("结束页")
    download_pages(a, int(start), int(end))
    print("下载完成")


if __name__ == "__main__":
    main()
相关文章
相关标签/搜索