载入爬虫模块
from requests_html import HTMLSession  # load the crawler module
建立session对象
from requests_html import HTMLSession  # load the crawler module

# Create the session object; all later requests are sent through it.
session = HTMLSession()
发现百度图片搜索规律，发起请求并匹配到图片的url
http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=`咱们搜图片的关键字`
from requests_html import HTMLSession  # load the crawler module

# Create the session object; all requests below are sent through it.
session = HTMLSession()

# Request the Baidu Images result page, using "二傻子" as the example keyword.
response = session.get(
    'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子',
    timeout=10,  # requests never times out by default; avoid hanging forever
)

# Template used to locate image URLs in the page source. Despite the
# variable name this is a parse-style template ("{}" marks the captured
# part), not a regular expression.
img_url_regex = '"thumbURL":"{}",'

# Collect every match of the template; each result holds one image URL.
img_url_list = response.html.search_all(img_url_regex)
访问图片url并且保存下来
from requests_html import HTMLSession  # load the crawler module

# Create the session object; all requests below are sent through it.
session = HTMLSession()

# Request the Baidu Images result page, using "二傻子" as the example keyword.
response = session.get(
    'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子',
    timeout=10,  # requests never times out by default; avoid hanging forever
)

# Template used to locate image URLs in the page source. Despite the
# variable name this is a parse-style template ("{}" marks the captured
# part), not a regular expression.
img_url_regex = '"thumbURL":"{}",'

# Collect every match of the template; each result holds one image URL.
img_url_list = response.html.search_all(img_url_regex)

# Download every image and store it as "第1张.jpg", "第2张.jpg", ...
for num, url in enumerate(img_url_list, start=1):
    # url[0] is the text captured by the "{}" in the template.
    img_response = session.get(url[0], timeout=10)
    if not img_response.ok:
        # Do not save an HTTP error page under a .jpg name.
        print(f'skipping image {num}: HTTP {img_response.status_code}')
        continue
    # Write the raw bytes to a local file.
    with open(f'第{num}张.jpg', 'wb') as fw:
        fw.write(img_response.content)
类的封装
from urllib.parse import quote

from requests_html import HTMLSession


class BaiDuImg:
    """Download the images found by a Baidu Images keyword search.

    Typical use is ``BaiDuImg().run()``: a keyword is read from standard
    input, the result page is fetched, the image URLs are extracted from
    it, and every image is saved to the current directory as
    ``第<n>张.jpg``.
    """

    # Single session created when the class is defined and shared by all
    # instances.
    session = HTMLSession()
    # Template used to locate image URLs in the page source. Despite the
    # name this is a parse-style template ("{}" marks the captured part),
    # not a regular expression.
    img_url_regex = '"thumbURL":"{}",'
    # Search URL; filled in by get_search().
    url = ''
    # Extracted matches; rebound (never mutated in place) by
    # get_img_url_list(), so the shared class-level default stays empty.
    img_url_list = []
    # Seconds to wait for each HTTP request; requests has no default timeout.
    timeout = 10

    def get_search(self):
        """Read the search keyword from stdin and build the search URL."""
        search = input()
        # Percent-encode the keyword so characters such as "&", "#" or
        # spaces cannot break or truncate the query string.
        self.url = (
            'http://image.baidu.com/search/index'
            f'?tn=baiduimage&fm=result&ie=utf-8&word={quote(search, safe="")}'
        )

    def get_img_url_list(self):
        """Fetch the result page and store every template match."""
        response = self.session.get(self.url, timeout=self.timeout)
        # The template is a class attribute, so it must be reached through
        # self; a bare `img_url_regex` raises NameError inside a method.
        self.img_url_list = response.html.search_all(self.img_url_regex)

    def save_img(self):
        """Download each extracted image and write it to ``第<n>张.jpg``."""
        for num, url in enumerate(self.img_url_list, start=1):
            # url[0] is the text captured by the "{}" in the template.
            response = self.session.get(url[0], timeout=self.timeout)
            if not response.ok:
                # Do not save an HTTP error page under a .jpg name.
                print(f'skipping image {num}: HTTP {response.status_code}')
                continue
            # Write the raw bytes to a local file.
            with open(f'第{num}张.jpg', 'wb') as fw:
                fw.write(response.content)

    def run(self):
        """Run the whole pipeline: ask for a keyword, search, download."""
        self.get_search()
        self.get_img_url_list()
        self.save_img()


if __name__ == '__main__':
    baidu = BaiDuImg()
    baidu.run()