Python 爬虫:煎蛋网妹子图

使用 Headless Chrome 替代了 PhantomJS。

图片保存到指定文件夹中。

import os

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

 6 chrome_options = Options()  7 chrome_options.add_argument('--headless')  8 chrome_options.add_argument('--disable-gpu')  9 driver = webdriver.Chrome(chrome_options=chrome_options) 10 dir = 'C:/spider-download/jandan-girls/'
11 img_urls = [] 12 page_urls = ["http://jandan.net/ooxx/page-{}#comments".format(str(i)) for i in range(5, 6)] 13 
def GetImgUrl(u):
    """Collect image links from one listing page into ``img_urls``.

    Loads ``u`` in the shared ``driver``, parses the resulting page source
    with BeautifulSoup, and appends the target of every ``a.view_img_link``
    anchor to the module-level ``img_urls`` list. Links containing ``gif``
    are ignored.

    Args:
        u: URL of the page to scrape.
    """
    driver.get(u)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for link in soup.select('a.view_img_link'):
        href = link.get('href')
        # An anchor without an href has nothing to download; concatenating
        # 'http:' with None would raise TypeError.
        if not href:
            continue
        # Skip GIFs: any link whose URL contains "gif" is ignored.
        if 'gif' in href:
            continue
        # Add a scheme only when the link lacks one, so an already-absolute
        # URL is not turned into "http:http://...".
        if not href.startswith(('http://', 'https://')):
            href = 'http:' + href
        img_urls.append(href)

def DownloadImg():
    """Download every URL in ``img_urls`` into the ``dir`` folder.

    Each file is named after the last 20 characters of the URL's final path
    segment. Progress is printed per image; a failed request is reported and
    skipped so that one bad URL does not abort the remaining downloads.
    """
    # open() does not create missing folders, so make sure the target exists.
    os.makedirs(dir, exist_ok=True)
    for n, url in enumerate(img_urls, start=1):
        print(str(n) + ' 张 ... ', end='')
        # Slice the final path segment rather than the whole URL: with a
        # short file name, url[-20:] would pull a '/' into the local path.
        filename = url.rsplit('/', 1)[-1][-20:]
        try:
            # Time out instead of hanging forever on a stalled connection,
            # and treat HTTP error pages as failures rather than saving them.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('FAILED ({})'.format(exc))
            continue
        # Fetch first, write second, so a failed request leaves no empty file.
        with open(os.path.join(dir, filename), 'wb') as f:
            f.write(response.content)
        print('OK!')
# Scrape all listing pages, then shut the browser down even if scraping
# fails; otherwise the headless Chrome process is left running.
try:
    for u in page_urls:
        GetImgUrl(u)
finally:
    driver.quit()

print('*** 开始下载 ***')
DownloadImg()
print('*** 下载完成 ***')
相关文章
相关标签/搜索
本站公众号
   欢迎关注本站公众号,获取更多信息