Python 抓取图片例子实测

时间 2019-11-12

原文：原文链接

# -*- coding: utf-8 -*-
import urllib
import re
import time
import os
import socket
def schedule(a, b, c):
    """Print download progress; usable as a ``urlretrieve`` report hook.

    Args:
        a: Number of data blocks downloaded so far.
        b: Size of one data block.
        c: Total size of the remote file; zero or negative means unknown.

    Prints the completed percentage, capped at 100, with two decimals.
    When ``c`` is not positive no percentage can be computed, so the
    amount downloaded so far (``a * b``) is printed instead.
    """
    if c <= 0:
        # Unknown total size: avoid ZeroDivisionError / negative percentages.
        print('%d bytes' % (a * b))
        return
    # The last block usually overshoots the file size, hence the cap.
    per = min(100.0, 100.0 * a * b / c)
    print('%.2f%%' % per)

def getHtml(url, timeout=5):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on socket operations (default 5).

    Returns:
        The response body exactly as read from the connection.
    """
    # urlopen lives in urllib on Python 2 and in urllib.request on Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # This is a process-wide default: it also applies to sockets opened
    # later by urllib calls that take no timeout argument of their own.
    socket.setdefaulttimeout(timeout)
    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when reading fails.
        page.close()

def downloadImg(html, prei,
                basedir='/Users/chenbing/Documents/workspace/python/jpg',
                reporthook=None):
    """Download every ``.jpg`` named by a ``src`` attribute in ``html``.

    Images are saved as ``0.jpg``, ``1.jpg``, ... inside the folder
    ``<basedir>/<prei><year>-<month>-<day>`` (local date, not zero-padded).
    The folder is created when it does not exist yet.

    Args:
        html: Page source to scan; bytes are decoded as UTF-8 first.
        prei: Prefix for the dated folder name.
        basedir: Directory under which the dated folder is created.
        reporthook: Progress callback forwarded to ``urlretrieve``;
            defaults to the module-level ``schedule``.

    Returns:
        The result of the last ``urlretrieve`` call, or ``None`` when the
        page contains no matching image.
    """
    # urlretrieve lives in urllib on Python 2, urllib.request on Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if reporthook is None:
        reporthook = schedule

    # On Python 3 a fetched page arrives as bytes; the pattern below is text.
    # (On Python 2 ``bytes is str``, so this branch is never taken.)
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'ignore')

    imglist = re.findall(r'src="(.+?\.jpg)"', html)

    # Name of the destination folder: prefix plus today's date.
    t = time.localtime()
    foldername = '%s%d-%d-%d' % (prei, t.tm_year, t.tm_mon, t.tm_mday)
    picpath = os.path.join(basedir, foldername)

    # Create the folder; tolerate it already existing (no check/create race).
    try:
        os.makedirs(picpath)
    except OSError:
        if not os.path.isdir(picpath):
            raise

    image = None  # stays None when the page has no images
    for x, imgurl in enumerate(imglist):
        # os.path.join keeps the file inside picpath on every platform; a
        # literal backslash is just part of the file name on POSIX systems.
        target = os.path.join(picpath, '%s.jpg' % x)
        print('Downloading image to location: ' + target + '\nurl=' + imgurl)
        image = urlretrieve(imgurl, target, reporthook)
    return image



if __name__ == '__main__':
    # Try each page URL formed by appending a six-digit number to the base
    # address, and download the JPEG images found on it.
    print("'' start .........")
    for i in range(100000, 999999):
        strs = "http://tieba.baidu.com/p/5039" + str(i)
        try:
            html = getHtml(strs)
            downloadImg(html, str(i))
        except Exception as e:
            # Best effort: report the failing URL with its cause and move on
            # to the next page instead of aborting the whole run.
            print(strs + " error " + str(e))

    print("Download has finished.")
python

Python 抓取图片例子 实测

Python 抓取图片例子实测