Python 抓取图片例子 实测

# -*- coding: utf-8 -*-  
import urllib  
import re  
import time  
import os  
import socket  
def schedule(a,b,c):
    """Print download progress; intended as a ``urlretrieve`` report hook.

    a: number of data blocks downloaded so far
    b: size of one data block
    c: size of the remote file

    Prints the completed percentage with two decimals, capped at 100
    (the last block usually overshoots the real file size).

    When ``c`` is not positive the percentage cannot be computed: a zero
    size would raise ZeroDivisionError, and the urllib documentation notes
    the hook may receive -1 when the server reports no size. In that case
    the amount downloaded so far is printed instead.
    """
    if c <= 0:
        print('%d bytes' % (a * b))
        return
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)
  
def getHtml(url):
    """Fetch ``url`` and return the response body exactly as read.

    Side effect: sets the process-wide default socket timeout to 5
    seconds, so this and any later socket operation gives up instead of
    blocking forever.

    Errors raised by ``urllib.urlopen`` or by reading the response
    propagate to the caller.
    """
    socket.setdefaulttimeout(5)
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even when read() fails or times out.
        page.close()
  
def downloadImg(html,prei,basedir='/Users/chenbing/Documents/workspace/python/jpg'):
    """Download every ``src="....jpg"`` image referenced in ``html``.

    html:    page source to scan for image URLs
    prei:    prefix for the destination folder name
    basedir: directory under which the destination folder is created
             (defaults to the originally hard-coded location)

    Images are saved as ``0.jpg``, ``1.jpg``, ... inside
    ``<basedir>/<prei><year>-<month>-<day>`` (local date, not zero-padded).
    The folder is created if it does not exist yet.

    Returns the result of the last ``urllib.urlretrieve`` call, or None
    when the page contains no matching image. Download errors propagate to
    the caller and abort the remaining images.
    """
    imglist = re.findall(r'src="(.+?\.jpg)"', html)

    # Folder name: prefix followed by today's local date.
    t = time.localtime()
    foldername = '%s%d-%d-%d' % (prei, t.tm_year, t.tm_mon, t.tm_mday)
    picpath = os.path.join(basedir, foldername)

    if not os.path.exists(picpath):
        os.makedirs(picpath)

    # Stays None when there is nothing to download, so the final return
    # cannot hit an unbound local.
    image = None
    for x, imgurl in enumerate(imglist):
        # os.path.join picks the right separator; a literal backslash would
        # become part of the file name on POSIX systems.
        target = os.path.join(picpath, '%s.jpg' % x)
        print('Downloading image to location: ' + target + '\nurl=' + imgurl)
        image = urllib.urlretrieve(imgurl, target, schedule)
    return image
  
      
      
if __name__ == '__main__':
    print('''''  start  .........''')
    # Walk the thread ids 5039100000 .. 5039999998 and try to fetch the
    # images of each thread page.
    for i in range(100000, 999999):
        strs = "http://tieba.baidu.com/p/5039"+str(i)
        try:
            html = getHtml(strs)
            downloadImg(html,str(i))
        except Exception as e:
            # Best-effort crawl: report the failing URL with the reason
            # and carry on with the next thread id.
            print(strs + " error " + str(e))

    print("Download has finished.")
 python

相关文章
相关标签/搜索