from urllib import request


def f(url):
    """Fetch *url* over HTTP and save the response body to ``url.html``.

    Also prints the request line and how many bytes were received.
    """
    print("GET:%s" % url)
    # Perform the request; ``data`` is the raw page body as bytes.
    resp = request.urlopen(url)
    data = resp.read()
    # ``with`` guarantees the file is closed even if write() raises,
    # and using a distinct name avoids shadowing the function ``f``
    # with a file handle inside its own body.
    with open("url.html", "wb") as out:
        out.write(data)
    print('%d bytes received from %s.' % (len(data), url))


# Page to fetch.
f("http://www.cnblogs.com/alex3714/articles/5248247.html")
from gevent import monkey

# Mark every blocking stdlib I/O call so gevent can switch greenlets on it
# (equivalent to inserting gevent.sleep at each wait).  patch_all() must run
# BEFORE other modules are imported so they pick up the patched socket.
monkey.patch_all()

import time
from urllib import request

import gevent


def f(url):
    """Fetch *url* and print how many bytes were received."""
    print("GET:%s" % url)
    # Perform the request; ``data`` is the raw page body as bytes.
    resp = request.urlopen(url)
    data = resp.read()
    print('%d bytes received from %s.' % (len(data), url))


# --------------------------- serial ---------------------------- #
# URLs to benchmark against.
urls = ['https://www.python.org/',
        'https://www.yahoo.com/',
        'https://github.com/']

# Time the sequential fetches.
time_start = time.time()
for url in urls:
    f(url)
print("同步cost", time.time() - time_start)

# ------------------------- concurrent -------------------------- #
# Time the same fetches running concurrently in greenlets.
async_time_start = time.time()
# Spawn one greenlet per URL — reuse ``urls`` instead of repeating the
# literals — and block until all of them have finished.
gevent.joinall([gevent.spawn(f, url) for url in urls])
print("异步cost", time.time() - async_time_start)