# Contents (Python)
# Plain sequential crawl: every simulated download blocks until it finishes,
# so the total time is the sum of the individual waits.
from time import sleep
import time
from multiprocessing.dummy import Pool  # not used by this sequential example


def request(url):
    """Simulate downloading ``url`` by blocking the caller for two seconds.

    Prints one line when the download starts and one when it finishes.
    """
    print('正在下载:', url)
    sleep(2)
    print('下载成功:', url)


start = time.time()

urls = [
    'www.baidu.com',
    'www.taobao.com',
    'www.sogou.com',
]

# One URL after another: nothing overlaps.
for url in urls:
    request(url)

print('总耗时:', time.time() - start)

# Example output:
# 正在下载: www.baidu.com
# 下载成功: www.baidu.com
# 正在下载: www.taobao.com
# 下载成功: www.taobao.com
# 正在下载: www.sogou.com
# 下载成功: www.sogou.com
# 总耗时: 6.003343343734741
# Thread pool: the simulated downloads are handed to worker threads so their
# waits overlap.
from time import sleep
import time
from multiprocessing.dummy import Pool


def request(url):
    """Simulate downloading ``url`` by blocking the calling thread for two seconds.

    Prints one line when the download starts and one when it finishes.
    """
    print('正在下载:', url)
    sleep(2)
    print('下载成功:', url)


start = time.time()

urls = [
    'www.baidu.com',
    'www.taobao.com',
    'www.sogou.com',
]

# map() blocks until every URL has been processed; leaving the with-block
# then terminates the pool so its worker threads are not left running.
with Pool(3) as pool:
    pool.map(request, urls)

print('总耗时', time.time() - start)
# Basic usage
import asyncio


async def hello(name):
    """Print a greeting addressed to ``name``."""
    print('hello to:', name)


# Calling a coroutine function only builds a coroutine object; nothing runs yet.
c = hello('konghui')

# Create the event loop explicitly: asyncio.get_event_loop() is deprecated as
# a way to implicitly create a loop when none is running.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    # Register the coroutine object with the loop, then run the loop until
    # the coroutine has finished.
    loop.run_until_complete(c)
finally:
    # Release the loop's resources even if the coroutine raised.
    loop.close()

# Example output:
# hello to: konghui
# Using a Task
import asyncio


async def hello(name):
    """Print a greeting addressed to ``name``."""
    print('hello to:', name)


c = hello('konghui')

# Create the event loop explicitly: asyncio.get_event_loop() is deprecated as
# a way to implicitly create a loop when none is running.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    # Wrap the coroutine one level further, inside a Task object.
    task = loop.create_task(c)
    print(task)  # printed before the loop runs: the task is still pending
    loop.run_until_complete(task)
    print(task)  # printed after the loop ran: the task is finished
finally:
    # Release the loop's resources even if the task raised.
    loop.close()

# Example output (captured in an IPython session):
# <Task pending coro=<hello() running at <ipython-input-11-91468b72d8d0>:5>>
# hello to: konghui
# <Task finished coro=<hello() done, defined at <ipython-input-11-91468b72d8d0>:5> result=None>
# Using a future
import asyncio


async def hello(name):
    """Print a greeting addressed to ``name``."""
    print('hello to:', name)


c = hello('konghui')

# This snippet needs its own loop: the name ``loop`` is not defined anywhere
# in it otherwise. It is created explicitly because asyncio.get_event_loop()
# is deprecated as a way to implicitly create a loop.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    # Pass the loop explicitly so ensure_future() does not have to look up
    # an implicit "current" loop.
    future = asyncio.ensure_future(c, loop=loop)
    loop.run_until_complete(future)
finally:
    # Release the loop's resources even if the future raised.
    loop.close()

# Example output:
# hello to: konghui
# Binding a callback (task)
import asyncio


def callback(task):
    """Done-callback: print the result of the finished ``task``."""
    print('i am callback', task.result())


async def hello(name):
    """Print a greeting addressed to ``name`` and return ``name``."""
    print('hello to:', name)
    return name


c = hello('konghui')

# This snippet needs its own loop: the name ``loop`` is not defined anywhere
# in it otherwise. It is created explicitly because asyncio.get_event_loop()
# is deprecated as a way to implicitly create a loop.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    task = asyncio.ensure_future(c, loop=loop)
    # Bind a callback to the task object; it is invoked with the task once
    # the task is done.
    task.add_done_callback(callback)
    loop.run_until_complete(task)
finally:
    # Release the loop's resources even if the task raised.
    loop.close()

# Example output (the last line is the return value of run_until_complete()
# as echoed by an interactive session):
# hello to: konghui
# i am callback konghui
# 'konghui'
import asyncio
import time
from time import sleep


async def request(url):
    """Simulate downloading ``url`` with a *blocking* two-second sleep.

    Prints one line when the download starts and one when it finishes.
    """
    print('正在下载:', url)
    # Code from a non-async module: a blocking call like this one inside a
    # coroutine makes asyncio lose its asynchronous effect entirely.
    sleep(2)
    print('下载成功:', url)


urls = [
    'www.baidu.com',
    'www.taobao.com',
    'www.sogou.com',
]

start = time.time()

# Create the event loop explicitly: asyncio.get_event_loop() is deprecated as
# a way to implicitly create a loop when none is running.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    # Task list: one task object per URL.
    tasks = [asyncio.ensure_future(request(url), loop=loop) for url in urls]
    # Register the whole list of task objects with the event loop.
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Release the loop's resources even if waiting failed.
    loop.close()

print('总耗时:', time.time() - start)

# Example output (the downloads run one after another):
# 正在下载: www.baidu.com
# 下载成功: www.baidu.com
# 正在下载: www.taobao.com
# 下载成功: www.taobao.com
# 正在下载: www.sogou.com
# 下载成功: www.sogou.com
# 总耗时: 6.006343603134155
import asyncio
import time


async def request(url):
    """Simulate downloading ``url`` with a non-blocking two-second sleep.

    Prints one line when the download starts and one when it finishes.
    """
    print('正在下载:', url)
    # A blocking time.sleep(2) here would make asyncio lose its asynchronous
    # effect entirely; awaiting asyncio.sleep() hands control back to the
    # event loop while this coroutine waits.
    await asyncio.sleep(2)
    print('下载成功:', url)


urls = [
    'www.baidu.com',
    'www.taobao.com',
    'www.sogou.com',
]

start = time.time()

# Create the event loop explicitly: asyncio.get_event_loop() is deprecated as
# a way to implicitly create a loop when none is running.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    # Task list: one task object per URL.
    tasks = [asyncio.ensure_future(request(url), loop=loop) for url in urls]
    # Register the whole list of task objects with the event loop.
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Release the loop's resources even if waiting failed.
    loop.close()

print('总耗时:', time.time() - start)

# Example output (all downloads start before any of them finishes):
# 正在下载: www.baidu.com
# 正在下载: www.taobao.com
# 正在下载: www.sogou.com
# 下载成功: www.baidu.com
# 下载成功: www.taobao.com
# 下载成功: www.sogou.com
# 总耗时: 2.0051145553588867
import asyncio
import time

import aiohttp


async def get_page(url):
    """Fetch ``url`` with aiohttp and print the response body as text."""
    # NOTE: one session per call keeps the example short; aiohttp's docs
    # recommend sharing a single ClientSession across requests.
    async with aiohttp.ClientSession() as session:
        # session.get() already returns an async context manager, so it is
        # entered directly -- no extra ``await`` in front of it.
        async with session.get(url=url) as response:
            # text() yields the decoded body; read() and json() are the
            # alternatives for raw bytes and parsed JSON.
            page_text = await response.text()
            print(page_text)


start = time.time()

urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]

# Create the event loop explicitly: asyncio.get_event_loop() is deprecated as
# a way to implicitly create a loop when none is running.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

try:
    # One task object per URL, all registered with the same loop.
    tasks = [asyncio.ensure_future(get_page(url), loop=loop) for url in urls]
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Release the loop's resources even if waiting failed.
    loop.close()

print('总耗时:', time.time() - start)

# Example output (responses arrive in no particular order):
# Hello bobo
# Hello jay
# Hello tom
# Hello jay
# Hello bobo
# Hello tom
# Hello bobo
# Hello tom
# Hello jay
# 总耗时: 2.031116008758545