import asyncio
import aiohttp

async def request(url):
    print("current url:", url)
    # Send the request with aiohttp.request.
    async with aiohttp.request("GET", url) as r:
        # Read the body directly; the result is raw bytes.
        response = await r.read()
        print("response:", response)

urls = [
    'https://www.baidu.com',
    'https://www.sogou.com',
    'https://www.qq.com',
]
# Task list that holds the task objects.
stasks = []
for url in urls:
    c = request(url)
    task = asyncio.ensure_future(c)
    stasks.append(task)

loop = asyncio.get_event_loop()
# The task list must be wrapped in asyncio.wait().
loop.run_until_complete(asyncio.wait(stasks))
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Xu Junkai
"""
import asyncio
import time
import aiohttp

start_time = time.time()
urls = [
    'https://blog.csdn.net/',
    'https://www.sogou.com',
    'http://www.renren.com/',
]

async def get_page(url):
    print(url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            print(res.status)   # response status code
            print(res.charset)  # page encoding
            response = await res.text()  # response body as text
            print(response)

tasks = []
for url in urls:
    c = get_page(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end_time = time.time()
print('total time:', end_time - start_time)
async with session.put(url,data=b"data")
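A ClientSession exposes coroutine methods for the other HTTP verbs as well. The sketch below is only illustrative: the URL is a placeholder and the payloads are made up.

import asyncio
import aiohttp

async def other_methods(url):
    # url is a placeholder; replace it with a real endpoint.
    async with aiohttp.ClientSession() as session:
        async with session.post(url, data=b"data") as res:   # POST with a raw body
            print(res.status)
        async with session.put(url, data=b"data") as res:    # PUT
            print(res.status)
        async with session.delete(url) as res:               # DELETE
            print(res.status)
        async with session.head(url) as res:                  # HEAD, no body
            print(res.status)

# asyncio.get_event_loop().run_until_complete(other_methods("http://example.com"))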
Note:
Do not create a new session for every request. Normally you only need to create one session and use it for all requests. Each session object contains an internal connection pool; keep-alive and connection reuse (enabled by default) speed up overall performance.
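As a rough illustration of that advice, the sketch below creates a single ClientSession and reuses it for every URL; the URL list is simply the one used elsewhere in this post.

import asyncio
import aiohttp

urls = [
    'https://www.baidu.com',
    'https://www.sogou.com',
    'https://www.qq.com',
]

async def fetch(session, url):
    # Reuse the shared session (and its connection pool) for each request.
    async with session.get(url) as res:
        return await res.text()

async def main():
    # One session for the whole program, not one per request.
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch(session, url)) for url in urls]
        pages = await asyncio.gather(*tasks)
        for page in pages:
            print(len(page))

loop = asyncio.get_event_loop()
loop.run_until_complete(main())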
import asyncio
import time
import aiohttp

start_time = time.time()
urls = [
    'https://blog.csdn.net/',
    'https://www.sogou.com',
    'http://www.renren.com/',
]
data = {"name": "foo"}

async def get_page(url, data):  # the coroutine can take any number of parameters
    print(url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url, params=data) as res:
            print(res.status)
            # Reading the response body is a blocking, time-consuming step,
            # so we await it to let the event loop switch to other coroutines.
            response = await res.text()
            print(response)
            print(res.charset)

tasks = []
for url in urls:
    c = get_page(url, data)  # passes the arguments but does not run the coroutine yet
    task = asyncio.ensure_future(c)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end_time = time.time()
print('total time:', end_time - start_time)
Note
When using res.text() or res.read() to get the response body, remember that reading the body is a blocking, time-consuming operation, so we use await to switch coroutines. Correct usage:

await res.text()
await res.read()  # returns bytes
await res.json()  # accepts an encoding and a custom decoder function

Note: res.json() is the built-in JSON decoder, like the one in Requests. Use res.json() only when the response body is JSON; if the response is not JSON, res.json() will raise an error.
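A minimal sketch of await res.json(), assuming an endpoint that actually returns JSON (httpbin.org/get is used here purely as an example):

import asyncio
import aiohttp

async def get_json(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            # Only call res.json() when the response body really is JSON,
            # otherwise it raises an error.
            payload = await res.json()
            print(payload)

loop = asyncio.get_event_loop()
loop.run_until_complete(get_json("https://httpbin.org/get"))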
# Read the response as a byte stream.
import asyncio
import aiohttp

urls = 'https://blog.csdn.net/'

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            # Print the first 100 bytes of the response.
            print(await res.content.read(100))

c = get_page(urls)               # coroutine object
task = asyncio.ensure_future(c)  # wrap it in a task
loop = asyncio.get_event_loop()  # create the event loop
loop.run_until_complete(task)    # fetch 100 bytes of data
import asyncio
import aiohttp

urls = 'https://blog.csdn.net/'

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            with open("cnds.text", "wb") as fp:
                # Read the body 100 bytes at a time and write each chunk to the file.
                while True:
                    chunk = await res.content.read(100)
                    if not chunk:
                        break
                    fp.write(chunk)

c = get_page(urls)
task = asyncio.ensure_future(c)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
Note
async with session.get(url) as res:  # asynchronous context manager
with open("cnds.text", "wb") as fp:  # ordinary context manager

An asynchronous context manager can suspend execution in its enter and exit methods. To support this, two new methods were added: __aenter__ and __aexit__, both of which must return an awaitable. See: https://www.jb51.net/article/163540.htm

Asynchronous iterators
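As a rough sketch of those two methods, here is a hand-rolled asynchronous context manager; the class name and the wrapped session are my own illustration, not part of aiohttp.

import asyncio
import aiohttp

class SessionManager:
    """Illustrative async context manager built on __aenter__/__aexit__."""

    async def __aenter__(self):
        # Both methods may await, which is what lets execution pause here.
        self._session = aiohttp.ClientSession()
        return self._session

    async def __aexit__(self, exc_type, exc, tb):
        await self._session.close()

async def main():
    async with SessionManager() as session:
        async with session.get('https://blog.csdn.net/') as res:
            print(res.status)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())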
# Same as with requests: the headers most often carry a User-Agent.
async def get_page(url):
    async with aiohttp.ClientSession() as session:
        headers = {'Content-Type': 'text/html; charset=utf-8'}
        async with session.get(url, headers=headers) as res:
            with open("cnds.text", "wb") as fp:
                # Read the body 100 bytes at a time and write each chunk to the file.
                while True:
                    chunk = await res.content.read(100)
                    if not chunk:
                        break
                    fp.write(chunk)
# Excerpt from the aiohttp source.
class ClientSession:
    """First-class interface for making HTTP requests."""

    ATTRS = frozenset([
        '_source_traceback', '_connector',
        'requote_redirect_url', '_loop', '_cookie_jar',
        '_connector_owner', '_default_auth',
        '_version', '_json_serialize', '_requote_redirect_url',
        '_timeout', '_raise_for_status', '_auto_decompress',
        '_trust_env', '_default_headers', '_skip_auto_headers',
        '_request_class', '_response_class',
        '_ws_response_class', '_trace_configs'])

    _source_traceback = None
    _connector = None

    def __init__(self, *, connector: Optional[BaseConnector]=None,
                 loop: Optional[asyncio.AbstractEventLoop]=None,
                 cookies: Optional[LooseCookies]=None,
                 headers: Optional[LooseHeaders]=None,
                 skip_auto_headers: Optional[Iterable[str]]=None,
                 auth: Optional[BasicAuth]=None,
                 json_serialize: JSONEncoder=json.dumps,
                 request_class: Type[ClientRequest]=ClientRequest,
                 response_class: Type[ClientResponse]=ClientResponse,
                 ws_response_class: Type[ClientWebSocketResponse]=ClientWebSocketResponse,  # noqa
                 version: HttpVersion=http.HttpVersion11,
                 cookie_jar: Optional[AbstractCookieJar]=None,
                 connector_owner: bool=True,
                 raise_for_status: bool=False,
                 read_timeout: Union[float, object]=sentinel,
                 conn_timeout: Optional[float]=None,
                 timeout: Union[object, ClientTimeout]=sentinel,
                 auto_decompress: bool=True,
                 trust_env: bool=False,
                 requote_redirect_url: bool=True,
                 trace_configs: Optional[List[TraceConfig]]=None) -> None:
cookies = {"cookies": "xxxxxxxxxx"}
async with ClientSession(cookies=cookies) as session:
    ...
res.status
async with session.get(url) as res:
    print(res.status)
import asyncio
import aiohttp

urls = 'https://blog.csdn.net/'

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        headers = {'Content-Type': 'text/html; charset=utf-8'}
        async with session.get(url, headers=headers) as res:
            # Iterate over the response headers.
            for item, values in res.headers.items():
                print(item, "*******", values)

c = get_page(urls)
task = asyncio.ensure_future(c)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
By default all IO operations have a 5-minute timeout. That is quite long, so we can set our own timeout.
If timeout=None or timeout=0, no timeout check is performed, i.e. there is no time limit.
async with session.get("https://baidu.com",timeout=60) as res: pass
import aiohttp
import asyncio

async def request():
    # Set a cookie of our own.
    cookies = {"my_cookie": "my_set_cookies"}
    async with aiohttp.ClientSession(cookies=cookies) as session:
        async with session.get("https://www.csdn.net/") as res:
            print(session.cookie_jar.filter_cookies("https://www.csdn.net/nav/python"))
            print("*******************************************")
        async with session.get("https://www.csdn.net/") as res:
            print(session.cookie_jar.filter_cookies("https://www.csdn.net/nav/java"))

c = request()
task = asyncio.ensure_future(c)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)

# Set-Cookie: dc_session_id=10_1562499942692.566280
# Set-Cookie: my_cookie=my_set_cookies
# Set-Cookie: uuid_tt_dd=10_20709428800-1562499942692-906566
# *******************************************
# Set-Cookie: dc_session_id=10_1562499942692.566280
# Set-Cookie: my_cookie=my_set_cookies
# Set-Cookie: uuid_tt_dd=10_20709428800-1562499942692-906566
It is best to use session.cookie_jar.filter_cookies() to get a site's cookies. Unlike the requests module, res.cookies may return some cookies, but it does not appear to return all of them.
Summary
1. res.cookies only contains the cookies set on the current URL; it does not maintain cookies for the whole site.
2. session.cookie_jar.filter_cookies(url) keeps all cookies set for the site, including the ones we set for the session, and updates them according to each response. This is what we actually want.
3. To set our own cookies, pass them in via aiohttp.ClientSession(cookies=cookies).
4. ClientSession also supports request headers, keep-alive connections and connection pooling.
By default ClientSession uses the strict aiohttp.CookieJar. RFC 2109 explicitly forbids accepting cookies from URLs that use an IP address; only cookies from DNS-resolvable host names are accepted. This can be changed by setting unsafe=True on aiohttp.CookieJar:
jar = aiohttp.CookieJar(unsafe=True)
session = aiohttp.ClientSession(cookie_jar=jar)
TCPConnector maintains the connection pool and limits the total number of parallel connections. When the pool is full, a new request is admitted only after another request finishes and releases its connection.
async def request():
    cookies = {"my_cookies": "my_cookies"}
    # Limit the number of parallel connections.
    conn = aiohttp.TCPConnector(limit=5)
    async with aiohttp.ClientSession(cookies=cookies, connector=conn) as session:
        pass

c = request()
task = asyncio.ensure_future(c)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
To limit the number of simultaneous connections to the same endpoint, set the limit_per_host parameter:
limit_per_host: the maximum number of connections to one endpoint, where an endpoint is identified by the exact (host, port, is_ssl) triple.

conn = aiohttp.TCPConnector(limit_per_host=30)  # the default is 0, i.e. unlimited
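For illustration, both limits can be combined on one connector and handed to the session; the numbers below are arbitrary example values.

import asyncio
import aiohttp

async def main():
    # At most 10 connections in total, and at most 2 per (host, port, is_ssl) endpoint.
    conn = aiohttp.TCPConnector(limit=10, limit_per_host=2)
    async with aiohttp.ClientSession(connector=conn) as session:
        async with session.get('https://www.baidu.com') as res:
            print(res.status)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())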
import asyncio
import aiohttp

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
}

def callback(task):
    # The callback could parse the page; to keep things simple we just print its length.
    print(len(task.result()))

async def res(url):
    async with aiohttp.request('GET', url, headers=headers) as fp:
        response = await fp.read()
        # The three sites use different encodings, so decode with a broad one (ISO-8859-1).
        response = response.decode('iso-8859-1')
    # Return the page to the callback function.
    return response

urls = [
    'https://www.baidu.com',
    'https://www.sogou.com',
    'https://www.qq.com',
]
# proxy="http://some.proxy.com"

if __name__ == '__main__':
    stasks = []
    for url in urls:
        # Create the coroutine object.
        c = res(url)
        # Wrap it in a task object.
        task = asyncio.ensure_future(c)
        # Bind the callback to the task.
        task.add_done_callback(callback)
        # Add it to the task list.
        stasks.append(task)
    # Create an event loop.
    loop = asyncio.get_event_loop()
    # Register the task list with the event loop and run it.
    loop.run_until_complete(asyncio.wait(stasks))