from urllib import parse

# Split a URL into its six components.
url = 'https://docs.python.org/3.5/search.html?q=parse&check_keywords=yes&area=default'
parseResult = parse.urlparse(url)
print(parseResult)
# ParseResult(scheme='https', netloc='docs.python.org', path='/3.5/search.html', params='', query='q=parse&check_keywords=yes&area=default', fragment='')

# parse_qs maps every query key to a *list* of values.
param_dict = parse.parse_qs(parseResult.query)
print(param_dict)
# {'q': ['parse'], 'check_keywords': ['yes'], 'area': ['default']}

q = param_dict['q'][0]
print(q)
# parse

# Note: parse_qs decodes '+' into a space, which is not always what you want.
d = parse.parse_qs('proxy=183.222.102.178:8080&task=XXXXX|5-3+2')
print(d)
# {'proxy': ['183.222.102.178:8080'], 'task': ['XXXXX|5-3 2']}
from urllib import parse

# urlencode turns a mapping into a query string; non-string values
# (here the int 99) are converted with str() before being quoted.
query = {"name": "walker", "age": 99}
d = parse.urlencode(query)
print(d)
# name=walker&age=99
from urllib import parse

# quote() leaves the slash unencoded.
d = parse.quote('a&b/c')
print(d)
# a%26b/c

# quote_plus() encodes the slash as well.
d1 = parse.quote_plus('a&b/c')
print(d1)
# a%26b%2Fc
from urllib import parse

# unquote() does not decode the plus sign.
d = parse.unquote('1+2')
print(d)
# 1+2

# unquote_plus() decodes the plus sign into a space.
d1 = parse.unquote_plus('1+2')
print(d1)
# 1 2
from urllib import parse


def qs(url: str) -> dict:
    """Return the query parameters of *url* as a flat ``{key: value}`` dict.

    ``parse_qs`` yields a list of values per key; only the first value of
    each key is kept, so repeated keys lose their later values.
    """
    query = parse.urlparse(url).query
    return {key: values[0] for key, values in parse.parse_qs(query).items()}


print(qs('http://url/api?param=2&param2=4'))
# {'param': '2', 'param2': '4'}
from urllib import parse


def url_add_params(url, **params):
    """Return *url* with *params* merged into its query string.

    Parameters already present in the URL are kept (including ones with an
    empty value); a keyword argument with the same name overrides the
    existing value. Because the existing query is collapsed into a dict,
    a key repeated in the URL keeps only its last value.
    """
    pr = parse.urlparse(url)
    # keep_blank_values=True so that existing "key=" parameters are not
    # silently dropped when the query string is rebuilt.
    query = dict(parse.parse_qsl(pr.query, keep_blank_values=True))
    query.update(params)
    # Swap only the query component; every other component is untouched.
    return pr._replace(query=parse.urlencode(query)).geturl()


if __name__ == "__main__":
    url = 'http://bbs.163.com/viewthread.php'
    data = {"name": "hero", "111": "222"}
    print(url_add_params(url, **data))
    # result : http://bbs.163.com/viewthread.php?name=hero&111=222
from urllib import parse

# Without a trailing slash the last path segment of the base ("tieba")
# is replaced by the relative reference.
d = parse.urljoin('http://www.oschina.com/tieba', 'index.php')
print(d)
# http://www.oschina.com/index.php

# With a trailing slash the relative reference is appended below "tieba/".
d1 = parse.urljoin('http://www.oschina.com/tieba/', 'index.php')
print(d1)
# http://www.oschina.com/tieba/index.php
urlsplit 和 urlparse 差不多,不过它不切分 URL 的参数(params)。适用于遵循 RFC 2396 的 URL,其中每个路径段都可以带参数。这样返回的元组就只有 5 个元素。
from urllib import parse

# urlsplit returns a 5-field result: there is no separate "params" field.
url = parse.urlsplit('http://www.baidu.com/index.php?username=guol')
print(url)
# SplitResult(scheme='http', netloc='www.baidu.com', path='/index.php', query='username=guol', fragment='')
使用 urlsplit 的格式组合成一个 url,传递的元素必须是 5 个,也可以直接将 urlsplit 分解得到的元组重新组合。
from urllib import parse

# The 5-tuple is (scheme, netloc, path, query, fragment). The path is
# given without a leading slash, yet the result contains one.
d = parse.urlunsplit(("https", "i.cnblogs.com", "EditPosts.aspx", "a=a", "b=b"))
print(d)
# https://i.cnblogs.com/EditPosts.aspx?a=a#b=b
urlparse(将 url 解析为组件;url 应带有 http:// 这样的 scheme 前缀,否则主机名不会被解析到 netloc 中)
from urllib import parse

# urlparse returns a 6-field result, including the "params" field.
d = parse.urlparse("https://i.cnblogs.com/EditPosts.aspx?opt=1")
print(d)
# ParseResult(scheme='https', netloc='i.cnblogs.com', path='/EditPosts.aspx', params='', query='opt=1', fragment='')
使用 urlparse 的格式组合成一个 url,可以直接将 urlparse 的返回值传入进行组合。
from urllib import parse

# The result of urlparse can be passed straight back to urlunparse;
# the explicit port stays part of netloc.
data = parse.urlparse("https://i.cnblogs.com:80/EditPosts.aspx?opt=1")
print(parse.urlunparse(data))
# https://i.cnblogs.com:80/EditPosts.aspx?opt=1