urllib2 和 urllib 是 Python 2 内置的两个模块。要实现 HTTP 功能,通常以 urllib2 为主、urllib 为辅。
# Minimal GET request: urlopen() accepts a plain URL string and returns a
# file-like response whose body is read and printed.
import urllib2

page = urllib2.urlopen('http://www.cnblogs.com/guguobao')
print(page.read())
# -*- coding: utf-8 -*-
# Same GET as a two-step exchange: build a Request object first, then hand it
# to urlopen() to obtain the response.
import urllib2

# Request
page_request = urllib2.Request('http://www.cnblogs.com/guguobao')
# Response
page = urllib2.urlopen(page_request)
print(page.read())
# -*- coding: utf-8 -*-
# POST request: passing a data argument to Request turns the call into a POST.
import urllib
import urllib2

login_url = 'http://www.cnblogs.com/login'
form_fields = {'username': 'qiye', 'password': 'qiye_pass'}

# The form must be encoded into a format urllib2 understands; urllib's
# urlencode() does that.
encoded_form = urllib.urlencode(form_fields)

login_request = urllib2.Request(login_url, encoded_form)
html = urllib2.urlopen(login_request).read()
给上面的例子添加 User-Agent 域和 Referer 域信息:
# -*- coding: utf-8 -*-
# Request-header handling: set the User-Agent and Referer fields on a POST.
import urllib
import urllib2

login_url = 'http://www.xxxxxx.com/login'
encoded_form = urllib.urlencode({'username': 'qiye', 'password': 'qiye_pass'})

login_request = urllib2.Request(login_url, encoded_form)
# Write the user agent and the referer into the request headers.
login_request.add_header(
    'User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
login_request.add_header('Referer', 'http://www.xxxxxx.com/')

html = urllib2.urlopen(login_request).read()
# Collect the cookies a site sets and print each one as "name:value".
import cookielib
import urllib2

# The CookieJar is filled by HTTPCookieProcessor as responses arrive.
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
response = opener.open('http://www.zhihu.com')

for item in cookie:
    # Print the cookie's value next to its name (the name was previously
    # printed twice). %-formatting also copes with a value of None, which
    # string concatenation would reject.
    print('%s:%s' % (item.name, item.value))
# Send a hand-written Cookie header by adding it to the opener's default
# headers, then show the response headers.
import urllib2
import cookielib

opener = urllib2.build_opener()
# "Cookie" and the email may be replaced with any value, but they must not be
# left out.
cookie_header = ('Cookie', 'email=' + 'helloguguobao@gmail.com')
opener.addheaders.append(cookie_header)

home_request = urllib2.Request('http://www.zhihu.com')
response = opener.open(home_request)
print(response.headers)
retdata = response.read()
# GET with a timeout: give up if the server does not answer within 2 seconds.
import urllib2

home_request = urllib2.Request('http://www.zhihu.com')
page = urllib2.urlopen(home_request, timeout=2)
print(page.read())
# Fetch a page and report failures instead of crashing.
import urllib2

try:
    response = urllib2.urlopen('http://www.google.com')
except urllib2.HTTPError as e:
    # The server answered, but with an error status; HTTPError always carries
    # the status code, so no hasattr() check is needed.
    print('Error code: %s' % e.code)
except urllib2.URLError as e:
    # No HTTP answer at all (DNS failure, refused connection, timeout, ...).
    # HTTPError is a subclass of URLError, so this clause must come second.
    print('Reason: %s' % e.reason)
else:
    print(response)
# Detect whether a request was redirected.
import urllib2

requested_url = 'http://www.zhihu.cn'
response = urllib2.urlopen(requested_url)

# geturl() returns the URL of the resource actually retrieved, so a redirect
# happened exactly when it differs from the URL that was requested.
isRedirected = response.geturl() != requested_url
# Customise redirect handling: refuse 301 redirects, follow 302 redirects and
# record on the response which status code triggered them.
import urllib2


class RedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that ignores 301 and annotates followed 302 responses."""

    def http_error_301(self, req, fp, code, msg, headers):
        # Returning None declines to handle the response, so the 301 is not
        # followed by this handler.
        return None

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect and tag the final response.

        Returns the response produced by the base handler, with ``status``
        set to the redirect code and ``newurl`` set to the final URL, or
        None when the base handler did not follow the redirect.
        """
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        # The base handler returns None when the response carries no redirect
        # target; there is nothing to annotate in that case.
        if result is not None:
            result.status = code
            result.newurl = result.geturl()
        return result


opener = urllib2.build_opener(RedirectHandler)
opener.open('http://www.zhihu.cn')
# Route every urllib2 request through a local HTTP proxy by installing a
# global opener.
import urllib2

# Before running, switch off the system-proxy mode of the local proxy client
# (the original note calls it "socketsocks", presumably Shadowsocks) and use
# port 1080, or simply quit that client.
proxy_handler = urllib2.ProxyHandler({'http': '127.0.0.1:1080'})
urllib2.install_opener(urllib2.build_opener(proxy_handler))

page = urllib2.urlopen('http://www.zhihu.com/')
print(page.read())
这里要注意一个细节:使用 urllib2.install_opener() 会设置 urllib2 的全局 opener,之后所有的 HTTP 访问都会使用这个代理。这样很方便,但是如果想在程序中使用两个不同的代理,就不能用 install_opener 去更改全局设置,而应直接调用 opener 自身的 open() 方法(即 opener.open())。
# Use a proxy for a single opener only: call the opener's own open() and
# leave urllib2's global opener untouched.
import urllib2

proxy_handler = urllib2.ProxyHandler({'http': '127.0.0.1:1080'})
proxied_opener = urllib2.build_opener(proxy_handler)

page = proxied_opener.open("http://www.google.com/")
print(page.read())
运行时需要关闭 Shadowsocks 的系统代理模式。