1.学习pythonpython
urlopen(url, data, timeout)json
urllib2.Request()浏览器
urllib.urlencode()服务器
params = {} get : url + "?" + paramscookie
import urllib import urllib2 url = 'http://www.server.com/login' user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' values = {'username' : 'cqc', 'password' : 'XXXX' } headers = { 'User-Agent' : user_agent } data = urllib.urlencode(values) request = urllib2.Request(url, data, headers) response = urllib2.urlopen(request) page = response.read()
对付”反盗链”的方式,对付防盗链,服务器会识别headers中的referer是否是它本身,若是不是,有的服务器不会响应,因此咱们还能够在headers中加入refererapp
代理服务器设置学习
import urllib2 enable_proxy = True proxy_handler = urllib2.ProxyHandler({"http" : 'http://some-proxy.com:8080'}) null_proxy_handler = urllib2.ProxyHandler({}) if enable_proxy: opener = urllib2.build_opener(proxy_handler) else: opener = urllib2.build_opener(null_proxy_handler) urllib2.install_opener(opener)
import urllib2 httpHandler = urllib2.HTTPHandler(debuglevel=1) httpsHandler = urllib2.HTTPSHandler(debuglevel=1) opener = urllib2.build_opener(httpHandler, httpsHandler) urllib2.install_opener(opener) response = urllib2.urlopen('http://www.baidu.com')
import urllib2 import cookielib #声明一个CookieJar对象实例来保存cookie cookie = cookielib.CookieJar() #利用urllib2库的HTTPCookieProcessor对象来建立cookie处理器 handler=urllib2.HTTPCookieProcessor(cookie) #经过handler来构建opener opener = urllib2.build_opener(handler) #此处的open方法同urllib2的urlopen方法,也能够传入request response = opener.open('http://www.baidu.com') for item in cookie: print 'Name = '+item.name print 'Value = '+item.value
import cookielib import urllib2 #设置保存cookie的文件,同级目录下的cookie.txt filename = 'cookie.txt' #声明一个MozillaCookieJar对象实例来保存cookie,以后写入文件 cookie = cookielib.MozillaCookieJar(filename) #利用urllib2库的HTTPCookieProcessor对象来建立cookie处理器 handler = urllib2.HTTPCookieProcessor(cookie) #经过handler来构建opener opener = urllib2.build_opener(handler) #建立一个请求,原理同urllib2的urlopen response = opener.open("http://www.baidu.com") #保存cookie到文件 cookie.save(ignore_discard=True, ignore_expires=True)
import cookielib import urllib2 #建立MozillaCookieJar实例对象 cookie = cookielib.MozillaCookieJar() #从文件中读取cookie内容到变量 cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True) #建立请求的request req = urllib2.Request("http://www.baidu.com") #利用urllib2的build_opener方法建立一个opener opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie)) response = opener.open(req) print response.read()
遍历文档树ui
搜索文档树url