一、urllib代理设置
from urllib.error import URLError
from urllib.request import ProxyHandler
from urllib.request import build_opener

# ProxyHandler takes a dict that maps a URL scheme to the proxy used for it.
proxy_handler = ProxyHandler({"http": "http://113.120.33.75:9999",
                              "https": "https://120.83.99.72:9999"})

# Build an opener that routes its requests through the proxy handler.
opener = build_opener(proxy_handler)
try:
    # A timeout keeps a dead proxy from blocking forever, and the `with`
    # block closes the response even if reading or decoding fails.
    with opener.open("http://httpbin.org/get", timeout=10) as response:
        print(response.read().decode("utf-8"))
except URLError as e:
    print(e.reason)
except OSError as e:
    # A timeout while reading the body is raised as a plain socket error
    # rather than being wrapped in URLError.
    print(e)
运行时可能会出现以下两种错误情况:
[WinError 10061] 由于目标计算机积极拒绝,无法连接。
解决方法:主要原因是代理不可用,更换一个可用的代理即可。
[WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
解决方法:打开浏览器代理设置中的“局域网设置”,将其中的“使用自动配置脚本”选项改为“自动检测设置”即可。
运行结果如下,可以看到origin已经改成了代理IP:
{ "args": {}, "headers": { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9", "Host": "httpbin.org", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36" }, "origin": "113.120.33.75, 113.120.33.75", "url": "https://httpbin.org/get" }
二、requests代理设置
import requests

# Map each URL scheme to the proxy that should carry it.
proxies = {"http": "http://182.92.113.183:8118",
           "https": "https://120.83.99.72:9999"}
try:
    # requests never times out by default, so an unresponsive proxy would
    # hang the script; bound the wait explicitly.
    response = requests.get("http://httpbin.org/get", proxies=proxies, timeout=10)
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
    # Timeout is caught as well because a read timeout is not a
    # ConnectionError.
    print(e.args)
else:
    # Print the response body as text.
    print(response.text)
运行结果如下,origin已经更改为代理IP了。显然比urllib简单多了,而且不用创建opener对象:
{ "args": {}, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Host": "httpbin.org", "User-Agent": "python-requests/2.21.0" }, "origin": "182.92.113.183, 182.92.113.183", "url": "https://httpbin.org/get" }
三、selenium代理设置
import time
from selenium import webdriver

# Proxy address as host:port.
proxy = "182.92.113.183:8118"

# Page to request through the proxy.
url = "http://httpbin.org/get"

# Chrome options object used to pass extra command-line switches.
chrome_options = webdriver.ChromeOptions()

# Set the proxy; there must be no spaces around the "=" sign.
chrome_options.add_argument("--proxy-server=http://" + proxy)

# Start Chrome with the proxy option. The driver path is a raw string so the
# backslash is not read as the start of an escape sequence.
# NOTE(review): newer Selenium releases replace `executable_path=` and
# `chrome_options=` with `Service` and `options=` -- confirm against the
# installed version.
browser = webdriver.Chrome(executable_path=r"D:\chromedriver.exe", chrome_options=chrome_options)
try:
    # Open the page and keep the window visible for a while.
    browser.get(url=url)
    time.sleep(10)
finally:
    # Always shut the browser down, even if loading the page failed.
    browser.quit()
运行结果如下,可以看到origin已经更改为代理IP了:
{ "args": {}, "headers": { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9", "Host": "httpbin.org", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36" }, "origin": "182.92.113.183, 182.92.113.183", "url": "https://httpbin.org/get" }
四、PhantomJS代理设置(新版本selenium已经弃用,使用谷歌无界面浏览器)
from selenium import webdriver

# Commonly used PhantomJS service arguments:
#   "--proxy=%s" % ip_html       proxy as IP:port (e.g. 192.168.0.28:808)
#   "--proxy-type=http"          proxy type: http/https
#   "--load-images=no"           do not load images (optional)
#   "--disk-cache=yes"           enable the disk cache (optional)
#   "--ignore-ssl-errors=true"   ignore HTTPS errors (optional)

# Page to request through the proxy.
url = "http://httpbin.org/get"

service_args = ["--proxy=121.233.206.44:9999",  # proxy IP:port
                "--proxy-type=http"]            # proxy protocol: http/https

# Start the headless PhantomJS browser and hand it the proxy arguments.
browser = webdriver.PhantomJS(executable_path=r"D:\phantomjs-2.1.1-windows\bin\phantomjs.exe", service_args=service_args)
try:
    browser.get(url=url)
    print(browser.page_source)
finally:
    # Without quit() the phantomjs process keeps running after the script.
    browser.quit()
运行结果出乎意料:最新版本的selenium不再支持PhantomJS了,提示我们改用谷歌或者火狐的无头浏览器:
UserWarning: Selenium support for PhantomJS has been deprecated, please use headless versions of Chrome or Firefox instead warnings.warn('Selenium support for PhantomJS has been deprecated, please use headless '
由于我的浏览器是谷歌浏览器,所以我就使用谷歌的无头浏览器来设置IP代理:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Options object that collects Chrome's command-line switches.
chrome_options = Options()
proxy = "182.92.113.183:8118"   # proxy IP:port
url = "http://httpbin.org/get"  # page to request through the proxy

# Headless mode: the browser runs without a visible window.
chrome_options.add_argument("--headless")
# Disable GPU acceleration.
chrome_options.add_argument("--disable-gpu")
# Browser language.
chrome_options.add_argument("-lang=zh-cn")  # Chinese
# chrome_options.add_argument("-lang=en-GB")  # English
# Route traffic through the proxy.
chrome_options.add_argument("--proxy-server=http://" + proxy)
# Browser window resolution.
chrome_options.add_argument("window-size=1920x3000")

# Start Chrome with the options above. The driver path is a raw string so the
# backslash is not read as the start of an escape sequence.
# NOTE(review): newer Selenium releases replace `executable_path=`,
# `chrome_options=` and `find_element_by_xpath` -- confirm against the
# installed version.
browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=r"D:\chromedriver.exe")
try:
    browser.get(url=url)
    print(browser.find_element_by_xpath("/html/body/pre").text)
finally:
    # A headless browser has no window to close by hand, so quit it
    # explicitly or the Chrome process is left running.
    browser.quit()
运行结果如下,origin的IP地址已经更改为代理IP:
{ "args": {}, "headers": { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-cn", "Host": "httpbin.org", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/74.0.3729.169 Safari/537.36" }, "origin": "182.92.113.183, 182.92.113.183", "url": "https://httpbin.org/get" }
最后附上selenium模块add_argument的常用参数:
# Frequently used Chrome switches, handed to add_argument in the listed order.
for _switch in (
    '--user-agent=""',                      # set the User-Agent request header
    '--window-size=1280x1024',              # browser resolution (window size)
    '--start-maximized',                    # run maximized; the author notes element lookups fail without it
    '--disable-infobars',                   # hide the "controlled by automated software" notice
    '--incognito',                          # incognito (private) mode
    '--hide-scrollbars',                    # hide scrollbars, for some special pages
    '--disable-javascript',                 # disable JavaScript
    '--blink-settings=imagesEnabled=false', # do not load images, to speed things up
    '--headless',                           # run without a visible browser window
    '--ignore-certificate-errors',          # ignore certificate errors
    '--disable-gpu',                        # disable GPU acceleration
    '--disable-software-rasterizer',
    '--disable-extensions',                 # disable extensions
    '--start-maximized',                    # maximize on startup (listed a second time)
    "--proxy-server=http://xxxxxxx",        # set the proxy
):
    chrome_options.add_argument(_switch)
另外还有其他参数,请参考 https://blog.csdn.net/liaojianqiu0115/article/details/78353267