如果直接使用selenium访问淘宝、新浪和知乎这些网站,通常会被识别出是自动化测试工具,并触发反制措施。
开启开发者模式后,就可以绕过它们的检测啦。
这几个网站模拟登录的套路都差不多。
麻烦一点的是知乎,总是弹出验证码。这里不讨论验证码的绕过;如果真的有大量登录获取cookie的需求,接入打码平台应该是个不错的选择...
其实用selenium操作浏览器时尽可能模拟人的操作,就可以减少验证码出现的概率了。
直接上代码吧,注释里会说明逻辑:
淘宝:
# Log in to Taobao through its "sign in with Weibo" option, then type a
# keyword into the Taobao search box. Fill in USER / PASSWORD before running.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

URL = 'https://login.taobao.com/'
USER = ''
PASSWORD = ''

chrome_options = Options()
# chrome_options.add_argument('--headless')  # uncomment to run without a visible window
chrome_options.add_argument('--disable-gpu')  # works around a bug (per the original author)
# Start Chrome without the "enable-automation" switch ("developer mode").
# Per the original author, the page-visible webdriver property then looks
# normal, which is what sites with stronger anti-bot checks inspect.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

# `options=` is the supported keyword; the older `chrome_options=` spelling
# was deprecated and is rejected by Selenium 4.
driver = webdriver.Chrome(options=chrome_options)
wait = WebDriverWait(driver, 10)  # explicit-wait timeout: 10 s
driver.get(URL)

# Switch to password login. Elements that get clicked are awaited with
# element_to_be_clickable, because mere DOM presence does not guarantee the
# element is visible and enabled yet.
login_click = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//i[@class="iconfont static"]'))
)
login_click.click()

# Choose "log in with Weibo".
weibo_click = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//a[@class="weibo-login"]'))
)
weibo_click.click()

# Wait for the Weibo account field, then type the account name.
weibo_user = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.username > .W_input'))
)
weibo_user.send_keys(USER)

# Wait for the Weibo password field, then type the password.
weibo_pwd = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.password > .W_input'))
)
weibo_pwd.send_keys(PASSWORD)

# Wait for the login button and submit.
submit = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn_tip > a > span'))
)
submit.click()

# Once logged in, type a search keyword into the Taobao search box.
search_input = wait.until(
    EC.presence_of_element_located((By.XPATH, '//input[@id="q"]'))
)
search_input.send_keys('美食')

# driver.close()  # left disabled so the browser stays open for inspection
知乎:
# Log in to Zhihu with account + password using Selenium-driven Chrome.
# Fill in USER / PASSWORD before running.
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

URL = 'https://www.zhihu.com/signin'
USER = ''
PASSWORD = ''

# (x, y) pointer offset used to hit the login button. These values only make
# sense for the author's maximized window; adjust them for other screen sizes.
LOGIN_BUTTON_OFFSET = (930, 500)

chrome_options = Options()
# chrome_options.add_argument('--headless')  # uncomment to run without a visible window
chrome_options.add_argument('--disable-gpu')  # works around a bug (per the original author)
# Start Chrome without the "enable-automation" switch ("developer mode").
# Per the original author, the page-visible webdriver property then looks
# normal, which is what sites with stronger anti-bot checks inspect.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

# `options=` is the supported keyword; the older `chrome_options=` spelling
# was deprecated and is rejected by Selenium 4.
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()  # the click offset above relies on a maximized window
wait = WebDriverWait(driver, 10)  # explicit-wait timeout: 10 s
driver.get(URL)

# Switch to the password-login tab. Elements that get clicked are awaited
# with element_to_be_clickable, because mere DOM presence does not guarantee
# the element is visible and enabled yet.
change = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//div[@class="SignFlow-tab"]'))
)
change.click()

# Account field: click first and pause briefly before typing, to look more
# like a human user.
zhihu_user = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//input[@name="username"]'))
)
zhihu_user.click()
time.sleep(1)
zhihu_user.send_keys(USER)

# Password field.
zhihu_pwd = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//input[@name="password"]'))
)
zhihu_pwd.click()
zhihu_pwd.send_keys(PASSWORD)
time.sleep(1.5)

# Click the login button by pointer offset rather than by locator:
# move the mouse by (x=930, y=500) from its current position and left-click.
ActionChains(driver).move_by_offset(*LOGIN_BUTTON_OFFSET).click().perform()

# driver.close()  # left disabled so the browser stays open for inspection
新浪微博:
# Log in to Sina Weibo with account + password using Selenium-driven Chrome.
# Fill in USER / PASSWORD before running.
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

URL = 'https://weibo.com'
USER = ''
PASSWORD = ''

# (x, y) pointer offset used to hit the login button. These values only make
# sense for the author's maximized window; adjust them for other screen sizes.
LOGIN_BUTTON_OFFSET = (1360, 280)

chrome_options = Options()
# chrome_options.add_argument('--headless')  # uncomment to run without a visible window
chrome_options.add_argument('--disable-gpu')  # works around a bug (per the original author)
# Start Chrome without the "enable-automation" switch ("developer mode").
# Per the original author, the page-visible webdriver property then looks
# normal, which is what sites with stronger anti-bot checks inspect.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

# `options=` is the supported keyword; the older `chrome_options=` spelling
# was deprecated and is rejected by Selenium 4.
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()  # the click offset above relies on a maximized window
wait = WebDriverWait(driver, 10)  # explicit-wait timeout: 10 s
driver.get(URL)

# Account field. It is awaited with element_to_be_clickable because it gets
# clicked, and mere DOM presence does not guarantee it is visible and enabled.
weibo_user = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//input[@id="loginname"]'))
)
weibo_user.click()  # mimic a manual click before typing the account
time.sleep(0.5)  # short pause; typing too quickly seems to trigger a captcha
weibo_user.send_keys(USER)

# Password field.
weibo_pwd = wait.until(
    EC.presence_of_element_located(
        (By.XPATH, '//div[@class="input_wrap"]/input[@name="password"]')
    )
)
weibo_pwd.send_keys(PASSWORD)

# Click the login button by pointer offset rather than by locator:
# move the mouse by (x=1360, y=280) from its current position and left-click.
ActionChains(driver).move_by_offset(*LOGIN_BUTTON_OFFSET).click().perform()

# driver.close()  # left disabled so the browser stays open for inspection
参考:http://www.javashuo.com/article/p-nqhdjcbq-t.html
The end~