1.简介。
自动化测试工具,支持多种浏览器,爬虫中主要用来解决JavaScript渲染问题。(ps:要调用浏览器必须先下载相应的driver.exe文件,并把它放进Python的安装目录或其他已加入PATH的目录!)
2.代码。
基本使用
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Basic Selenium usage: type "Python" into Baidu's search box and dump the page.

Prints the current URL, the session cookies and the page source once the
element with id ``content_left`` is present.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
try:
    browser.get("http://www.baidu.com")
    # find_element(By.ID, ...) replaces find_element_by_id, which is
    # deprecated in Selenium 4 and removed in later 4.x releases.
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    # Wait up to 10 seconds for the element with id "content_left".
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
finally:
    # quit() ends the whole session (including the driver process);
    # close() only closes the current window.
    browser.quit()
声明浏览器对象和访问页面
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Declare a browser object, visit a page and print its source."""
from selenium import webdriver

browser = webdriver.Chrome()  # declare a Chrome browser object
try:
    browser.get('https://www.taobao.com')
    print(browser.page_source)  # print the page source
finally:
    # Shut the browser down even if loading the page fails; quit() also
    # ends the driver session, which close() does not guarantee.
    browser.quit()
查找元素 —— 单个元素
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Locate the same element three ways: by id, by CSS selector and by XPath."""
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()  # declare the browser object
try:
    browser.get('https://www.taobao.com')
    # find_element(By.<strategy>, value) replaces the find_element_by_*
    # helpers, which are deprecated in Selenium 4 and removed in later releases.
    input_first = browser.find_element(By.ID, 'q')
    input_second = browser.find_element(By.CSS_SELECTOR, '#q')
    input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
    print(input_first, input_second, input_third)
finally:
    browser.quit()  # close the browser and end the driver session
运行结果:
D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s2.py <selenium.webdriver.remote.webelement.WebElement (session="bd728922e48649f1839dc4b9d5cf6436", element="0.7649982219346745-1")> <selenium.webdriver.remote.webelement.WebElement (session="bd728922e48649f1839dc4b9d5cf6436", element="0.7649982219346745-1")> <selenium.webdriver.remote.webelement.WebElement (session="bd728922e48649f1839dc4b9d5cf6436", element="0.7649982219346745-1")> Process finished with exit code 0
查找元素 —— 多个元素
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Locate every element matching a CSS selector and print the resulting list."""
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    # find_elements (plural) returns a list of all matches; it replaces
    # find_elements_by_css_selector, which is deprecated in Selenium 4.
    lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(lis)
finally:
    browser.quit()  # close the browser and end the driver session
D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s3.py [<selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-1")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-2")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-3")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-4")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-5")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-6")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-7")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-8")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-9")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-10")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-11")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-12")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-13")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-14")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", 
element="0.36740963219874057-15")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-16")>] Process finished with exit code 0
元素交互操作——对获取的元素调用交互方法
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Interact with elements: type into a search box, clear it, retype, click search."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# find_element(By.ID, ...) replaces the deprecated find_element_by_id.
# The variable is not called `input`, so the builtin is not shadowed.
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPhone')
time.sleep(1)  # pause for one second before replacing the text
search_box.clear()
search_box.send_keys('iPad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()
# NOTE: as in the original script, the browser is left open after the click;
# call browser.quit() once you are done with it.
交互动作——将动作附加到动作链中串行执行
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Drag and drop with an action chain."""
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser.get(url)
# Switch into the frame identified by 'iframeResult' before searching it.
browser.switch_to.frame('iframeResult')
# find_element(By.CSS_SELECTOR, ...) replaces the deprecated
# find_element_by_css_selector.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')  # element to drag
target = browser.find_element(By.CSS_SELECTOR, '#droppable')  # drop target
actions = ActionChains(browser)  # create the action chain
actions.drag_and_drop(source, target)  # queue: drag source onto target
actions.perform()  # run the queued actions
# NOTE: as in the original script, the browser is left open afterwards;
# call browser.quit() once you are done with it.
执行JavaScript
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Run JavaScript in the page: scroll to the bottom, then show an alert."""
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('http://www.zhihu.com/explore')
# The snippets run in order: scroll first, then raise the alert.
for script in (
    'window.scrollTo(0,document.body.scrollHeight)',
    'alert("To Bottom")',
):
    driver.execute_script(script)
获取元素信息——获取属性
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Read an attribute of an element: print the element and its ``class`` value."""
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    url = 'http://www.zhihu.com/explore'
    browser.get(url)
    # find_element(By.ID, ...) replaces the deprecated find_element_by_id.
    logo = browser.find_element(By.ID, 'zh-top-link-logo')
    print(logo)
    print(logo.get_attribute('class'))
finally:
    # Release the browser and the driver session once the values are printed.
    browser.quit()
结果:
D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s7.py <selenium.webdriver.remote.webelement.WebElement (session="b1bebb30bb020339bf2d7693620c5002", element="0.047356492735414424-1")> zu-top-link-logo Process finished with exit code 0
获取元素信息——获取文本值
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Read the text of an element located by class name."""
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    url = 'http://www.zhihu.com/explore'
    browser.get(url)
    # find_element(By.CLASS_NAME, ...) replaces the deprecated
    # find_element_by_class_name. The variable is not called `input`,
    # so the builtin is not shadowed.
    element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
    print(element.text)
finally:
    # Release the browser and the driver session once the text is printed.
    browser.quit()
结果:
D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s8.py 提问 Process finished with exit code 0
获取元素信息——获取ID、位置、标签名、大小
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Print an element's id, location, tag name and size."""
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    url = 'http://www.zhihu.com/explore'
    browser.get(url)
    # find_element(By.CLASS_NAME, ...) replaces the deprecated
    # find_element_by_class_name. The variable is not called `input`,
    # so the builtin is not shadowed.
    element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
    print(element.id)
    print(element.location)
    print(element.tag_name)
    print(element.size)
finally:
    # Release the browser and the driver session once the values are printed.
    browser.quit()
结果:
D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s9.py 0.981153209857224-1 {'x': 758, 'y': 7} button {'height': 32, 'width': 66} Process finished with exit code 0
前进后退
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Navigate through the browser history: load three pages, go back, go forward."""
import time

from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('http://www.taobao.com')
    browser.get('http://www.baidu.com')
    browser.get('http://www.zhihu.com')
    browser.back()  # go back one page
    time.sleep(1)  # sleep for one second
    browser.forward()  # go forward one page
finally:
    # quit() ends the driver session as well; it runs even if a page load fails.
    browser.quit()
Cookies
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Read, add and delete cookies, printing the cookie list after each step."""
from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('https://www.zhihu.com/explore')
    print(browser.get_cookies())  # get the cookies
    # Add a cookie.
    browser.add_cookie({'name': 'dwj', 'domain': 'www.zhihu.com', 'value': 'germey'})
    print(browser.get_cookies())
    browser.delete_all_cookies()  # delete all cookies
    print(browser.get_cookies())
finally:
    # Release the browser and the driver session once the cookies are printed.
    browser.quit()
结果:
D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s11.py [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1543737715.05855, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '53d8274aa4a304c1aeff9b999b2aaa0a'}, {'domain': '.zhihu.com', 'expiry': 1543738618, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1543736818'}, {'domain': '.zhihu.com', 'expiry': 1638344815.058755, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': 'f25fb2b060c848b6b6ce5ab4b95b6369|1543736810000|1543736810000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '7639300af6920ffa63978e25321aa41b'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060298, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"OTc4ODJmYmZhZTVhNDdlMDgwMzkxODQ2ZTBkNGJjMTE=|1543736810|303ecf1cc48a8709da272dcebe8884ecb8d48a88"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060384, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"MDRiYWE4NTA0ZjVlNDA1OGFhZmQxYTNhYjY2YTVkODA=|1543736810|0f3c805aeecd7ba88f3094e7a1b4a11e944c4101"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060585, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"MTRkZGIzYWI3MDQzNDE4NGFiOThkZTExM2NhNDllNGM=|1543736810|e0857722320b2e1f3329cdb5b298fd4d95de6e85"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1638344817.948379, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AFDiVUWMmw6PTmwXpV-HqPzoef5yS4TXstc=|1543736813"'}, {'domain': '.zhihu.com', 'expiry': 1621496818.358731, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'BkkYmIaUuoFBlmisga7IPW2j6k5DllLB'}, {'domain': 
'.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1267940103.1543736818.1543736818.1543736818.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1559504818, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1543736818.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20181202=1'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c3079d4f-f991-4026-b3a4-f10aa6d821ef'}] [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1543737715.05855, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '53d8274aa4a304c1aeff9b999b2aaa0a'}, {'domain': '.zhihu.com', 'expiry': 1543738618, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1543736818'}, {'domain': '.zhihu.com', 'expiry': 1638344815.058755, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': 'f25fb2b060c848b6b6ce5ab4b95b6369|1543736810000|1543736810000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '7639300af6920ffa63978e25321aa41b'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060298, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"OTc4ODJmYmZhZTVhNDdlMDgwMzkxODQ2ZTBkNGJjMTE=|1543736810|303ecf1cc48a8709da272dcebe8884ecb8d48a88"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060384, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': 
'"MDRiYWE4NTA0ZjVlNDA1OGFhZmQxYTNhYjY2YTVkODA=|1543736810|0f3c805aeecd7ba88f3094e7a1b4a11e944c4101"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060585, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"MTRkZGIzYWI3MDQzNDE4NGFiOThkZTExM2NhNDllNGM=|1543736810|e0857722320b2e1f3329cdb5b298fd4d95de6e85"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1638344817.948379, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AFDiVUWMmw6PTmwXpV-HqPzoef5yS4TXstc=|1543736813"'}, {'domain': '.zhihu.com', 'expiry': 1621496818.358731, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'BkkYmIaUuoFBlmisga7IPW2j6k5DllLB'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.1267940103.1543736818.1543736818.1543736818.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1559504818, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1543736818.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20181202=1'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c3079d4f-f991-4026-b3a4-f10aa6d821ef'}, {'domain': 'www.zhihu.com', 'expiry': 2174456818, 'httpOnly': False, 'name': 'dwj', 'path': '/', 'secure': True, 'value': 'germey'}] [] Process finished with exit code 0
选项卡
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Work with tabs: open a second tab and load a different site in each."""
import time

from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
driver.execute_script('window.open()')  # have the browser open a new tab
print(driver.window_handles)  # print all tab handles

# Switch to the second tab and open Taobao in it.
second_tab = driver.window_handles[1]
driver.switch_to.window(second_tab)
driver.get('https://www.taobao.com')
time.sleep(1)

# Switch back to the first tab and open Zhihu in it.
first_tab = driver.window_handles[0]
driver.switch_to.window(first_tab)
driver.get('https://www.zhihu.com')