1.爬取电影页
2.解析提取电影信息
3.保存数据
驱动浏览器往目标网站发送请求,获取响应数据
-不需要分析复杂的通信流程
-执行js代码
-获取动态数据
driver = webdriver.Chrome()
driver.get('网站')  往某个网站发送请求
driver.close()
element:查找一个
elements:查找多个
by_id
by_class_name
by_name
by_link_text
by_partial_link_text
by_css_selector
click
clear
示例:
# Demo: open JD.com, run one search by clicking the search button, then clear
# the box and run a second search by pressing ENTER.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC  # used together with WebDriverWait below
from selenium.webdriver.support.wait import WebDriverWait  # wait for specific elements to load

driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)
    driver.get("https://www.jd.com/")
    time.sleep(5)

    # Type a query and click the search button.
    # (Named search_box rather than `input` so the builtin is not shadowed.)
    search_box = driver.find_element(By.ID, 'key')
    search_box.send_keys('围城')
    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()
    time.sleep(3)

    # Look the box up again after the search was submitted; the earlier
    # reference presumably goes stale once the page changes.
    results_box = driver.find_element(By.ID, 'key')
    results_box.clear()
    time.sleep(1)
    results_box.send_keys('墨菲定律')
    results_box.send_keys(Keys.ENTER)
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window and can leave the driver process behind.
    driver.quit()
ActionChains 是一个动作链对象,示例:
# Demo: drag the "draggable" box onto the "droppable" box inside an iframe,
# moving it a few pixels at a time with ActionChains.
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC  # used together with WebDriverWait below
from selenium.webdriver.support.wait import WebDriverWait  # wait for specific elements to load

STEP_PX = 2  # horizontal distance covered by each small move

driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # The two boxes are looked up only after switching into this frame.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    source = driver.find_element(By.ID, 'draggable')
    target = driver.find_element(By.ID, 'droppable')

    # Option 1 (instant move): queue a single drag-and-drop action and run it
    # with .perform():
    #     ActionChains(driver).drag_and_drop(source, target).perform()
    #
    # Option 2 (used here): move the box little by little.

    print(source.tag_name)
    print(source.text)
    print(source.size)
    print(target.location)
    print(source.location)

    # Horizontal distance between the two elements' positions.
    distance = target.location['x'] - source.location['x']

    # Press and hold the box. Different actions must not share a single
    # ActionChains object, so a fresh one is built for every step.
    ActionChains(driver).click_and_hold(source).perform()

    moved = 0
    while moved < distance:
        ActionChains(driver).move_by_offset(xoffset=STEP_PX, yoffset=0).perform()
        moved += STEP_PX
        time.sleep(0.1)

    # Drop the box.
    ActionChains(driver).release(source).perform()
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window and can leave the driver process behind.
    driver.quit()
driver.switch_to.frame(frame 的id名)
# Run a JavaScript snippet in the loaded page, then pause for five seconds.
# Relies on `driver` and `time` being set up by the surrounding script.
driver.get("https://www.baidu.com/")
alert_js = ''' alert("你好") '''
driver.execute_script(alert_js)
time.sleep(5)
# Simulate the browser's back / forward navigation.
import time

from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three pages in a row so there is history to navigate through.
    browser.get('https://www.baidu.com')
    browser.get('https://www.taobao.com')
    browser.get('http://www.sina.com.cn/')

    # Go back one page.
    browser.back()
    time.sleep(10)

    # Go forward again.
    browser.forward()
finally:
    # Always release the browser, even if one of the navigations fails.
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    browser.quit()
# Demo: search JD.com for a keyword and append every product found on the
# first results page to jd.txt.
import time  # was missing: time.sleep() below raised NameError

from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies
from selenium.webdriver.common.keys import Keys  # keyboard keys

driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the JD home page search box and submit it.
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(2)

    goods = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in goods:
            # Product name
            name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            # Product price
            price = good.find_element(By.CLASS_NAME, 'p-price').text
            # Product link
            url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            # Product review summary
            commit = good.find_element(By.CLASS_NAME, 'p-commit').text

            good_content = f''' 商品名称:{name} 商品价格:{price} 商品连接:{url} 商品评价:{commit} \n '''
            print(good_content)
            f.write(good_content)
            print("写入成功")
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window and can leave the driver process behind.
    driver.quit()
(加入了自动下拉加载商品与点击下一页):
# Demo: search JD.com for a keyword, then walk through the result pages,
# scrolling each one down and appending every product to jd.txt.
import time  # was missing: time.sleep() below raised NameError

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By  # locator strategies
from selenium.webdriver.common.keys import Keys  # keyboard keys


def get_goods(driver, max_pages=None):
    """Scrape the current results page and every following page into jd.txt.

    For each page the window is scrolled down, every ``gl-item`` element is
    read (name, price, link, review summary), printed, and appended to
    ``jd.txt``. The "next page" link is then clicked and the process repeats.

    The driver is NOT closed here: it belongs to the caller, which is
    expected to release it. (Previously every recursion level closed the
    driver in its own ``finally`` and the caller closed it once more.)

    Args:
        driver: A Selenium WebDriver already showing a results page.
        max_pages: Optional upper bound on the number of pages to scrape.
            ``None`` (the default) keeps going until no next-page link is
            found.
    """
    scroll_js = ''' window.scrollTo(0,5000) '''

    pages_done = 0
    # A loop instead of recursion: one stack frame regardless of page count.
    while max_pages is None or pages_done < max_pages:
        # Scroll down before collecting the product elements.
        driver.execute_script(scroll_js)

        goods = driver.find_elements(By.CLASS_NAME, 'gl-item')
        with open('jd.txt', 'a', encoding='utf-8') as f:
            # Numbering restarts at 1 on every page, as in the original.
            for num, good in enumerate(goods, start=1):
                # Product name
                name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
                # Product price
                price = good.find_element(By.CLASS_NAME, 'p-price').text
                # Product link
                url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
                # Product review summary
                commit = good.find_element(By.CLASS_NAME, 'p-commit').text

                good_content = f''' num:{num} 商品名称:{name} 商品价格:{price} 商品连接:{url} 商品评价:{commit} '''
                print(good_content)
                f.write(good_content)
                print("写入成功")

        pages_done += 1
        if pages_done == max_pages:
            break

        # Find the next-page link; its absence means this was the last page.
        # NOTE(review): if the site keeps a (disabled) next-page element on
        # the final page, this loop will not stop by itself -- pass max_pages.
        try:
            next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
        except NoSuchElementException:
            break
        next_tag.click()
        time.sleep(5)


if __name__ == '__main__':
    driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')

        # Type the keyword into the JD home page search box and submit it.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        get_goods(driver)
    finally:
        # Single point of cleanup. quit() ends the whole WebDriver session;
        # close() only closes the current window.
        driver.quit()