(一)初级爬取:css
import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

# Basic crawl: search JD.com for one keyword, append every product found on
# the first result page to jd.txt, then click the "next page" button once.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1
try:
    # Element lookups retry for up to 10 seconds before failing.
    driver.implicitly_wait(10)

    # Send the request to JD.com and submit the search keyword.
    driver.get('http://www.jd.com/')
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    # Fixed pause; presumably gives the result page time to render.
    time.sleep(3)

    good_list = driver.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole page rather than once per
    # product; the file is still flushed and closed if a lookup below raises.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Link to the product detail page
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            # Product review summary
            good_commit = good.find_element_by_class_name('p-commit').text

            # The record template is kept exactly as it appears in the source.
            good_content = f''' num={num} 商品名称:{good_name} 商品连接:{good_url} 商品价格:{good_price} 商品评价:{good_commit} \n '''
            print(good_content)
            f.write(good_content)
            num += 1

    print('商品写入完毕...')

    # Move to the next result page; this script ends here and does not
    # scrape that page.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() would only close the current window.
    driver.quit()
(二)中级爬取web
import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

# Intermediate crawl: same as the basic crawl, but scrolls the result page
# down before collecting products, then clicks the "next page" button once.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1
try:
    # Element lookups retry for up to 10 seconds before failing.
    driver.implicitly_wait(10)

    # Send the request to JD.com and submit the search keyword.
    driver.get('http://www.jd.com/')
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll the window to the 5000px vertical offset; presumably this makes
    # the page load the products that are not rendered initially.
    js_code = ''' window.scrollTo(0,5000) '''
    driver.execute_script(js_code)
    time.sleep(3)

    good_list = driver.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole page rather than once per
    # product; the file is still flushed and closed if a lookup below raises.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Link to the product detail page
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            # Product review summary
            good_commit = good.find_element_by_class_name('p-commit').text

            # The record template is kept exactly as it appears in the source.
            good_content = f''' num={num} 商品名称:{good_name} 商品连接:{good_url} 商品价格:{good_price} 商品评价:{good_commit} \n '''
            print(good_content)
            f.write(good_content)
            num += 1

    print('商品写入完毕...')

    # Move to the next result page; this script ends here and does not
    # scrape that page.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() would only close the current window.
    driver.quit()
(三)高级爬取chrome
import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver


def _save_current_page(driver):
    """Append every product on the currently loaded result page to jd.txt.

    Each product record is also printed to stdout.
    """
    good_list = driver.find_elements_by_class_name('gl-item')

    # Open the output file once per page rather than once per product; the
    # file is still flushed and closed if a lookup below raises.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        # NOTE(review): numbering restarts at 1 on every page, exactly as the
        # original recursive version did. Confirm whether a running count
        # across pages was intended.
        for num, good in enumerate(good_list, start=1):
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Link to the product detail page
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            # Product review summary
            good_commit = good.find_element_by_class_name('p-commit').text

            # The record template is kept exactly as it appears in the source.
            good_content = f''' num={num} 商品名称:{good_name} 商品连接:{good_url} 商品价格:{good_price} 商品评价:{good_commit} \n '''
            print(good_content)
            f.write(good_content)

    print('商品写入完毕...')


def get_good(driver):
    """Scrape the current result page and every following page into jd.txt.

    Pages are walked iteratively by clicking the element with class
    ``pn-next``. The loop stops when no such element exists or when it is
    marked ``disabled``.

    The caller owns ``driver``: this function never closes it. The previous
    recursive version called ``driver.close()`` in a ``finally`` at every
    recursion level, so the window was closed once per page while unwinding
    and then again by the caller.

    Args:
        driver: A Selenium WebDriver already showing a search result page.
    """
    while True:
        time.sleep(5)
        # Scroll the window to the 5000px vertical offset; presumably this
        # makes the page load the products that are not rendered initially.
        js_code = ''' window.scrollTo(0,5000) '''
        driver.execute_script(js_code)
        time.sleep(5)

        _save_current_page(driver)

        # The plural lookup returns an empty list instead of raising when
        # there is no "next page" button, so the loop can end cleanly.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): the last result page appears to keep the button but
        # add a "disabled" class; confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break

        next_tag.click()
        time.sleep(5)


if __name__ == '__main__':
    # Single browser instance for the whole run. The original also created a
    # second, unused browser at module level that was never closed.
    driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
    try:
        # Element lookups retry for up to 10 seconds before failing.
        driver.implicitly_wait(10)

        # Send the request to JD.com and submit the search keyword.
        driver.get('http://www.jd.com/')
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        # Collect the product information from every result page.
        get_good(driver)
    finally:
        # quit() ends the whole WebDriver session (all windows plus the
        # driver process); close() would only close the current window.
        driver.quit()