selenium登录csdn,requests抓取数据:html
# Log in to CSDN through a Chrome window driven by Selenium, then copy the
# browser's cookies into a requests session and fetch http://my.csdn.net/.
#
# The slider captcha has to be solved by hand: the script pauses for 10 s
# after locating the login button and before clicking it.
import selenium
import requests
import selenium.webdriver
import selenium.webdriver.common.keys
import time
from selenium.webdriver.common.by import By

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")

    # Switch to the account/password login form.
    login = driver.find_element(By.LINK_TEXT, "帐号密码登陆")
    login.click()
    time.sleep(5)

    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)

    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)

    login_button = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # window for solving the slider captcha by hand
    login_button.click()
    print(driver.page_source)
    time.sleep(15)  # give the site time to set its login cookies

    print("开始会话")
    # One session is reused so the copied cookies accompany the request.
    session = requests.Session()
    for cookie in driver.get_cookies():
        # Only name/value are copied; domain and path are not carried over.
        session.cookies.set(cookie["name"], cookie["value"])
    session.headers.clear()  # drop the default requests headers
    newpage = session.get("http://my.csdn.net/")
    print("会话完成")
    print(newpage.text)  # HTML of the fetched page
    time.sleep(10)
finally:
    # quit() ends the browser and the driver process even if a step above
    # raised; close() would only close the current window.
    driver.quit()
urllib保存cookie:web
#coding:utf-8
# Fetch http://my.csdn.net/ with urllib while replaying a hard-coded Cookie
# header, save the raw response bytes to csdn.txt and print them.
import selenium
import selenium.webdriver
import time
import lxml
import lxml.etree
import requests
import urllib.request

# NOTE(review): the Cookie value below is a captured login session
# (presumably dumped from a Selenium login via driver.get_cookies()). It
# contains an access token, so treat it as a secret, and expect it to be
# rejected once the session has expired.
print("开始会话")
headers = {
    # "Host" is not set here; urllib fills it in from the request URL.
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "DNT": "1",
    "Referer": "http://www.csdn.net/",
    # "Accept-Encoding" is not sent: urllib does not decompress gzip/deflate
    # bodies, so the response is written out exactly as received.
    "Accept-Language": "zh-CN,zh;q=0.8",
    # Adjacent literals are concatenated into one header value.
    "cookie": (
        "uuid_tt_dd=-1734079490838081701_20171010; "
        "bdshare_firstime=1507966544895; "
        "UserName=yinghuming; "
        "UserInfo=LZTCl6p9mr%2BUgX1cEEgqwIand1mBReKkuogvIYHivh6MdgAq8c4Y4%2Fmx1uhFT%2FmWFuTu%2BCna36D%2BZ7ssW7xuzAjlIwc7Vgjd7Y7zTDJqy%2FakzOPFEGR52GRrp8sf0i9NK7p2hdvM39vRq5Y7NLJObQ%3D%3D; "
        "UserNick=%E8%8B%B1%E9%9B%84%E6%97%A0%E6%95%8C2017; "
        "AU=821; "
        "UD=%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80; "
        "UN=yincheng0571; "
        "UE=\"yincheng01@163.com\"; "
        "BT=1508039179648; "
        "access-token=8260e0b7-a35c-419d-b4af-1f02d51c677d; "
        "Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1507965242,1507969974,1508038063,1508039035; "
        "Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508039041; "
        "dc_tos=oxuidd; "
        "dc_session_id=1508039034960_0.6956040327941211"
    ),
}

request = urllib.request.Request("http://my.csdn.net/", headers=headers)
# The context manager closes the HTTP response even if read() fails.
with urllib.request.urlopen(request) as response:
    newpagetext = response.read()

# Binary mode: the body is stored as raw bytes, without decoding.
with open("csdn.txt", "wb") as out:
    out.write(newpagetext)

print(newpagetext)
print("会话完成")
# Same total pause as the original two 10 s sleeps; presumably keeps the
# console open long enough to read the output.
time.sleep(20)
selenium+urllib 模拟登录 抓取数据:浏览器
# Log in to CSDN through a Chrome window driven by Selenium, turn the
# browser's cookies into a Cookie header, and fetch http://my.csdn.net/ with
# urllib. The raw response is saved to csdn.txt and printed.
#
# The slider captcha has to be solved by hand: the script pauses for 10 s
# after locating the login button and before clicking it.
import selenium
import selenium.webdriver
import selenium.webdriver.common.keys
import urllib.request
import time
from selenium.webdriver.common.by import By

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")

    # Switch to the account/password login form.
    login = driver.find_element(By.LINK_TEXT, "帐号密码登陆")
    login.click()
    time.sleep(5)

    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)

    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)

    login_button = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # window for solving the slider captcha by hand
    login_button.click()
    print(driver.page_source)
    time.sleep(15)  # give the site time to set its login cookies

    cookies = driver.get_cookies()  # every cookie the browser holds
    print(cookies)
    for cookie in cookies:
        print(cookie["name"], cookie["value"])
    # Build the header as "name=value;" pairs, including the trailing ";".
    cookiestr = "".join(
        "{}={};".format(cookie["name"], cookie["value"]) for cookie in cookies
    )
    print("------------------------")

    print("开始会话")
    headers = {
        "Host": "my.csdn.net",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Referer": "http://www.csdn.net/",
        # "Accept-Encoding" is not sent: urllib does not decompress
        # gzip/deflate bodies, so the response is written out as received.
        "Accept-Language": "zh-CN,zh;q=0.8",
        "cookie": cookiestr,
    }
    request = urllib.request.Request("http://my.csdn.net/", headers=headers)
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(request) as response:
        newpagetext = response.read()

    # Binary mode: the body is stored as raw bytes, without decoding.
    with open("csdn.txt", "wb") as out:
        out.write(newpagetext)

    print(newpagetext)
    print("会话完成")
    time.sleep(10)
finally:
    # quit() ends the browser and the driver process even if a step above
    # raised; close() would only close the current window.
    driver.quit()