一、首先分析登录页面,经分析得知,知乎登录的POST数据包含 _xsrf、password 以及 phone_num(或 email)这几个字段。
二、XSRF为跨站请求伪造(Cross-site request forgery)。经过搜集资料,在大神的博客 http://cuiqingcai.com/2076.html 里找到了相应资料,说得也很清楚,有兴趣可以查看。_xsrf 这个参数是为了防范XSRF攻击而设置的一个hash值,每次访问主页都会生成这样一个唯一的字符串。这里我们只关注如何去取这个xsrf值。右键分析网页源码可以发现,它位于 name="_xsrf" 的表单字段的 value 属性中。
这样一来,我们只需要用requests请求到页面的响应response之后,用正则匹配得到这个xsrf就行了。解决了这个问题,我们就可以去模拟登录了。
三、直接贴上源码:
# -*- coding: utf-8 -*-
# Simulate a Zhihu login with ``requests``.
#
# A module-level session is created whose cookies are backed by the file
# ``cookies.txt``; the helpers below fetch the anti-CSRF token, post the
# credentials, and check whether the session is logged in.
import re

import requests

try:
    import cookielib  # Python 2 module name
except ImportError:
    import http.cookiejar as cookielib  # Python 3 module name

session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except IOError:
    # A missing or unreadable cookie file is expected on the first run;
    # report it and continue with an empty jar.
    print("cookie未能加载")

agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
header = {
    "HOST": "www.zhihu.com",
    # The original value was misspelled as "zhizhu.com".
    "Referer": "https://www.zhihu.com",
    "User-Agent": agent,
}


def is_login():
    """Return True when a GET of a fixed Zhihu page answers with HTTP 200.

    Redirects are not followed, so any redirect response counts as
    "not logged in".
    """
    inbox_url = "https://www.zhihu.com/question/56250357/answer/148534773"
    response = session.get(inbox_url, headers=header, allow_redirects=False)
    return response.status_code == 200


def get_xsrf():
    """Fetch the Zhihu home page and return its ``_xsrf`` token.

    Returns an empty string when the token is not present in the page.
    """
    response = session.get("https://www.zhihu.com", headers=header)
    # re.search scans the whole document; the previous re.match with a
    # leading ".*" could only find the token on the first line because
    # "." does not match newlines.
    match_obj = re.search(r'name="_xsrf" value="(.*?)"', response.text)
    if match_obj:
        return match_obj.group(1)
    return ""


def get_index():
    """Download the Zhihu home page and save it as ``index_page.html``."""
    response = session.get("https://www.zhihu.com", headers=header)
    with open("index_page.html", "wb") as f:
        f.write(response.text.encode("utf-8"))
    print("ok")


def zhihu_login(account, password):
    """Post the credentials to Zhihu and save the session cookies.

    ``account`` is treated as a phone number when it is an 11-digit
    string starting with "1", and as an email address when it contains
    "@".

    Raises:
        ValueError: if ``account`` is neither a phone number nor an email.
    """
    if re.match(r"^1\d{10}$", account):
        print("手机号码登陆")
        post_url = "https://www.zhihu.com/login/phone_num"
        post_data = {
            "_xsrf": get_xsrf(),
            "phone_num": account,
            "password": password,
        }
    elif "@" in account:
        print("邮箱方式登陆")
        post_url = "https://www.zhihu.com/login/email"
        post_data = {
            "_xsrf": get_xsrf(),
            "email": account,
            "password": password,
        }
    else:
        # Previously this path fell through with post_url unbound and
        # failed with a NameError at the POST below.
        raise ValueError("account must be a phone number or an email address")

    session.post(post_url, data=post_data, headers=header)
    session.cookies.save()


if __name__ == "__main__":
    # NOTE(review): credentials are hard-coded in source; load them from
    # the environment or a prompt before using this for real.
    zhihu_login("18782902568", "admin123")
    print(is_login())