使用 requests 模拟登录知乎

时间 2019-11-19

原文链接

一、首先分析登录页面。经分析得知，知乎登录时 POST 的数据包含三个字段：_xsrf、账号（手机号登录时为 phone_num，邮箱登录时为 email）以及 password。

二、XSRF 即跨站请求伪造（Cross-site request forgery）。经过搜集资料，在大神的博客里找到了相应的说明：http://cuiqingcai.com/2076.html ，讲得很清楚，有兴趣可以查看。_xsrf 这个参数是为了防范 XSRF 攻击而设置的一个 hash 值，每次访问主页都会生成这样一个唯一的字符串。这里我们只关注如何取到这个 xsrf 值。右键查看网页源码可以发现，页面中有一个形如 name="_xsrf" value="……" 的字段，其中 value 的值就是我们需要的 xsrf。

这样一来，我们只需要用 requests 请求页面，拿到响应 response 之后，用正则匹配取出这个 xsrf 就可以了。解决了这个问题，我们就可以去模拟登录了。

三、直接贴上源码

# -*- coding: utf-8 -*-

import requests
try:
import cookielib
except:
import http.cookiejar as cookielib

import re

# One shared session so that cookies carry over between every request below.
session = requests.session()
# Back the session with a cookie jar bound to the file "cookies.txt".
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except (cookielib.LoadError, IOError, OSError):
    # Best effort: a missing or unreadable cookie file only means that no
    # previous cookies are available, so report it and carry on.
    print("cookie未能加载")

agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
# Headers passed explicitly to every request made by the functions below.
header = {
    "HOST": "www.zhihu.com",
    # Fixed typo: the Referer previously pointed at "www.zhizhu.com".
    "Referer": "https://www.zhihu.com",
    "User-Agent": agent,
}

def is_login():
    """Report whether the shared session appears to be logged in.

    Requests a fixed zhihu.com URL without following redirects and uses the
    HTTP status code as the signal.

    Returns:
        True when the status code is 200, False for any other status.
    """
    # NOTE(review): the original comment described this as the personal
    # center page, but the URL is a question/answer page -- confirm which
    # page is the intended probe.
    probe_url = "https://www.zhihu.com/question/56250357/answer/148534773"
    reply = session.get(probe_url, headers=header, allow_redirects=False)
    return reply.status_code == 200

def get_xsrf():
    """Fetch the zhihu.com home page and extract its ``_xsrf`` token.

    Returns:
        The text captured from ``name="_xsrf" value="..."`` in the response
        body, or ``""`` when the page contains no such field.
    """
    response = session.get("https://www.zhihu.com", headers=header)
    # re.search scans the whole document. The earlier
    # re.match('.*name="_xsrf" ...') was anchored at the start of the text
    # and '.' does not match newlines, so it could only find a token located
    # on the very first line of the HTML.
    match_obj = re.search(r'name="_xsrf" value="(.*?)"', response.text)
    return match_obj.group(1) if match_obj else ""


def get_index():
    """Download the zhihu.com home page and save it to ``index_page.html``.

    The response text is written as UTF-8 bytes; "ok" is printed afterwards.
    """
    page = session.get("https://www.zhihu.com", headers=header)
    with open("index_page.html", "wb") as out_file:
        html_bytes = page.text.encode("utf-8")
        out_file.write(html_bytes)
    print("ok")

def zhihu_login(account, password):
    """Log in to zhihu.com with a phone number or an e-mail address.

    Args:
        account: Either an 11-digit phone number starting with ``1`` or an
            e-mail address (any other string containing ``@``).
        password: The password submitted with the account.

    Raises:
        ValueError: If ``account`` is neither a phone number nor an e-mail
            address. Raised before any network request is made.

    The login response is not inspected here; the session cookies are saved
    after the POST regardless of its outcome.
    """
    # Raw string, anchored at the end: without the anchor an address such as
    # "13812345678@qq.com" was misclassified as a phone number.
    if re.match(r"1\d{10}\Z", account):
        print("手机号码登陆")
        post_url = "https://www.zhihu.com/login/phone_num"
        account_field = "phone_num"
    elif "@" in account:
        print("邮箱方式登陆")
        post_url = "https://www.zhihu.com/login/email"
        account_field = "email"
    else:
        # Previously this case fell through and crashed with an
        # UnboundLocalError on post_url; fail with a clear message instead.
        raise ValueError(
            "account must be an 11-digit phone number or an e-mail address: %r"
            % (account,)
        )

    post_data = {
        "_xsrf": get_xsrf(),
        account_field: account,
        "password": password,
    }
    session.post(post_url, data=post_data, headers=header)
    session.cookies.save()

# NOTE(review): the credentials are hard-coded; substitute your own account
# and avoid committing real credentials.
zhihu_login("18782902568", "admin123")
# get_index()
# Print the check's outcome; the bare is_login() call discarded its result.
print(is_login())