"""Simulated login to Zhihu through its web OAuth sign-in endpoint.

The module defines :class:`ZhihuAccount`, which keeps a ``requests`` session
whose cookies are persisted to ``./cookies.txt`` so that a later run can reuse
a previous login instead of signing in again.
"""
import base64
import hashlib
import hmac
import json
import re
import time
from http import cookiejar

import matplotlib.pyplot as plt
import requests
from PIL import Image

HEADERS = {
    'Connection': 'keep-alive',
    'Host': 'www.zhihu.com',
    'Referer': 'https://www.zhihu.com/',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36'
}
LOGIN_URL = 'https://www.zhihu.com/signup'
LOGIN_API = 'https://www.zhihu.com/api/v3/oauth/sign_in'
FORM_DATA = {
    'client_id': 'c3cef7c66a1843f8b3a9e6a1e3160e20',
    'grant_type': 'password',
    'source': 'com.zhihu.web',
    'username': '',
    'password': '',
    # Change to 'cn' to get the "upside-down Chinese characters" captcha.
    'lang': 'en',
    'ref_source': 'homepage'
}


class ZhihuAccount(object):
    """A Zhihu login session backed by an on-disk cookie jar."""

    def __init__(self):
        """Create the session, its default headers and the cookie jar."""
        self.login_url = LOGIN_URL
        self.login_api = LOGIN_API
        # Copy so per-instance updates never leak into the module constant.
        self.login_data = FORM_DATA.copy()
        self.session = requests.session()
        self.session.headers = HEADERS.copy()
        self.session.cookies = cookiejar.LWPCookieJar(filename='./cookies.txt')

    def login(self, username=None, password=None, load_cookies=True):
        """Sign in to Zhihu.

        :param username: phone number used to log in; prompted for if missing
        :param password: account password; prompted for if missing
        :param load_cookies: whether to try the cookies saved by a previous run
        :return: ``True`` when the session is logged in, ``False`` otherwise
        """
        # Fast path: a previously saved session that is still valid.
        if load_cookies and self.load_cookies():
            if self.check_login():
                return True

        headers = self.session.headers.copy()
        headers.update({
            'authorization': 'oauth c3cef7c66a1843f8b3a9e6a1e3160e20',
            'X-Xsrftoken': self._get_token()
        })
        username, password = self._check_user_pass(username, password)
        self.login_data.update({
            'username': username,
            'password': password
        })
        # Millisecond timestamp; the same value is sent and signed.
        timestamp = str(int(time.time() * 1000))
        self.login_data.update({
            'captcha': self._get_captcha(self.login_data['lang'], headers),
            'timestamp': timestamp,
            'signature': self._get_signature(timestamp)
        })

        resp = self.session.post(self.login_api, data=self.login_data, headers=headers)
        if 'error' in resp.text:
            # The body is expected to be JSON shaped like
            # {"error": {"message": ...}}; fall back to the raw text rather
            # than crashing when it is not.
            try:
                message = json.loads(resp.text)['error']['message']
            except (ValueError, KeyError, TypeError):
                message = resp.text
            print(message)
        elif self.check_login():
            return True
        print('登陆失败')
        return False

    def load_cookies(self):
        """Load the saved cookie file into the session.

        :return: ``True`` if the file was loaded, ``False`` if it does not
                 exist or cannot be parsed
        """
        try:
            self.session.cookies.load(ignore_discard=True)
        except (FileNotFoundError, cookiejar.LoadError):
            # A missing or corrupt cookie file just means "no saved session";
            # the caller then performs a fresh login.
            return False
        return True

    def check_login(self):
        """Check whether the session is logged in.

        Requests the sign-up page without following redirects; a 302 response
        is treated as "already logged in", in which case the current cookies
        are saved to disk.

        :return: ``True`` when logged in, ``False`` otherwise
        """
        resp = self.session.get(self.login_url, allow_redirects=False)
        if resp.status_code == 302:
            self.session.cookies.save()
            print('登陆成功')
            return True
        return False

    def _get_token(self):
        """Fetch the home page and return the token for ``X-Xsrftoken``.

        The request goes through ``self.session`` so that the cookie carrying
        the token is stored in the session and sent along with the header on
        the following requests.

        :return: the token value
        :raises IndexError: if the server returned no cookies at all
        """
        resp = self.session.get("https://www.zhihu.com")
        # NOTE(review): the cookie is assumed to be named '_xsrf' -- confirm
        # against a live response. Look in the session jar first because
        # cookies set during redirects are not in ``resp.cookies``.
        for jar in (self.session.cookies, resp.cookies):
            for cookie in jar:
                if cookie.name == '_xsrf':
                    return cookie.value
        # Fall back to the previous behaviour: the first cookie's value.
        values = [cookie.value for cookie in resp.cookies]
        if not values:
            raise IndexError('no cookies were returned; cannot obtain the XSRF token')
        return values[0]

    def _get_captcha(self, lang, headers):
        """Request the captcha API and, if required, ask the user to solve it.

        The API is requested once whether or not a captcha is needed. When one
        is needed the image arrives base64-encoded, is written to
        ``./captcha.jpg`` and shown to the user, who must solve it by hand.

        :param lang: captcha language, ``'en'`` or ``'cn'``
        :param headers: request headers carrying the authorization data
        :return: the captcha value for the login form, or ``''`` if no captcha
                 was required
        """
        if lang == 'cn':
            api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=cn'
        else:
            api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
        resp = self.session.get(api, headers=headers)
        # A captcha is required when the response text contains 'true'.
        if re.search(r'true', resp.text) is None:
            return ''

        put_resp = self.session.put(api, headers=headers)
        json_data = json.loads(put_resp.text)
        # json.loads has already decoded escapes, so strip real newlines; the
        # literal backslash-n form is still removed in case of double escaping.
        img_base64 = json_data['img_base64'].replace(r'\n', '').replace('\n', '')
        with open('./captcha.jpg', 'wb') as f:
            f.write(base64.b64decode(img_base64))
        img = Image.open('./captcha.jpg')
        if lang == 'cn':
            plt.imshow(img)
            print('点击全部倒立的汉字,按回车提交')
            points = plt.ginput(7)
            # Clicked coordinates are halved before submission.
            # NOTE(review): presumably the image is shown at twice the
            # 200x44 size reported in 'img_size' -- confirm.
            capt = json.dumps({'img_size': [200, 44],
                               'input_points': [[i[0] / 2, i[1] / 2] for i in points]})
        else:
            img.show()
            capt = input('请输入图片里的验证码:')
        # The answer must be POSTed to the captcha API before logging in.
        self.session.post(api, data={'input_text': capt}, headers=headers)
        return capt

    def _get_signature(self, timestamp):
        """Compute the HMAC-SHA1 signature sent with the login form.

        The signed message is the grant type, client id and source from the
        form data followed by the timestamp.

        :param timestamp: timestamp string, the same value sent in the form
        :return: hexadecimal signature
        """
        message = (self.login_data['grant_type']
                   + self.login_data['client_id']
                   + self.login_data['source']
                   + timestamp)
        ha = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=hashlib.sha1)
        ha.update(message.encode('utf-8'))
        return ha.hexdigest()

    def _check_user_pass(self, username, password):
        """Resolve the credentials, prompting for whatever is missing.

        A value of ``None`` falls back to the one stored in the form data; if
        the result is still empty the user is asked on the console. A bare
        11-digit phone number gets the ``+86`` prefix.

        :param username: phone number or ``None``
        :param password: password or ``None``
        :return: ``(username, password)`` tuple
        """
        if username is None:
            username = self.login_data.get('username')
        if not username:
            username = input('请输入手机号:')
        # An all-digit string cannot already contain '+86', so no extra check.
        if len(username) == 11 and username.isdigit():
            username = '+86' + username

        if password is None:
            password = self.login_data.get('password')
        if not password:
            password = input('请输入密码:')
        return username, password


if __name__ == '__main__':
    account = ZhihuAccount()
    account.login(username=None, password=None, load_cookies=True)
# GitHub: https://github.com/liyunchen/Zhihu-Login/blob/master/zhihu_login.py