"""Scrape one Weibo user's posts from the m.weibo.cn JSON API.

Each post is printed, appended as one JSON line to ``456789.json`` and
inserted into the ``weibo`` collection of the ``weibo`` MongoDB database.
"""
import json
from urllib.parse import urlencode

import requests
from pymongo import MongoClient  # MongoDB connection
from pyquery import PyQuery as pq

base_url = 'https://m.weibo.cn/api/container/getIndex?'
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2830678474',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36 ',
    'X-Requested-With': 'XMLHttpRequest',
}

client = MongoClient()  # connect to MongoDB with the driver's default settings
db = client['weibo']  # select the database named "weibo"
collection = db['weibo']  # select the collection named "weibo" inside that database
max_page = 10  # last page number requested by the driver loop


def get_page(page, timeout=10):
    """Fetch one page of the user's card list.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(payload, page)`` tuple where ``payload`` is the decoded JSON
        body, or ``None`` when the request fails, the status is not 200,
        or the body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json(), page
        print('Error', 'unexpected HTTP status', response.status_code)
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that cannot be decoded as JSON.
        print('Error', e.args)
    return None


def parse_page(json, page: int):
    """Yield one summary dict per post found in a page payload.

    Args:
        json: Decoded JSON payload returned by ``get_page``. The name shadows
            the ``json`` module inside this function; it is kept so existing
            keyword callers continue to work.
        page: Page number the payload belongs to.

    Yields:
        Dicts with the keys ``id``, ``text``, ``attitudes``, ``comments``
        and ``reposts``. ``text`` is the post body with HTML markup removed.
    """
    if not json:
        return
    # Tolerate payloads where "data" or "cards" is missing or null.
    cards = (json.get('data') or {}).get('cards') or []
    for index, card in enumerate(cards):
        # The second card of the first page is always skipped.
        # NOTE(review): presumably that slot holds a non-post card - confirm.
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Cards without a post body carry nothing to record.
            continue
        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # PyQuery is only given real markup; missing text becomes ''.
            'text': pq(raw_text).text() if raw_text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }


def save_to_mongo(result):
    """Insert one post dict into the MongoDB collection.

    Args:
        result: Post dict as produced by ``parse_page``.
    """
    # The returned result object is always truthy, so check the write
    # acknowledgement explicitly instead of the object itself.
    if collection.insert_one(result).acknowledged:
        print('Saved to Mongo')


def main():
    """Fetch pages 1..max_page and persist every post that is found."""
    # Open the output file once rather than once per record.
    with open('456789.json', 'a', encoding='utf-8') as file:
        for page in range(1, max_page + 1):
            fetched = get_page(page)
            if fetched is None:
                # The request failed; move on instead of unpacking None.
                continue
            # "*" unpacks the (payload, page) tuple into positional arguments.
            for result in parse_page(*fetched):
                print(result)
                # Serialize before inserting: per the pymongo documentation,
                # insert_one adds an "_id" field to the dict in place.
                file.write(json.dumps(result, ensure_ascii=False))
                file.write('\n')
                save_to_mongo(result)


if __name__ == '__main__':
    main()