通过浏览器检查网页源代码未发现提交信息;检查元素后,在 XHR 中发现了所需要的信息。
找到包含所需信息的网址:https://edu.cnblogs.com/Homework/GetAnswers?homeworkId=2420&_=1543629375998 ,剩下就是代码的问题了。
https://edu.cnblogs.com/campus/hbu/Python2018Fall/homework/2420
https://edu.cnblogs.com/Homework/GetAnswers?homeworkId=2420&_=1543629375998
不过,通过原网址和现网址的对比,发现“2420”相同,遂猜测可以通过网址最后的编号提取“博客园”所有作业的信息;经过代码实践,“https://edu.cnblogs.com/Homework/GetAnswers?homeworkId=2420”即可提取信息。
至此便完成了这次网络爬虫的全部工作。但是,最近正在学习 Python 的图形界面,遂设计了一个简单的爬取界面。
输入博客园的作业链接,点击“开始爬取”,就可以将爬取的信息显示在下方的输出窗口。
更有意思的是,只要改最后四个数字,就可以爬取其他的作业链接,上图就是小小的实验。
最后爬了下网络爬虫作业的信息。
# ---------------------------------------------------------------------------
# UI definition (the listing later imports this class from a module named
# ``login``).
# ---------------------------------------------------------------------------
from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Form(object):
    """Widget layout for the crawler window.

    Creates a label, a line edit for the homework link, a start button and a
    text browser used as the output pane.
    """

    def setupUi(self, Form):
        """Create the child widgets on *Form* and wire up auto-connected slots."""
        Form.setObjectName("Form")
        Form.resize(1083, 667)

        self.label = QtWidgets.QLabel(Form)
        self.label.setGeometry(QtCore.QRect(110, 50, 91, 41))
        font = QtGui.QFont()
        font.setPointSize(12)
        self.label.setFont(font)
        self.label.setObjectName("label")

        self.lineEdit = QtWidgets.QLineEdit(Form)
        self.lineEdit.setGeometry(QtCore.QRect(210, 60, 441, 31))
        self.lineEdit.setObjectName("lineEdit")

        self.pushButton = QtWidgets.QPushButton(Form)
        self.pushButton.setGeometry(QtCore.QRect(650, 60, 91, 31))
        font = QtGui.QFont()
        font.setPointSize(12)
        self.pushButton.setFont(font)
        self.pushButton.setObjectName("pushButton")

        self.textBrowser = QtWidgets.QTextBrowser(Form)
        self.textBrowser.setGeometry(QtCore.QRect(70, 110, 891, 501))
        self.textBrowser.setObjectName("textBrowser")

        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the visible texts of the label and button."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        self.label.setText(_translate("Form", "博客园连接:"))
        self.pushButton.setText(_translate("Form", "开始爬取"))


# ---------------------------------------------------------------------------
# Application window and entry point.
# ---------------------------------------------------------------------------
import csv
import json

import requests
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QFileDialog

try:
    # The listing imports the UI class from a separate ``login`` module;
    # keep preferring that module whenever it is importable.
    from login import Ui_Form
except ImportError:
    # No ``login`` module on the path (e.g. the listing was saved as a single
    # file): fall back to the Ui_Form class defined above.
    pass

# Endpoint that returns the submitted answers of one homework as JSON.
ANSWERS_URL = "https://edu.cnblogs.com/Homework/GetAnswers?homeworkId={}"
# Raw string: ``\h`` is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning. The runtime value is unchanged.
OUTPUT_PATH = r'D:\hwlist.csv'
# Seconds to wait for the server before giving up, so a dead connection
# cannot hang the request indefinitely.
REQUEST_TIMEOUT = 10


def _extract_homework_id(link):
    """Return the last path segment of *link*, used as the homework id.

    Surrounding whitespace, a query string, a fragment and trailing slashes
    are ignored, so ``".../homework/2420/"`` and ``"2420"`` both give
    ``"2420"``. Returns an empty string when nothing is left.
    """
    path = link.strip().split('#', 1)[0].split('?', 1)[0]
    return path.rstrip('/').rsplit('/', 1)[-1]


def _answer_to_row(answer):
    """Convert one answer record into a list of five strings.

    The fields are, in order: ``StudentNo``, ``RealName``, ``DateAdded``
    (with ``T`` replaced by a space and anything after the first ``.``
    dropped), ``Title`` and ``Url``.

    Raises:
        KeyError: if one of the fields is missing from *answer*.
    """
    return [
        str(answer['StudentNo']),
        str(answer['RealName']),
        str(answer['DateAdded']).replace('T', ' ').split('.')[0],
        str(answer['Title']),
        # Convert before any concatenation so a null Url cannot raise.
        str(answer['Url']),
    ]


class mywindow(QtWidgets.QWidget, Ui_Form):
    """Main window: reads a homework link, fetches the answers, shows and saves them."""

    def __init__(self):
        super(mywindow, self).__init__()
        self.setupUi(self)
        self.pushButton.clicked.connect(self.fun)

    def fun(self):
        """Fetch the answer list for the entered homework link.

        The comma-separated result is shown in the text browser and also
        written to ``OUTPUT_PATH`` as CSV. Network, HTTP, JSON and file
        errors are reported in the text browser instead of propagating out
        of the slot.
        """
        homework_id = _extract_homework_id(self.lineEdit.text())
        if not homework_id:
            self.textBrowser.setText("请输入作业链接或作业编号")
            return

        url = ANSWERS_URL.format(homework_id)
        try:
            r = requests.get(url, timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
            # JSON is UTF-8 by specification; decoding explicitly avoids
            # relying on a guessed charset ("utf-8-sig" also tolerates a BOM).
            payload = json.loads(r.content.decode('utf-8-sig'))
            rows = [_answer_to_row(item) for item in (payload['data'] or [])]
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            self.textBrowser.setText("爬取失败: {}".format(err))
            return

        # On-screen text keeps the plain "a,b,c\n" layout.
        self.textBrowser.setText(''.join(','.join(row) + '\n' for row in rows))

        try:
            # csv.writer quotes fields that contain commas or quotes, so such
            # titles no longer shift columns. newline='' is required by the
            # csv module; utf-8-sig can encode any character and the BOM lets
            # spreadsheet tools detect the encoding.
            with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8-sig') as f:
                csv.writer(f).writerows(rows)
        except OSError as err:
            self.textBrowser.append("保存文件失败: {}".format(err))


if __name__ == "__main__":
    import sys
    from PyQt5.QtGui import QIcon  # imported by the original listing; unused here

    app = QtWidgets.QApplication(sys.argv)
    ui = mywindow()
    ui.show()
    sys.exit(app.exec_())