由于 JS 脚本的影响,我们通过请求获得的数据经常与浏览器中显示的网页数据不同。而 Chrome 插件 XPath Helper 不能调试本地网页,因此有了自己制作一个 XPath 解析器的想法。(粗略测试了一下,没有发现问题;如果你们发现 bug,记得在评论里告诉我~)
工具:html
有关(一)、(二)部分的教程可以参考:https://www.jb51.net/article/...
将如下代码添加到 `def setupUi` 方法体的末尾:
# Hook up each button's clicked signal to its handler method.
for button, handler in (
    (self.button_Get_html, self.Button_Get_Html),
    (self.button_Xpath_Parse, self.Button_Xpath_Parse),
):
    button.clicked.connect(handler)
以下分别是按钮 **Get Html** 和按钮 **Xpath Parse** 的代码:
def Button_Get_Html(self):
    """Fetch the page named in the URL box and show its source.

    Reads the URL from ``text_Web_Site``, downloads it with a browser-like
    User-Agent, and writes the decoded body to ``text_HTML_Code``.  Any
    failure is reported by writing the exception message to the same
    widget instead of raising out of the slot.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
    }
    url = self.text_Web_Site.toPlainText().strip()
    if not url:
        self.text_HTML_Code.setPlainText('网址不能为空!')
        return

    # Default to http:// for any URL typed without a scheme, not only for
    # ones that happen to start with "w".
    if '://' not in url:
        url = 'http://' + url

    try:
        # The context manager closes the session's connections afterwards.
        with requests.session() as session:
            # NOTE: verify=False disables TLS certificate verification.
            # The timeout keeps an unresponsive server from blocking the
            # GUI thread forever.
            response = session.get(
                url=url, headers=headers, verify=False, timeout=10)
            # Bytes that are not valid UTF-8 are dropped rather than raising.
            res = response.content.decode('utf-8', 'ignore')
    except Exception as e:
        # UI boundary: show the error to the user rather than propagating.
        self.text_HTML_Code.setPlainText(str(e))
    else:
        # Show the returned content in text_HTML_Code.
        self.text_HTML_Code.setPlainText(res)


def Button_Xpath_Parse(self):
    """Evaluate the XPath expression against the HTML box and list matches.

    The HTML is taken from ``text_HTML_Code`` and the expression from
    ``text_Xpath_Syntax``.  Each match is appended to ``text_Result`` under
    a numbered separator line.  Parse or evaluation errors replace the
    result box content with the exception message.
    """
    self.text_Result.document().clear()
    xpath_syntax = self.text_Xpath_Syntax.toPlainText()
    html_code = self.text_HTML_Code.toPlainText()

    try:
        # Parsing sits inside the try so that a parser error is reported in
        # the result box instead of escaping from the slot.
        html = etree.HTML(html_code)
        if html is None:
            raise ValueError('HTML 内容为空或无法解析!')
        results = html.xpath(xpath_syntax)
    except Exception as e:
        self.text_Result.setPlainText(str(e))
        return

    # XPath functions such as count(), string() or boolean() yield a single
    # scalar instead of a list; wrap it so it is shown as one result.
    if not isinstance(results, list):
        results = [results]

    for num, result in enumerate(results):
        self.text_Result.append('-' * 60 + '这里是第 ' + str(num) + ' 个')
        if isinstance(result, str):
            # Attribute values and text() nodes come back as strings.
            text = result
        elif hasattr(result, 'text'):
            # Element nodes: an element without leading text has text=None.
            text = result.text or ''
        else:
            # Numbers, booleans and any other result kinds.
            text = str(result)
        self.text_Result.append(text)
if __name__ == '__main__':
    # Every PyQt5 application must create an application object; sys.argv
    # is the list of arguments passed on the command line.
    application = QtWidgets.QApplication(sys.argv)

    # QWidget is the base class of all PyQt5 user-interface objects.  The
    # default constructor is used here, so the widget has no parent.
    window = QtWidgets.QWidget()
    helper_ui = Ui_Asyu17_Xpath_Helper()
    helper_ui.setupUi(window)
    window.show()

    # sys.exit() makes sure the application exits cleanly.  The method is
    # spelled exec_() with an underscore because exec was a Python keyword.
    exit_code = application.exec_()
    sys.exit(exit_code)
测试无问题后,可以使用 pyinstaller 将代码打包成可执行文件~
完整代码:
from PyQt5 import QtCore, QtGui, QtWidgets
import sys
import requests
from lxml import etree

# Requests are sent with verify=False below; silence the resulting
# InsecureRequestWarning messages.
requests.packages.urllib3.disable_warnings()


class Ui_Asyu17_Xpath_Helper(object):
    """UI definition and button handlers for the XPath helper window."""

    def setupUi(self, Asyu17_Xpath_Helper):
        """Create all child widgets on the given widget and wire the buttons.

        Args:
            Asyu17_Xpath_Helper: the widget that becomes the parent of every
                control created here.
        """
        Asyu17_Xpath_Helper.setObjectName("Asyu17_Xpath_Helper")
        Asyu17_Xpath_Helper.resize(969, 905)

        # "Xpath Parse" button.
        self.button_Xpath_Parse = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Xpath_Parse.setGeometry(QtCore.QRect(830, 860, 75, 31))
        self.button_Xpath_Parse.setObjectName("button_Xpath_Parse")

        # Caption above the HTML pane.
        self.label = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label.setGeometry(QtCore.QRect(10, 10, 71, 16))
        self.label.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label.setScaledContents(False)
        self.label.setObjectName("label")

        # Caption above the result pane.
        self.label_2 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_2.setGeometry(QtCore.QRect(490, 10, 51, 16))
        self.label_2.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label_2.setScaledContents(False)
        self.label_2.setObjectName("label_2")

        # Caption and editable input for the XPath expression.
        self.label_3 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_3.setGeometry(QtCore.QRect(20, 860, 91, 31))
        self.label_3.setObjectName("label_3")
        self.text_Xpath_Syntax = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Xpath_Syntax.setGeometry(QtCore.QRect(110, 860, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Xpath_Syntax.setFont(font)
        self.text_Xpath_Syntax.setReadOnly(False)
        self.text_Xpath_Syntax.setObjectName("text_Xpath_Syntax")

        # "Get Html" button.
        self.button_Get_html = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Get_html.setGeometry(QtCore.QRect(830, 820, 75, 31))
        self.button_Get_html.setObjectName("button_Get_html")

        # Caption and editable input for the web-site URL.
        self.text_Web_Site = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Web_Site.setGeometry(QtCore.QRect(110, 820, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Web_Site.setFont(font)
        self.text_Web_Site.setReadOnly(False)
        self.text_Web_Site.setObjectName("text_Web_Site")
        self.label_4 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_4.setGeometry(QtCore.QRect(20, 820, 91, 31))
        self.label_4.setObjectName("label_4")

        # Container holding the HTML pane and the result pane side by side.
        self.layoutWidget = QtWidgets.QWidget(Asyu17_Xpath_Helper)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 30, 951, 781))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")

        # Editable HTML source pane.
        self.text_HTML_Code = QtWidgets.QTextBrowser(self.layoutWidget)
        self.text_HTML_Code.setEnabled(True)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_HTML_Code.setFont(font)
        self.text_HTML_Code.setMouseTracking(False)
        self.text_HTML_Code.setTabletTracking(False)
        self.text_HTML_Code.setReadOnly(False)
        self.text_HTML_Code.setObjectName("text_HTML_Code")
        self.horizontalLayout.addWidget(self.text_HTML_Code)

        # Result pane.
        self.text_Result = QtWidgets.QTextBrowser(self.layoutWidget)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_Result.setFont(font)
        self.text_Result.setReadOnly(False)
        self.horizontalLayout.addWidget(self.text_Result)

        self.retranslateUi(Asyu17_Xpath_Helper)
        QtCore.QMetaObject.connectSlotsByName(Asyu17_Xpath_Helper)

        # Connect the buttons to their handlers.
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

    def retranslateUi(self, Asyu17_Xpath_Helper):
        """Set the window title and the visible text of labels and buttons."""
        _translate = QtCore.QCoreApplication.translate
        Asyu17_Xpath_Helper.setWindowTitle(_translate("Asyu17_Xpath_Helper", "Asyu17 Xpath Helper"))
        self.button_Xpath_Parse.setText(_translate("Asyu17_Xpath_Helper", "Xpath Parse"))
        self.label.setText(_translate("Asyu17_Xpath_Helper", "HTML Code:"))
        self.label_2.setText(_translate("Asyu17_Xpath_Helper", "Result:"))
        self.label_3.setText(_translate("Asyu17_Xpath_Helper", "Xpath Syntax:"))
        self.button_Get_html.setText(_translate("Asyu17_Xpath_Helper", "Get Html"))
        self.label_4.setText(_translate("Asyu17_Xpath_Helper", "Web Site:"))

    def Button_Get_Html(self):
        """Fetch the page named in the URL box and show its source.

        Reads the URL from ``text_Web_Site``, downloads it with a
        browser-like User-Agent, and writes the decoded body to
        ``text_HTML_Code``.  Any failure is reported by writing the
        exception message to the same widget instead of raising out of the
        slot.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
        }
        url = self.text_Web_Site.toPlainText().strip()
        if not url:
            self.text_HTML_Code.setPlainText('网址不能为空!')
            return

        # Default to http:// for any URL typed without a scheme, not only
        # for ones that happen to start with "w".
        if '://' not in url:
            url = 'http://' + url

        try:
            # The context manager closes the session's connections afterwards.
            with requests.session() as session:
                # NOTE: verify=False disables TLS certificate verification.
                # The timeout keeps an unresponsive server from blocking the
                # GUI thread forever.
                response = session.get(
                    url=url, headers=headers, verify=False, timeout=10)
                # Bytes that are not valid UTF-8 are dropped, not raised on.
                res = response.content.decode('utf-8', 'ignore')
        except Exception as e:
            # UI boundary: show the error to the user, do not propagate.
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Show the returned content in text_HTML_Code.
            self.text_HTML_Code.setPlainText(res)

    def Button_Xpath_Parse(self):
        """Evaluate the XPath expression against the HTML box and list matches.

        The HTML is taken from ``text_HTML_Code`` and the expression from
        ``text_Xpath_Syntax``.  Each match is appended to ``text_Result``
        under a numbered separator line.  Parse or evaluation errors replace
        the result box content with the exception message.
        """
        self.text_Result.document().clear()
        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()

        try:
            # Parsing sits inside the try so that a parser error is reported
            # in the result box instead of escaping from the slot.
            html = etree.HTML(html_code)
            if html is None:
                raise ValueError('HTML 内容为空或无法解析!')
            results = html.xpath(xpath_syntax)
        except Exception as e:
            self.text_Result.setPlainText(str(e))
            return

        # XPath functions such as count(), string() or boolean() yield a
        # single scalar instead of a list; wrap it so it is shown as one
        # result.
        if not isinstance(results, list):
            results = [results]

        for num, result in enumerate(results):
            self.text_Result.append('-' * 60 + '这里是第 ' + str(num) + ' 个')
            if isinstance(result, str):
                # Attribute values and text() nodes come back as strings.
                text = result
            elif hasattr(result, 'text'):
                # Element nodes: an element without leading text has
                # text=None.
                text = result.text or ''
            else:
                # Numbers, booleans and any other result kinds.
                text = str(result)
            self.text_Result.append(text)


if __name__ == '__main__':
    # Every PyQt5 application must create an application object; sys.argv
    # is the list of arguments passed on the command line.
    app = QtWidgets.QApplication(sys.argv)
    # QWidget is the base class of all PyQt5 user-interface objects.  The
    # default constructor is used here, so the widget has no parent.
    w = QtWidgets.QWidget()
    ui = Ui_Asyu17_Xpath_Helper()
    ui.setupUi(w)
    w.show()
    # sys.exit() makes sure the application exits cleanly.  The method is
    # spelled exec_() with an underscore because exec was a Python keyword.
    sys.exit(app.exec_())