Elasticsearch 的下载及安装教程详见官网。
使用 pip install elasticsearch 安装 Python 客户端,注意版本的对应(详见 elasticsearch python 手册)。
下载 analysis-ik 分词插件,并将其复制到 elasticsearch-7.1.1\plugins 目录下。
使用 Elasticsearch.indices.create() 建立名为 indexing_test 的索引:
from elasticsearch import Elasticsearch
# Client created with the library's default connection settings.
es = Elasticsearch()
index = 'indexing_test'

# Index settings and field mappings sent as the body of the create request.
# The legacy "index": "analyzed" and "include_in_all" parameters are left
# out on purpose: Elasticsearch 7.x accepts only true/false for "index" and
# no longer knows "include_in_all", so the request would be rejected with
# HTTP 400 -- an error that `ignore=[400, 404]` below would silently hide.
# `text` fields are analyzed and indexed by default.
mappings = {
    "settings": {
        "index": {
            "number_of_shards": 5,
            "number_of_replicas": 0,
        },
        "analysis": {
            "analyzer": {
                # Custom analyzer named "ik" built on the ik_max_word tokenizer
                # (requires the analysis-ik plugin to be installed).
                "ik": {
                    "tokenizer": "ik_max_word",
                },
            },
        },
    },
    "mappings": {
        "properties": {
            "sub": {
                "type": "text",
                "analyzer": "ik_max_word",
            },
            "verb": {
                "type": "text",
                "analyzer": "ik_max_word",
            },
            "obj": {
                "type": "text",
                "analyzer": "ik_max_word",
            },
        },
    },
}

# Create the index named by `index`. Responses with status 400 or 404 do not
# raise (presumably so that re-running against an existing index is harmless).
es.indices.create(index=index, ignore=[400, 404], body=mappings)
复制代码
本文用的数据为 csv 格式。使用 helpers.bulk() 批量上传数据:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import csv
es = Elasticsearch()
# Target index for every uploaded document; defined here so the script does
# not depend on a name from another script. It is the index the search code
# queries.
index = 'indexing_test'

# Read the CSV inside a `with` block so the file is closed once the upload is
# done. newline='' is what the csv module expects for files it parses.
with open('data/标引.csv', encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    # Lazy generator of bulk actions: one document per CSV row, using the
    # first four columns.
    action = ({
        "_index": index,
        "_source": {
            "标题": row[0], "摘要": row[1], "关键词": row[2], "标引词": row[3]
        }} for row in csv_reader)
    # Bulk-import the documents. This must run while the file is still open
    # because `action` reads rows on demand. Each action carries its own
    # "_index", so no separate (and previously conflicting) default index is
    # passed.
    helpers.bulk(es, action, raise_on_error=True)
复制代码
在特定字段中匹配输入的词并返回检索结果。
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import sys
def get_results(word):
    """Run the two comparison searches for *word*.

    The first search matches *word* against the "标题", "摘要" and "关键词"
    fields; the second matches it against the "标引词" field only. Both
    request highlighting on the fields they search and at most 30 hits.

    Args:
        word: The query text.

    Returns:
        A ``(res_left, res_right)`` tuple holding the raw search responses of
        the first and the second search respectively.
    """
    es = Elasticsearch()
    index = 'indexing_test'
    # Single result limit for both searches. The original body also carried
    # "size": 100, which conflicted with the size=30 argument passed to
    # search(); only the argument value is kept.
    size = 30

    full_text_fields = ["标题", "摘要", "关键词"]
    query = {
        "query": {
            "multi_match": {
                "query": word,
                "fields": full_text_fields,
            }
        },
        "highlight": {
            "fields": {field: {} for field in full_text_fields}
        },
    }
    query1 = {
        "query": {
            "match": {
                "标引词": word
            }
        },
        "highlight": {
            "fields": {
                "标引词": {}
            }
        },
    }

    res_left = es.search(index=index, body=query, size=size)
    res_right = es.search(index=index, body=query1, size=size)
    return res_left, res_right
if __name__ == "__main__":
    # This script defines no main(), so calling it would raise NameError.
    # Instead, run both searches for the word given on the command line and
    # print the reported total hit counts.
    if len(sys.argv) != 2:
        sys.exit("usage: " + sys.argv[0] + " WORD")
    res_left, res_right = get_results(sys.argv[1])
    print(res_left['hits']['total']['value'], res_right['hits']['total']['value'])
复制代码
界面如图所示:
from PyQt5 import QtCore,QtGui,QtWidgets
import sys
from query_scroll_scan import get_results
class MainUi(QtWidgets.QMainWindow):
    """Main window with a search box and two side-by-side result panes.

    The left pane shows the first response returned by ``get_results`` and
    the right pane shows the second one.
    """

    def __init__(self):
        super().__init__()
        self.init_ui()

    def init_ui(self):
        """Create all widgets, lay them out and connect the search signals."""
        self.resize(960, 700)
        self.setWindowTitle('检索测试')

        # Central widget with a grid layout that hosts everything else.
        self.main_widget = QtWidgets.QWidget()
        self.main_layout = QtWidgets.QGridLayout()
        self.main_widget.setLayout(self.main_layout)

        # Search input and button; clicking the button or pressing Return in
        # the input both trigger get_words.
        self.right_bar_widget_search_input = QtWidgets.QLineEdit()
        self.right_bar_widget_search_input.setPlaceholderText("输入关键词,点击按钮/回车进行搜索")
        self.search_button = QtWidgets.QPushButton("搜索")
        self.search_button.clicked.connect(self.get_words)
        self.right_bar_widget_search_input.returnPressed.connect(self.get_words)

        # Container widget holding the input row, the labels and the panes.
        self.up_widget = QtWidgets.QWidget()
        self.up_widget.setObjectName('up_widget')
        self.up_layout = QtWidgets.QGridLayout()
        self.up_widget.setLayout(self.up_layout)

        self.up_layout.addWidget(self.right_bar_widget_search_input, 0, 0)
        self.up_layout.addWidget(self.search_button, 0, 1)

        self.left_label = QtWidgets.QLabel("全文检索结果")
        self.right_label = QtWidgets.QLabel("主题标引后检索结果")
        self.up_layout.addWidget(self.left_label, 1, 0)
        self.up_layout.addWidget(self.right_label, 1, 1)

        self.left_text = QtWidgets.QTextEdit()
        self.up_layout.addWidget(self.left_text, 3, 0)
        self.right_text = QtWidgets.QTextEdit()
        self.up_layout.addWidget(self.right_text, 3, 1)

        self.main_layout.addWidget(self.up_widget, 0, 0, 1, 1)
        self.setCentralWidget(self.main_widget)

    def get_words(self):
        """Search for the text in the input box and refresh both panes."""
        words = self.right_bar_widget_search_input.text()
        res_left, res_right = get_results(words)
        # No new QLabel is created here: the label built in init_ui is
        # already in the layout, and a fresh one would never be displayed.
        self._show_results(self.left_text, res_left)
        self._show_results(self.right_text, res_right)

    def _show_results(self, text_edit, response):
        """Replace the content of *text_edit* with the hits in *response*.

        Args:
            text_edit: The QTextEdit to fill.
            response: A search response providing ``hits.total.value`` and a
                ``hits.hits`` list whose items carry ``_source`` and
                ``_score``.
        """
        # Function-scope import keeps this class self-contained.
        import html

        total = response['hits']['total']['value']
        text_edit.setText("")
        text_edit.append("<font size='3'>共检索到<em> " + str(total) + "</em> 条结果<br/></font>")
        for hit in response['hits']['hits']:
            source = hit["_source"]
            # Field values are document text, not markup: escape them so
            # characters such as '<' or '&' are shown literally instead of
            # being interpreted as rich text.
            text_edit.append(
                "<div>"
                "<font color='red' size='3'>标题:" + html.escape(source["标题"]) + "<br/></font>"
                "<font size='3'>摘要:" + html.escape(source["摘要"]) + "<br/></font>"
                "<font size='3'>关键词:" + html.escape(source["关键词"]) + "<br/></font>"
                "<font size='3'>标引词:" + html.escape(source["标引词"]) + "<br/></font>"
                "<font color='black' size='3'>相关性:" + str(hit["_score"]) + "<br/></font>"
                "</div>"
            )
def main():
    """Start the Qt application, show the main window and exit with its status."""
    application = QtWidgets.QApplication(sys.argv)
    window = MainUi()
    window.show()
    # Run the event loop until the window is closed, then hand its return
    # code to the interpreter.
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
复制代码