基于elasticsearch 7.1 和python 3.6的简易检索系统实现

环境配置

elasticsearch 7.1的安装

详见官网:elasticsearch 下载及安装教程

python 3.6及elasticsearch包的安装

  1. python的安装详见:python 官网下载地址
  2. 确认python安装无误后,使用pip install elasticsearch安装,注意版本的对应(详见elasticsearch python手册)
  3. ik分词器的安装:从elasticsearch-analysis-ik Github下载地址中下载对应版本的编译后安装包(自己编译太麻烦),解压后重命名为analysis-ik,并将其复制到elasticsearch-7.1.1\plugins目录下

索引建立

使用Elasticsearch.indices.create()建立名为indexing_test的索引

from elasticsearch import Elasticsearch

es = Elasticsearch()
index = 'indexing_test'

# Index settings and field mappings for the new index.
#
# The field definitions are written for Elasticsearch 7.x:
#   * "index" on a text field only accepts true/false (default true), so the
#     legacy value "analyzed" is dropped; the field is still indexed and
#     analyzed with the configured analyzer.
#   * "include_in_all" belonged to the removed `_all` field and is dropped.
mappings = {
    "settings": {
        "index": {
            "number_of_shards": 5,
            "number_of_replicas": 0
        },
        "analysis": {
            "analyzer": {
                # Custom analyzer built on the ik_max_word tokenizer from the
                # analysis-ik plugin.
                "ik": {
                    "tokenizer": "ik_max_word"
                }
            }
        },
    },
    "mappings": {
        "properties": {
            "sub": {
                "type": "text",
                "analyzer": "ik_max_word"
            },
            "verb": {
                "type": "text",
                "analyzer": "ik_max_word"
            },
            "obj": {
                "type": "text",
                "analyzer": "ik_max_word"
            }
        }
    }
}

# Create the index named indexing_test.
# NOTE: ignore=[400, 404] makes the client return the error response instead
# of raising for those HTTP statuses (presumably so that re-running against an
# already existing index does not fail). It also hides mapping errors, so
# inspect the returned response if the index does not show up.
es.indices.create(index=index, ignore=[400, 404], body=mappings)
复制代码

数据上传

本文用的数据为csv格式。使用helpers.bulk()批量上传数据。

from elasticsearch import Elasticsearch
from elasticsearch import helpers
import csv

es = Elasticsearch()

# Target index. It was not defined in this script originally (NameError when
# the actions were generated); it is the index the search code queries.
index = 'indexing_test'

# Read the CSV file. newline='' is what the csv module expects for files it
# parses, and the with-block closes the file once the upload has finished.
with open('data/标引.csv', encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)

    # One bulk action per CSV row; columns are taken by position.
    action = ({
        "_index": index,
        "_source": {
            "标题": row[0], "摘要": row[1], "关键词": row[2], "标引词": row[3]
        }} for row in csv_reader)

    # Bulk-import the rows. The generator is lazy, so this call has to stay
    # inside the with-block while the file is still open. Every action names
    # its own "_index", so no separate default index is passed.
    helpers.bulk(es, action, raise_on_error=True)
复制代码

检索实现

在特定字段中匹配输入的词并返回检索结果。

from elasticsearch import Elasticsearch
from elasticsearch import helpers
import sys
 

def get_results(word, size=30):
    """Run the two comparison searches for *word* against indexing_test.

    Args:
        word: The text typed by the user.
        size: Maximum number of hits requested per search. Defaults to 30,
            the request parameter the original code passed to es.search().

    Returns:
        A tuple ``(res_left, res_right)`` of raw Elasticsearch responses:
        ``res_left`` matches *word* against the 标题/摘要/关键词 fields,
        ``res_right`` matches it against the 标引词 field only.
    """
    es = Elasticsearch()
    index = 'indexing_test'

    # The original stated the limit twice ("size": 100 in the body and
    # size=30 as a request parameter, which Elasticsearch applies over the
    # body value). It is now given once, through the request parameter.
    full_text_query = {
        "query": {
            "multi_match": {
                "query": word,
                "fields": ["标题", "摘要", "关键词"]
            }
        },
        "highlight": {
            "fields": {
                "标题": {},
                "摘要": {},
                "关键词": {}
            }
        }
    }
    indexing_query = {
        "query": {
            "match": {
                "标引词": word
            }
        },
        "highlight": {
            "fields": {
                "标引词": {}
            }
        }
    }

    res_left = es.search(index=index, body=full_text_query, size=size)
    res_right = es.search(index=index, body=indexing_query, size=size)
    return res_left, res_right
    
    
if __name__ == "__main__":
    # The original called main(), which is not defined in this module, so
    # running the file raised NameError. Run one search from the command line
    # instead and print the total hit count reported for each query.
    if len(sys.argv) < 2:
        sys.exit("usage: python query_scroll_scan.py <keyword>")
    res_left, res_right = get_results(sys.argv[1])
    print(res_left['hits']['total']['value'], res_right['hits']['total']['value'])
复制代码

检索界面

界面如图所示:

检索界面
代码:

from PyQt5 import QtCore,QtGui,QtWidgets
import sys
from query_scroll_scan import get_results

class MainUi(QtWidgets.QMainWindow):
    """Main window: a search box on top and two result panes side by side.

    The left pane shows the hits of the first response returned by
    get_results(), the right pane the hits of the second one.
    """

    def __init__(self):
        super().__init__()
        self.init_ui()

    def init_ui(self):
        """Create all widgets, wire the search signals and lay everything out."""
        self.resize(960,700)
        self.setWindowTitle('检索测试')
        self.main_widget = QtWidgets.QWidget()  # central widget of the window
        self.main_layout = QtWidgets.QGridLayout()  # grid layout of the central widget
        self.main_widget.setLayout(self.main_layout)

        self.right_bar_widget_search_input = QtWidgets.QLineEdit()
        self.right_bar_widget_search_input.setPlaceholderText("输入关键词,点击按钮/回车进行搜索")
        self.search_button = QtWidgets.QPushButton("搜索")
        # Both clicking the button and pressing Return trigger a search.
        self.search_button.clicked.connect(self.get_words)
        self.right_bar_widget_search_input.returnPressed.connect(self.get_words)

        self.up_widget = QtWidgets.QWidget()  # container holding every visible control
        self.up_widget.setObjectName('up_widget')
        self.up_layout = QtWidgets.QGridLayout()  # grid layout of the container
        self.up_widget.setLayout(self.up_layout)

        # Row 0: search input and button.
        self.up_layout.addWidget(self.right_bar_widget_search_input, 0, 0)
        self.up_layout.addWidget(self.search_button, 0, 1)

        # Row 1: captions of the two result panes.
        self.left_label = QtWidgets.QLabel("全文检索结果")
        self.right_label = QtWidgets.QLabel("主题标引后检索结果")
        self.up_layout.addWidget(self.left_label,1,0)
        self.up_layout.addWidget(self.right_label,1,1)

        # Row 3: the two result panes.
        self.left_text = QtWidgets.QTextEdit()
        self.up_layout.addWidget(self.left_text,3,0)
        self.right_text = QtWidgets.QTextEdit()
        self.up_layout.addWidget(self.right_text,3,1)

        self.main_layout.addWidget(self.up_widget,0,0,1,1)

        self.setCentralWidget(self.main_widget)

    def get_words(self):
        """Search for the text in the input box and refresh both result panes."""
        words = self.right_bar_widget_search_input.text()
        res_left, res_right = get_results(words)
        # The original also created a new, never-displayed QLabel here on
        # every search and rebound self.left_label to it; that is removed so
        # self.left_label keeps referring to the label placed in the layout.
        self._show_results(self.left_text, res_left)
        self._show_results(self.right_text, res_right)

    @staticmethod
    def _show_results(text_edit, response):
        """Replace the content of *text_edit* with the hits of *response*.

        Args:
            text_edit: The QTextEdit to fill.
            response: A search response; ``hits.total.value`` and the
                ``_source`` / ``_score`` of every entry in ``hits.hits``
                are read from it.
        """
        total = response['hits']['total']['value']
        text_edit.setText("")
        text_edit.append("<font size='3'>共检索到<em> " + str(total) +  "</em> 条结果<br/></font>")
        for hit in response['hits']['hits']:
            source = hit["_source"]
            text_edit.append(
                "<div>"
                "<font color='red' size='3'>标题:" + source["标题"] + "<br/></font>"
                "<font size='3'>摘要:" + source["摘要"] + "<br/></font>"
                "<font size='3'>关键词:" + source["关键词"] + "<br/></font>"
                "<font size='3'>标引词:" + source["标引词"] + "<br/></font>"
                "<font color='black' size='3'>相关性:" + str(hit["_score"]) + "<br/></font>"
                "</div>"
            )
       
def main():
    """Start the Qt application, show the search window and exit with its status."""
    application = QtWidgets.QApplication(sys.argv)
    window = MainUi()
    window.show()
    # exec_() blocks in the Qt event loop; its return value becomes the
    # process exit status.
    exit_status = application.exec_()
    sys.exit(exit_status)


if __name__ == '__main__':
    main()
复制代码
相关文章
相关标签/搜索