solr第四篇（solr整合数据源）

时间 2019-11-11

标签 solr 第四整合数据繁體版

原文原文链接

#编辑/usr/local/tomcat/solr_home/new_core/conf下的solrconfig.xml
cd /usr/local/tomcat/solr_home/new_core/conf
vim solrconfig.xml
#首先配置jar位置
<lib dir="/usr/local/tomcat/tomcat8/webapps/solr/WEB-INF/lib" regex=".*\.jar" />
#在name="/select"  class="solr.SearchHandler"之上添加以下代码
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
        <lst name="defaults">
            <str name="config">data-config.xml</str>
        </lst>
</requestHandler>

#完成如上操作之后，在solrconfig.xml的同级目录下添加文件data-config.xml
#在当前目录下创建文件（注意要创建的是文件而不是目录，所以用touch而不是mkdir）
touch data-config.xml
#编辑data-config.xml
vim data-config.xml
#添加以下
<?xml version="1.0" encoding="UTF-8"?>  
<dataConfig>  
	<!-- 更改成自己的数据源地址 -->
	<dataSource name="source1" type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/solrdata" user="root" password="root" batchSize="-1" />
	<document>
		<!-- entity对应表；deltaImportQuery和deltaQuery在配置增量更新时需要使用 -->
		<!-- 注意：注释不能写在标签的属性之间，否则XML无法解析 -->
		<entity name="goods" pk="id" dataSource="source1"
			query="select * from goods"
			deltaImportQuery="select * from goods where id='${dih.delta.id}'"
			deltaQuery="select id from goods where updatetime > '${dataimporter.last_index_time}'">
			<!-- field对应列字段 -->
			<field column="id" name="id"/>
			<field column="name" name="name"/>
			<field column="number" name="number"/>
			<field column="updatetime" name="updatetime"/>
		</entity>
	</document>
</dataConfig>
#其中deltaImportQuery，deltaQuery设置的内容是自动更新mysql数据到solr引擎中来所需要的。如果有多个表，再并列添加一个entity标签

#编辑managed-schema
vim managed-schema
#添加刚才数据源配置里面的field字段
<field name="id" type="int" indexed="true" stored="true" required="true"  multiValued="false"/>
<field name="username" type="string" indexed="true" stored="true" />
#type="text_ik"表示使用自定义分词 可以中文分词
<field name="nickname" type="text_ik" indexed="true" stored="true" />
<field name="password" type="string" indexed="true" stored="true" />
<field name="regTime" type="date" indexed="true" stored="true" />
#我的配置如下图

#配置自动更新数据源
#在solr_home文件夹里面创建conf文件夹，在conf里面创建dataimport.properties文件
cd /usr/local/tomcat/solr_home
mkdir conf
cd conf
touch dataimport.properties
#编辑dataimport.properties 添加以下内容
#内容开始---------------------------
#################################################

#                                               #

#       dataimport scheduler properties         #

#                                               #

#################################################

#  to sync or not to sync
#  1 - active; anything else - inactive
# 这里的配置不用修改
syncEnabled=1

#  which cores to schedule
#  in a multi-core environment you can decide which cores you want syncronized
#  leave empty or comment it out if using single-core deployment

#  修改为你所使用的core，如果自定义了多个core，用英文逗号隔开
syncCores=new_core
#syncCores=new_core,new_core2,new_core3,....

#  solr server name or IP address
#  [defaults to localhost if empty]

# 这个一般都是localhost不会变
server=localhost

#  solr server port

#  [defaults to 80 if empty]

#  安装solr的tomcat端口，如果你使用的是默认的端口，就不用改了，否则改成自己的端口就行了
port=8080


#  application name/context

#  [defaults to current ServletContextListener's context (app) name]

#  这里默认不改

webapp=solr

#  URL params [mandatory]

#  remainder of URL

#  这里改为下面的形式，solr同步数据时请求的链接

params=/dataimport?command=delta-import&clean=false&commit=true

#  schedule interval

#  number of minutes between two runs

#  [defaults to 30 if empty]

#这里是设置定时任务的，单位是分钟，也就是多长时间你检测一次数据同步，根据项目需求修改

#  开始测试的时候为了方便看到效果，时间可以设置短一点

interval=1


#  重做索引的时间间隔，单位分钟，默认7200，即5天;

#  为空,为0,或者注释掉:表示永不重做索引

reBuildIndexInterval=7200


#  重做索引的参数

reBuildIndexParams=/select?qt=/dataimport&command=full-import&clean=true&commit=true


#  重做索引时间间隔的计时开始时间，第一次真正执行的时间=reBuildIndexBeginTime+reBuildIndexInterval*60*1000；

#  两种格式：2012-04-11 03:10:00 或者  03:10:00，后一种会自动补全日期部分为服务启动时的日期

reBuildIndexBeginTime=03:10:00

#内容结束------------------------------

在/usr/local/tomcat/tomcat8/webapps/solr/WEB-INF/lib下添加自动增量更新Jar包
链接：https://pan.baidu.com/s/1KPf9qRPn3BePVLzdTFEvyQ
提取码：wyn4
这个jar只适合solr5.5.x版本
我的数据源配置的库和表

#执行下面这条SQL，用于设置自动更新的判别条件，意思就是结合上面data-config.xml中的
#deltaQuery="select id from goods where regTime> '${dataimporter.last_index_time}'"来做更新
#（因为我这个表没有设置updatetime，所以使用regTime代替）
alter table user modify regTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP;

好了测试一下
第一次先手动导入下数据
mysql

q: 查询字符串（必须的）。*:*表示查询全部；keyword:东看 表示按关键字“东看”查询
fq: filter query 过滤查询。使用Filter Query可以充分利用Filter Query Cache，提高检索性能。作用：在q查询符合结果中同时是fq查询符合的(类似求交集)，例如：q=mm&fq=date_time:[20081001 TO 20091031]，找关键字mm，并且date_time是20081001到20091031之间的。
sort: 排序。格式以下：字段名 排序方式；如advertiserId desc 表示按id字段降序排列查询结果。
start,rows:表示查回结果从第几条数据开始显示，共显示多少条。
fl: field list。指定查询结果返回哪些字段。多个时以空格“ ”或逗号“,”分隔。不指定时，默认全返回。
df: default field默认的查询字段，一般默认指定。
Raw Query Parameters:
wt: write type。指定查询输出结果格式，我们常用的有json格式与xml格式。在solrconfig.xml中定义了查询输出格式：xml、json、python、ruby、php、phps、custom。
indent: 返回的结果是否缩进，默认关闭，用 indent=true | on 开启，一般调试json,php,phps,ruby输出才有必要用这个参数。
debugQuery: 设置返回结果是否显示Debug信息。
dismax:
edismax:
hl: high light 高亮。hl=true表示启用高亮
hl.fl ： 用空格或逗号隔开的字段列表（指定高亮的字段）。要启用某个字段的highlight功能，就得保证该字段在schema中是stored。如果该参数未被给出，那么就会高亮默认字段：standard handler会用df参数，dismax字段用qf参数。你可以使用星号去方便地高亮全部字段。如果你使用了通配符，那么要考虑启用hl.requireFieldMatch选项。
hl.simple.pre：
hl.requireFieldMatch: 如果置为true，那么只有该字段的查询结果不为空时才会被高亮。它的默认值是false，意味着它可能匹配某个字段却高亮一个不同的字段。如果hl.fl使用了通配符，那么就要启用该参数。尽管如此，如果你的查询是all字段（可能是使用 copy-field 指令），那么还是把它设为false，这样搜索结果能表明哪个字段的查询文本未被找到
hl.usePhraseHighlighter：如果一个查询中含有短语（引号框起来的）那么会保证一定要完全匹配短语的才会被高亮。
hl.highlightMultiTerm：如果使用通配符和模糊搜索，那么会确保与通配符匹配的term会高亮。默认为false，同时hl.usePhraseHighlighter要为true。
facet:分组统计，在搜索关键字的同时,可以按照Facet的字段进行分组并统计。
facet.query：Facet Query利用类似于filter query的语法提供了更为灵活的Facet.通过facet.query参数，可以对任意字段进行筛选。
facet.field：需要分组统计的字段，可以多个。
facet.prefix： 表示Facet字段值的前缀。比如facet.field=cpu&facet.prefix=Intel，那么对cpu字段进行Facet查询，返回的cpu都是以Intel开头的， AMD开头的cpu型号将不会被统计在内。
spatial:
spellcheck: 拼写检查。

进行查询
当前查询了全部并且进行了0-10分页

进行text_ik分词并进行检索 之前在managed-schema中配置了<field name="nickname" type="text_ik" indexed="true" stored="true" /> 
下面我们使用这个字段进行检索
可以看到数据只有5条

为了证明进行了分词而不是模糊查询
我进行如下搜索
可以看到如下 还是5条数据 并且有只为“凉城”的结果也展示出来了

下面测试增量更新  之前我们设置了自动更新时间为1分钟做测试
当前user表数据只有22条
现在新增一条

可以看到数据已经更新为23条了
等待一分钟后再去查询全部，发现已经更新了

现在测试多表多字段联合检索查询
为了方便我在虚拟机再添加一个表 只有三个字段的

已经创建成功了

添加了4条数据
sql

去更新data-config.xml 和 managed-schema配置
cd /usr/local/tomcat/solr_home/new_core/conf
vim data-config.xml
#添加entity如下
vim managed-schema
#添加字段如下图

product表也配置增量更新

重启tomcat和solr后再次导入数据，两个表总共27条数据全部查询出来了

进行多字段查询 结果以下

完成
由于solr整合到项目中有很多方式，这里就不再写了，可以百度相关博客