Apache Lucene 6.3.0 Demo

时间 2019-11-06

标签 apache lucene 6.3.0 demo 栏目 Apache 繁體版

原文原文链接

准备工作

本着凡事都用新版本的原则，本人的jdk是1.8版本，完美运行。但为了验证Apache Lucene 6.3.0需要jdk1.8的传说，于是换了jdk1.7，发现果然运行不了，收到来自虚拟机的报错。

依赖包

在pom.xml的dependencies中加入以下代码后，会自动导入下列jar包：

lucene-queryparser-6.3.0.jar
lucene-core-6.3.0.jar
lucene-queries-6.3.0.jar
lucene-sandbox-6.3.0.jar

<!-- Only lucene-queryparser is declared here. According to the jar list in this article,
     adding it also brought in lucene-core, lucene-queries and lucene-sandbox (all 6.3.0). -->
<dependency>
  	<groupId>org.apache.lucene</groupId>
  	<artifactId>lucene-queryparser</artifactId>
  	<version>6.3.0</version>
  </dependency>

开始码字

package com.lucene.test;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Minimal Apache Lucene 6.3.0 walkthrough: builds a small on-disk index and
 * runs one parsed query against it, printing the hits to standard output.
 *
 * <p>The instance owns a {@link Directory} and an {@link Analyzer}; both are
 * released by {@link #close()}, so prefer using this class in a
 * try-with-resources statement.
 */
public class FSDirectoryDemo implements AutoCloseable {
    /** Filesystem location where the index files are stored. */
    private static final String INDEX_DIR = "D://LuceneTest//index";

    /** On-disk index location shared by the writer and the reader. */
    private final Directory directory;
    /** Analyzer used both when indexing and when parsing queries, so tokens line up. */
    private final Analyzer analyzer;

    /**
     * Creates the analyzer and opens the index directory.
     *
     * @throws Exception if the index directory cannot be opened
     */
    public FSDirectoryDemo() throws Exception {
        analyzer = new StandardAnalyzer();
        directory = initLuceneDirctory();
    }

    /**
     * Opens the directory that holds the index files.
     *
     * <p>An index can live on disk ({@code FSDirectory.open(Path)}) or in memory
     * ({@code new RAMDirectory()}); the in-memory variant is lost when the
     * process ends. This demo keeps the index on disk under {@link #INDEX_DIR}.
     *
     * @return the opened on-disk directory
     * @throws Exception if the directory cannot be opened
     */
    private Directory initLuceneDirctory() throws Exception {
        File indexDir = new File(INDEX_DIR);
        return FSDirectory.open(indexDir.toPath());
    }

    /**
     * Builds a {@link Document} with three text fields.
     *
     * <p>Each field is created with {@code Field(String name, String value, FieldType type)}.
     * {@code TextField.TYPE_STORED} keeps the original value retrievable from a hit,
     * whereas {@code TextField.TYPE_NOT_STORED} only indexes it; that is why
     * {@code "author"} comes back as {@code null} in {@link #luceneDemo()}.
     *
     * @param title   value of the stored {@code "title"} field
     * @param content value of the stored {@code "content"} field
     * @return a document with {@code content}, {@code title} and a fixed,
     *         non-stored {@code author} field
     */
    public static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("content", content, TextField.TYPE_STORED));
        doc.add(new Field("title", title, TextField.TYPE_STORED));
        doc.add(new Field("author", "paul", TextField.TYPE_NOT_STORED));
        return doc;
    }

    /**
     * Writes four hard-coded sample documents to the index and commits them.
     * In a real application the documents would typically come from a database.
     *
     * <p>Each call appends the same four documents again, so call it once per index.
     *
     * @throws IOException if writing to the index fails
     */
    public void addDirectory() throws IOException {
        // Pass the shared analyzer explicitly so indexing and query parsing
        // are guaranteed to tokenize text the same way.
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // try-with-resources closes the writer (and releases the index write
        // lock) even when one of the addDocument calls throws.
        try (IndexWriter writer = new IndexWriter(directory, iwc)) {
            // The searcher never touches this writer; both sides are linked
            // only through the shared Directory.
            writer.addDocument(createDocument("FieldName_1", "FieldValue content one"));
            writer.addDocument(createDocument("FieldName title two test", "FieldValue two"));
            writer.addDocument(createDocument("FieldName title three test", "FieldValue three"));
            writer.addDocument(createDocument("FieldName test title fore test", "FieldValue fore"));

            writer.commit();
        }
    }

    /**
     * Runs a sample query against the index and prints the total hit count,
     * the returned {@link ScoreDoc}s, and the {@code content} / {@code author}
     * values of every returned document.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public void luceneDemo() throws Exception {
        // The reader holds open file handles; close it when the search is done.
        try (IndexReader ir = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(ir);

            // First constructor argument is the default field for query terms
            // that do not carry an explicit "field:" prefix.
            QueryParser qp = new QueryParser("title", analyzer);

            // "name:term" searches the named field; a bare term falls back to
            // the parser's default field.
            // NOTE(review): single quotes are not phrase delimiters in the classic
            // query syntax (phrases use double quotes), so this presumably matches
            // documents containing either term in "title" - confirm before
            // copying the pattern. The string is kept as-is so the demo output
            // stays the same.
            Query query = qp.parse("title:'fieldName test'");

            // search(Query, int): the int caps how many top hits are returned.
            // totalHits still reports every match, so it can exceed
            // scoreDocs.length (with the sample data: 3 matches, 2 returned).
            TopDocs topdoc = searcher.search(query, 2);
            System.out.println("命中个数:" + topdoc.totalHits);

            // Each ScoreDoc carries: score (relevance), doc (document number,
            // see IndexSearcher#doc(int)) and shardIndex (only set by
            // TopDocs#merge, unused here because there is a single TopDocs).
            ScoreDoc[] hits = topdoc.scoreDocs;
            System.out.println("hits: " + Arrays.toString(hits));

            if (hits != null) {
                // Hits arrive ordered by descending score.
                for (ScoreDoc hit : hits) {
                    Document hitDoc = searcher.doc(hit.doc);
                    // "content" was indexed with TYPE_STORED, so its value is available.
                    System.out.println(hitDoc.get("content"));
                    // "author" was indexed with TYPE_NOT_STORED, so this prints null.
                    System.out.println(hitDoc.get("author"));
                }
            }
        }
    }

    /**
     * Releases the index directory and the analyzer.
     *
     * @throws IOException if closing the directory fails
     */
    @Override
    public void close() throws IOException {
        try {
            directory.close();
        } finally {
            analyzer.close();
        }
    }

    /**
     * Entry point: builds the index on the first run, then executes the search.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try (FSDirectoryDemo fsdd = new FSDirectoryDemo()) {
            // Build the index only when none exists yet. This replaces the manual
            // "uncomment on first run" step and avoids adding duplicate documents
            // on later runs.
            if (!DirectoryReader.indexExists(fsdd.directory)) {
                fsdd.addDirectory();
            }
            fsdd.luceneDemo();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

输出结果

命中个数:3
hits: [doc=3 score=0.7275808 shardIndex=0, doc=1 score=0.6739625 shardIndex=0]
FieldValue fore
null
FieldValue two
null

如果用jdk1.7，报错如下：

Exception in thread "main" java.lang.UnsupportedClassVersionError: org/apache/lucene/analysis/Analyzer : Unsupported major.minor version 52.0
	at java.lang.ClassLoader.defineClass1(Native Method)
	at java.lang.ClassLoader.defineClass(ClassLoader.java:800)
	at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
	at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
	at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
	at java.security.AccessController.doPrivileged(Native Method)
	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
	at java.lang.Class.getDeclaredMethods0(Native Method)
	at java.lang.Class.privateGetDeclaredMethods(Class.java:2531)
	at java.lang.Class.getMethod0(Class.java:2774)
	at java.lang.Class.getMethod(Class.java:1663)
	at sun.launcher.LauncherHelper.getMainMethod(LauncherHelper.java:494)
	at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:486)

参考文档：http://blog.csdn.net/lb521200200/article/details/53549660