前言
使用 Lucene 主要分为两个步骤:
1. 准备(建立索引)
2. 使用(查询)
使用 Lucene 时,要把大部分精力放在建立索引这一环节。
代码
建立索引:
import java.io.File; import java.io.FileReader; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public static void init() throws IOException { /** * step 1 * 建立Directory索引文件存放目录 */ Path path = Paths.get("F:/lucene/index"); Directory directory = FSDirectory.open(path); /** * step 2 * 选用构建分词解析器 */ CharArraySet stopword = new CharArraySet(200, true);//除去没必要构建索引的词 stopword.add("是,的,我,们,你,他,那,这,它".split(",")); Analyzer analyzer = new SmartChineseAnalyzer(stopword); /** * step3 * 构建写索引器 */ IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(directory, iwc); writer.deleteAll();//清空索引 /** * step4 & stpe5 * 将数据库的表记录,文件系统的文本,或其它的数据封装成一个Document, * 并经过写索引器解析Document生成索引文件 */ File doc = new File("F:/lucene/doc");//源文件 for (File file : doc.listFiles()) { System.out.println(String.format("read file : %s", file.getName())); Document document = new Document(); document.add(new TextField("title", file.getName(), Field.Store.YES)); //文章的标题 document.add(new TextField("contents", new FileReader(file))); //文章的内容 document.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES)); //文章的地址 writer.addDocument(document); } writer.close(); }
查询:
import java.io.IOException; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public static ScoreDoc[] query(String key) throws IOException, ParseException{ /** * stpe1 * 构建读索引器(指定索引目录) */ Directory directory = FSDirectory.open(Paths.get("F:/lucene/index"));//索引文件存放目录 IndexReader indexReader = DirectoryReader.open(directory); /** * stpe2 * 查询器 */ IndexSearcher searcher = new IndexSearcher(indexReader); /** * stpe3 * 选用构建分词解析器,要和建立索引时的分析器一致 */ Analyzer analyzer = new SmartChineseAnalyzer(); /** * step4 * 建立Query * key: 查询关键字 * contents:只在文章内容中配置查询(这是你建立索引时指定的域名). * 全部你也能够指定“title”即按标题查询,固然lunene提交多域查询 */ QueryParser queryParser = new QueryParser("contents", analyzer); Query query = queryParser.parse(key); /** * step 5 * 查询获得查询结果 */ TopDocs topDocs = searcher.search(query, 10); indexReader.close(); return topDocs.scoreDocs; }
根据业务需要处理获得的查询结果:
ScoreDoc[] hits = query("lucene文章"); for (ScoreDoc scoreDoc : hits) { int docId = scoreDoc.doc; Document d = indexSearcher.doc(docId); System.out.println(String.format("文章名字:%s /t 路径:%s", d.get("title"),d.get("path"))); }