1.1 准备lucene的开发环境
java
搭建lucene的开发环境,要准备lucene的jar包,要加入的jar包至少有:
1) lucene-core-3.1.0.jar (核心包)
2) lucene-analyzers-3.1.0.jar (分词器)
3) lucene-highlighter-3.1.0.jar (高亮器)
4) lucene-memory-3.1.0.jar (内存索引包,高亮器依赖它)
apache
1.实体类 Article
1 package com.home.utils; 2 3 public class Article { 4 private Long id; 5 private String title; 6 private String content; 7 8 public Long getId() { 9 return id; 10 } 11 12 public void setId(Long id) { 13 this.id = id; 14 } 15 16 public String getTitle() { 17 return title; 18 } 19 20 public void setTitle(String title) { 21 this.title = title; 22 } 23 24 public String getContent() { 25 return content; 26 } 27 28 public void setContent(String content) { 29 this.content = content; 30 31 } 32 }
2.搜索引擎
package com.home.utils;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Minimal Lucene walkthrough: one test writes a single {@link Article} into an
 * on-disk index, the other queries that index and prints the matching articles.
 *
 * <p>{@link #testSearchIndex()} reads the index directory that
 * {@link #testCreateIndex()} writes, so the index must have been created first.
 */
public class HelloWorld {

    /** Filesystem location of the index used by both tests. */
    private static final String INDEX_PATH = "E:\\s";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 2;

    /**
     * Builds an article, converts it to a Lucene document and adds it to the index.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    @Test
    public void testCreateIndex() throws Exception {
        // Step 1: create the article and fill in its data.
        Article article = new Article();
        article.setId(1L);
        article.setTitle("lucene能够作搜索引擎");
        article.setContent("baidu,google都是很好的搜索引擎");

        // Step 2: open the index directory and write the document through an IndexWriter.
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            IndexWriter indexWriter = new IndexWriter(directory, analyzer,
                    MaxFieldLength.LIMITED);
            try {
                indexWriter.addDocument(toDocument(article));
            } finally {
                // Step 3: always close the writer, even if adding the document failed,
                // so it is not left open on the index.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Searches the "content" field for the keyword "baidu", converts each hit
     * back into an {@link Article} and prints its id, title and content.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void testSearchIndex() throws Exception {
        List<Article> articleList = new ArrayList<Article>();

        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            // Step 1: create the searcher over the index directory.
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                // Step 2: give the query parser its default field and analyzer,
                // then parse the keyword.
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content",
                        analyzer);
                Query query = queryParser.parse("baidu");

                // Ask for at most MAX_HITS results. topDocs.totalHits (not used here)
                // holds the total number of matching documents.
                TopDocs topDocs = indexSearcher.search(query, MAX_HITS);

                // Each ScoreDoc carries the hit's document number (doc) and its score.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    articleList.add(toArticle(document));
                }
            } finally {
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }

        // Step 3: print the results.
        for (Article article : articleList) {
            System.out.println(article.getId());
            System.out.println(article.getTitle());
            System.out.println(article.getContent());
        }
    }

    /** Converts an article into a Lucene document with "id", "title" and "content" fields. */
    private static Document toDocument(Article article) {
        Document document = new Document();
        // The id is passed with Index.NOT_ANALYZED; title and content with Index.ANALYZED.
        // All three use Store.YES so they can be read back from search hits.
        document.add(new Field("id", article.getId().toString(), Store.YES,
                Index.NOT_ANALYZED));
        document.add(new Field("title", article.getTitle(), Store.YES,
                Index.ANALYZED));
        document.add(new Field("content", article.getContent(), Store.YES,
                Index.ANALYZED));
        return document;
    }

    /** Rebuilds an article from the stored "id", "title" and "content" fields of a document. */
    private static Article toArticle(Document document) {
        Article article = new Article();
        article.setId(Long.parseLong(document.get("id")));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }
}
如何把一个信息写到索引库中
读取信息的过程