本文主要记录 Lucene 另外两个要点的 API 用法：查询与排序。
/** * 查找指定field中包含某个关键字 * @throws IOException */ @Test public void termQuery() throws IOException { String field = "title"; // String queryStr = "in"; // String queryStr = "Lucene in Action"; // String queryStr = "action"; String queryStr = "lucene"; Term term = new Term(field,queryStr); Query query = new TermQuery(term); executeQuery(query); }
/** * 查找指定字段中包含与关键字类似的文档 * 查询用于匹配与指定项类似的项 * 编辑距离算法,两个字符串之间类似度的一个度量方法 * 用来决定索引文件中的项与指定目标项的类似程度. * 取全部相同前缀(前缀长度能够设定)的词项作编辑距离 * * 编辑距离其实是代表两个不一样的字符串须要通过多少次编辑和变换才能变为对方。 * 一般的编辑行为包括了增长一个检索项,删除一个检索项,修改一个检索项, * 与普通的字符串匹配函数不一样,模糊搜索里的编辑距离是以索引项为单位的。 * * http://www.xinxilong.com/html/?2481.html * @throws IOException */ @Test public void fuzzyQuery() throws IOException { String field = "title"; String queryStr = "act";// 自动在结尾添加 ~ ,即查询act~ Term term = new Term(field,queryStr); int maxEdits = 1; //编辑距离最多不能超过多少 int prefixLength = 3; //相同的前缀长度 // Query query = new FuzzyQuery(term,maxEdits,prefixLength); Query query = new FuzzyQuery(term,maxEdits); // Query query = new FuzzyQuery(term); executeQuery(query); }
/** * http://my.oschina.net/MrMichael/blog/220694 * 同一个关键词多个字段搜索 * 用MultiFieldQueryParser类实现对同一关键词的跨域搜索 */ @Test public void multiFieldQueryCrossFields() throws ParseException, IOException { String[] fields = new String[]{"title","desc"}; String queryStr = "good"; Map<String , Float> boosts = new HashMap<String, Float>(); //设定它们在搜索结果排序过程当中的权重,权重越高,排名越靠前 boosts.put("title", 1.0f); boosts.put("desc", 0.7f); MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer(),boosts); Query query = parser.parse(queryStr); executeQuery(query); }
/**
 * Builds one query from several keyword/field pairs.
 *
 * <p>The static {@code MultiFieldQueryParser.parse} pairs its arrays by index, so this
 * test searches {@code title} for {@code "good"} and {@code desc} for {@code "lucene"}.
 * Both clauses are {@code SHOULD}, i.e. a document matching either pair is a hit. The
 * three arrays must have the same length.
 *
 * @throws ParseException if one of the query strings cannot be parsed
 * @throws IOException propagated from {@code executeQuery}
 */
@Test
public void multiFieldQueryMultiKeyword() throws ParseException, IOException {
    String[] queries = {"good", "lucene"};
    String[] fields = {"title", "desc"};
    BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};

    // The analyzer is Closeable; try-with-resources releases it even if parsing or the
    // search throws.
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        Query query = MultiFieldQueryParser.parse(queries, fields, clauses, analyzer);
        executeQuery(query);
    }
}
/** * 通配符查询 * 星号*:表明0个或多个字母 * 问号?:表明0个或1个字母 */ @Test public void wildcardQuery() throws IOException { String field = "title"; // String queryStr = "*pute?"; String queryStr = "act*"; Term term = new Term(field,queryStr); Query query = new WildcardQuery(term); executeQuery(query); }
/** * 前缀查询 * 自动在关键词末尾添加* */ @Test public void prefixQuery() throws IOException { String field = "title"; String queryStr = "act"; //act* Term term = new Term(field,queryStr); Query query = new PrefixQuery(term); executeQuery(query); }
/**
 * Phrase search: matches terms by their relative positions in the field (the
 * query-parser equivalent is a quoted phrase).
 *
 * <p>Slop is the largest positional gap tolerated between the phrase terms. For example,
 * with a slop of 1 the phrase "quick fox" matches the text "quick brown fox".
 *
 * <p>Here the phrase is {@code computer} followed by {@code art} in {@code title}, with a
 * slop of 3.
 *
 * <p>Reference: http://blog.csdn.net/rick_123/article/details/6708527
 *
 * @throws IOException propagated from {@code executeQuery}
 */
@Test
public void phraseQuery() throws IOException {
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.setSlop(3);
    phrase.add(new Term("title", "computer"));
    phrase.add(new Term("title", "art"));
    Query query = phrase.build();
    executeQuery(query);
}
/** * http://callan.iteye.com/blog/154251 * 跨度查询,用于查询多个词的时候考虑几个词在文档中的匹配位置 * 与phraseQuery和multiFieldQuery很类似,都是经过位置限制匹配 * 可是spanQuery更加灵活 * * SpanQuery包括如下几种: * SpanTermQuery:词距查询的基础,结果和TermQuery类似,只不过是增长了查询结果中单词的距离信息。 * SpanFirstQuery:在指定距离能够找到第一个单词的查询。 * SpanNearQuery:查询的几个语句之间保持者必定的距离。 * SpanOrQuery:同时查询几个词句查询。 * SpanNotQuery:从一个词距查询结果中,去除一个词距查询。 */ @Test public void spanQuery() throws IOException { SpanTermQuery query = new SpanTermQuery(new Term("title","art")); executeQuery(query); } /** * 第一次出如今指定位置 * @throws IOException */ @Test public void spanFirstQuery() throws IOException { SpanTermQuery query = new SpanTermQuery(new Term("title","art")); SpanFirstQuery spanFirstQuery =new SpanFirstQuery(query,2); //出如今第2个位置 executeQuery(spanFirstQuery); } /** * SpanNearQuery中将SpanTermQuery对象做为SpanQuery对象使用的效果,与使用PharseQuery的效果很是类似。 * 最大的区别是:在SpanNearQuery的构造函数中的第三个参数为inOrder标志,设置这个标志为true,项添加的顺序和其文档中出现的顺序相同 */ @Test public void spanNearQuery() throws IOException { SpanTermQuery queryScience = new SpanTermQuery(new Term("title","science")); SpanTermQuery queryArt = new SpanTermQuery(new Term("title","art")); SpanQuery[] queries = new SpanQuery[]{queryScience,queryArt}; int slop = 2;//science 与 art两个词间隔在2之内 boolean inOrder = false;//不须要按数组中的顺序出如今文档中 SpanNearQuery query = new SpanNearQuery(queries,slop,inOrder); executeQuery(query); } @Test public void spanOrQuery() throws IOException { SpanTermQuery queryScience = new SpanTermQuery(new Term("title","science")); SpanTermQuery queryArt = new SpanTermQuery(new Term("title","art")); SpanQuery[] queries = new SpanQuery[]{queryScience,queryArt}; int slop = 2;//science 与 art两个词间隔在2之内 boolean inOrder = false;//不须要按数组中的顺序出如今文档中 SpanNearQuery spanNearQuery = new SpanNearQuery(queries,slop,inOrder); SpanTermQuery queryComputer = new SpanTermQuery(new Term("title","lucene")); SpanOrQuery query = new SpanOrQuery(new SpanQuery[]{spanNearQuery,queryComputer}); executeQuery(query); }
/**
 * Combined (boolean) query.
 *
 * <p>How clause types combine:
 * <ul>
 *   <li>MUST + MUST: intersection; both clauses have to match.
 *   <li>MUST + MUST_NOT: match the first clause, exclude the second.
 *   <li>MUST_NOT + MUST_NOT: not meaningful on its own.
 *   <li>SHOULD + SHOULD: either clause may match (OR).
 *   <li>SHOULD + MUST: the MUST clause alone decides which documents match; the SHOULD
 *       clause does not widen or narrow the result set.
 *   <li>SHOULD + MUST_NOT: behaves like MUST + MUST_NOT.
 * </ul>
 *
 * <p>This test ORs {@code title:computer} with {@code desc:good} and requires at least
 * one of the two SHOULD clauses to match.
 *
 * @throws IOException propagated from {@code executeQuery}
 */
@Test
public void booleanQuery() throws IOException {
    TermQuery titleHasComputer = new TermQuery(new Term("title", "computer"));
    TermQuery descHasGood = new TermQuery(new Term("desc", "good"));

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(titleHasComputer, BooleanClause.Occur.SHOULD);
    builder.add(descHasGood, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(1);

    BooleanQuery booleanQuery = builder.build();
    executeQuery(booleanQuery);
}
/** * 按指定字段排序 * @throws IOException * @throws ParseException */ @Test public void sortByField() throws IOException, ParseException { //Sort using term values as encoded Integers. Sort values are Integer and lower values are at the front. boolean isReverse = false; SortField sortField = new SortField("title", SortField.Type.STRING,isReverse); Query query = new TermQuery(new Term("title","lucene")); Sort sort = new Sort(sortField); executeQuery(query, sort); }
/**
 * Returns the hits for {@code title:lucene} in index order ({@code Sort.INDEXORDER}).
 *
 * @throws IOException propagated from {@code executeQuery}
 */
@Test
public void sortByIndexOrder() throws IOException {
    Term luceneInTitle = new Term("title", "lucene");
    Query query = new TermQuery(luceneInTitle);
    executeQuery(query, Sort.INDEXORDER);
}
/**
 * Sorts hits by document score ({@code Sort.RELEVANCE}).
 *
 * <p>The query ORs {@code title:computer} with {@code desc:good} and requires at least one
 * of the two SHOULD clauses to match.
 *
 * @throws IOException propagated from {@code executeQuery}
 */
@Test
public void sortByRelevance() throws IOException {
    TermQuery titleHasComputer = new TermQuery(new Term("title", "computer"));
    TermQuery descHasGood = new TermQuery(new Term("desc", "good"));

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(titleHasComputer, BooleanClause.Occur.SHOULD);
    builder.add(descHasGood, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(1);

    BooleanQuery query = builder.build();
    executeQuery(query, Sort.RELEVANCE);
}
本工程见 GitHub。