以前作去转盘网的时候,我已经公开了非全文搜索的代码,须要的朋友但愿可以前去阅读个人博客。本文主要讨论如何进行全文搜索,因为本人花了很长时间设计了新做:观点,观点对全文搜索的要求仍是很高的,因此我又花了很多时间研究全文搜索,你能够先体验下:点我搜索。废话也很少说了,直接上代码:html
public Map<String,Object> articleSearchAlgorithms(SearchCondition condition,IndexSearcher searcher) throws ParseException, IOException{ Map<String,Object> map =new HashMap<String,Object>(); String[] filedsList=condition.getFiledsList(); String keyWord=condition.getKeyWord(); int currentPage=condition.getCurrentPage(); int pageSize=condition.getPageSize(); String sortField=condition.getSortField(); boolean isASC=condition.isDESC(); String sDate=condition.getsDate(); String eDate=condition.geteDate(); String classify=condition.getClassify(); //过滤终结字符 keyWord=escapeExprSpecialWord(keyWord); BooleanQuery q1 = new BooleanQuery(); BooleanQuery q2 = new BooleanQuery(); BooleanQuery booleanQuery = new BooleanQuery(); //boolean查询 if(classify!=null&&(classify.equals("guanzhi")||classify.equals("opinion")||classify.equals("write"))){ String typeId="1";//默认言论 if(classify.equals("guanzhi")){ typeId="2"; } if(classify.equals("opinion")){ typeId="3"; } Query termQuery = new TermQuery(new Term("typeId",typeId)); q1.add(termQuery,BooleanClause.Occur.MUST); } if(sDate!=null&&eDate!=null){//是否范围查询由这两个参数决定 Query rangeQuery = new TermRangeQuery("writingTime", new BytesRef(sDate), new BytesRef(eDate),true, true); q1.add(rangeQuery,BooleanClause.Occur.MUST); } Sort sort = new Sort(); // 排序 sort.setSort(SortField.FIELD_SCORE); if(sortField!=null){ sort.setSort(new SortField(sortField, SortField.Type.STRING, isASC)); } int start = (currentPage - 1) * pageSize; int hm = start + pageSize; TopFieldCollector res = TopFieldCollector.create(sort,hm,false, false, false, false); //彻底匹配查询 Term t0=new Term(filedsList[1],keyWord); TermQuery termQuery = new TermQuery(t0);//两种高度匹配的查询 q2.add(termQuery,BooleanClause.Occur.SHOULD); //前缀匹配 Term t1=new Term(filedsList[1],keyWord); PrefixQuery prefixQuery=new PrefixQuery(t1); q2.add(prefixQuery,BooleanClause.Occur.SHOULD); //短语,类似度匹配,适用于分词的内容 for(int i=0;i<filedsList.length;i++){ //多字段term查询算法 if(i!=1){ PhraseQuery phraseQuery=new PhraseQuery(); Term ts0=new Term(filedsList[i],keyWord); phraseQuery.add(ts0); FuzzyQuery fQuery=new FuzzyQuery(new Term(filedsList[i],keyWord),2);//最后类似度查询 q2.add(phraseQuery,BooleanClause.Occur.SHOULD); q2.add(fQuery,BooleanClause.Occur.SHOULD);//后缀类似的拿出来 } } MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_47,filedsList,analyzer); queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = queryParser.parse(keyWord); q2.add(query,BooleanClause.Occur.SHOULD); //必须加逻辑判断,不然结果是不一样的 if(q1!=null && q1.toString().length()>0){ booleanQuery.add(q1,BooleanClause.Occur.MUST); } if(q2!=null && q2.toString().length()>0){ booleanQuery.add(q2,BooleanClause.Occur.MUST); } searcher.search(booleanQuery, res); long amount = res.getTotalHits(); TopDocs tds = res.topDocs(start, pageSize); map.put("amount",amount); map.put("tds",tds); map.put("query",booleanQuery); return map; }
注意下:上面代码的搜索条件(SearchCondition )是观点网的具体需求,您能够按照您本身的搜索条件作改动,这里也很难适配全部读者。算法
public Map<String, Object> searchArticle(SearchCondition condition) throws Exception{ Map<String,Object> map =new HashMap<String,Object>(); List<Write> list=new ArrayList<Write>(); DirectoryReader reader=condition.getReader(); String URL=condition.getURL(); boolean isHighligth=condition.isHighlight(); String keyWord=condition.getKeyWord(); IndexSearcher searcher=getSearcher(reader,URL); try{ Map<String,Object> output=articleSearchAlgorithms(condition,searcher); if(output==null){ map.put("amount",0L); map.put("source",null); return map; } map.put("amount", output.get("amount")); TopDocs tds = (TopDocs) output.get("tds"); ScoreDoc[] sd = tds.scoreDocs; Query query =(Query) output.get("query"); for (int i = 0; i < sd.length; i++) { Document doc = searcher.doc(sd[i].doc); String id = doc.get("id"); /**********************start*************************须要处理的放一起********************/ String temp=doc.get("title"); String title =temp; //默认不高亮 if(isHighligth){ //高亮文章标题 Highlighter highlighterTitle = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterTitle.setTextFragmenter(new SimpleFragmenter(40)); // 字长度 TokenStream ts = analyzer.tokenStream("title", new StringReader(temp)); title= highlighterTitle.getBestFragment(ts,temp); if(title==null){ title=temp.replace(keyWord,"<span style='color:red'>"+keyWord+"</span>");//高亮处理插件bug,加这句话避免 } } String temp1=HtmlEnDecode.htmlEncode(doc.get("content")); String content=temp1;//使用本身封装的方法来转义 if(isHighligth){ //作高亮处理,content Highlighter highlighterContent = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterContent.setTextFragmenter(new SimpleFragmenter(Constant.HIGHLIGHT_CONTENT_LENGTH)); // 字长度 //temp1=StringEscapeUtils.escapeHtml(temp1);//将汉字转义致使高亮失效 TokenStream ts1 = analyzer.tokenStream("content", new StringReader(temp1)); content = highlighterContent.getBestFragment(ts1,temp1); if(content==null){ content=temp1.replace(keyWord,"<span style='color:red'>"+keyWord+"</span>");//高亮处理插件bug,加这句话避免 //假设赶上这种状况作处理,其余的高亮器会自动截图 content=subContent(content);//截取处理 content=HtmlEnDecode.htmldecode(content);//html解码 content=SubStringHTML.sub(content,Constant.HIGHLIGHT_CONTENT_LENGTH); } } /*---------------------------------------不断变更的数据放一起----------------------------*/ Write write=writeDao.getArticle(Long.parseLong(id)); if(write!=null){ write.setTitle(title); write.setContent(content); Date writingTime=write.getWritingTime(); String timeGap=DateUtil.dateGap(writingTime);//timeGap write.setTimeGap(timeGap); list.add(write); } } }catch(Exception e){ e.printStackTrace(); } map.put("source",list); return map; }
注意上面,这是具体的搜索代码,不一样的应用场景有不一样的需求,请您按照本身的需求封装对象,查询数据库等,代码毫无保留,绝对可用。数据库
若是有什么疑问能够加qq群:284205104 若是群满了就麻烦去趟去转盘找下最新的群加了便可,谢谢您的阅读。spa