数据:结构化数据和非结构化数据
搜索数据算法
分词:索引建立(Indexing)和搜索索引(Search)
底层数据结构
* **最后的结构**
  * **Document Frequency**:文档频次,总共有多少文件包含此词
  * **Frequency**:此文件中包含了**几回此词**
* 总体逻辑图
@Test public void testDemo() throws IOException{ Analyzer analyzer = new StandardAnalyzer(); //存放在disk上 final Path path = Paths.get(targetFileIndex); Directory directory = FSDirectory.open(path); //写入器 IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter iw = new IndexWriter(directory,iwc); Document doc = new Document(); String text = "This is the text to be indexed"; doc.add(new Field("fileName",text,TextField.TYPE_STORED)); iw.addDocument(doc); iw.close(); } @Test public void testFind() throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(); //从disk上读取 final Path path = Paths.get(targetFileIndex); Directory directory = FSDirectory.open(path); DirectoryReader dr = DirectoryReader.open(directory); //索引查询器 IndexSearcher isc = new IndexSearcher(dr); QueryParser parser = new QueryParser("filename",analyzer); Query query = parser.parse("text"); ScoreDoc[] hits = isc.search(query, 1000).scoreDocs;; for (int i = 0; i < hits.length; i++) { Document hitDoc = isc.doc(hits[i].doc); } dr.close(); directory.close(); }