Lucene搜索功能
程序员文章站
2022-03-23 16:16:04
...
1. 对特定项搜索
2. 查询表达式:QueryParser
3. 分页实现
2. 查询表达式:QueryParser
3. 分页实现
新建 Maven 项目:New Maven Project -> Create a simple project -> Group Id: com.andrew.lucene;Artifact Id: Lucene03;Version: 0.0.1-SNAPSHOT;Packaging: jar
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.andrew.lucene</groupId>
    <artifactId>Lucene03</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <properties>
        <!-- The sources contain non-ASCII string literals; pin the encoding so the
             build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Lucene 5.x requires Java 7; state the compiler level explicitly instead
             of relying on the maven-compiler-plugin default. -->
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Core indexing and search API. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- Classic QueryParser used to parse query expressions. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- StandardAnalyzer and other common analyzers. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- JUnit 4, used by SearchTest. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>
</project>
Indexer.java代码 package com.andrew.lucene; import java.io.File; import java.io.FileReader; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class Indexer { private IndexWriter writer; // 写索引实例 // 构造方法 实例化IndexWriter public Indexer(String indexDir) throws Exception { Directory dir = FSDirectory.open(Paths.get(indexDir)); Analyzer analyzer = new StandardAnalyzer(); // 标准分词器 IndexWriterConfig iwc = new IndexWriterConfig(analyzer); writer = new IndexWriter(dir, iwc); } // 索引指定目录的所有文件 public int index(String dataDir) throws Exception { File[] files = new File(dataDir).listFiles(); for (File f : files) { indexFile(f); } return writer.numDocs(); } // 索引指定文件 private void indexFile(File f) throws Exception { System.out.println("索引文件:" + f.getCanonicalPath()); Document doc = getDocument(f); writer.addDocument(doc); } // 获取文档,文档里再设置每个字段 private Document getDocument(File f) throws Exception { Document doc = new Document(); doc.add(new TextField("contents", new FileReader(f))); doc.add(new TextField("fileName", f.getName(), Field.Store.YES)); doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES)); return doc; } // 关闭写索引 public void close() throws Exception { writer.close(); } public static void main(String[] args) { String indexDir = "E:\\lucene4"; String dataDir = "E:\\lucene4\\data"; Indexer indexer = null; int numIndexed = 0; long start = System.currentTimeMillis(); try { indexer = new Indexer(indexDir); numIndexed = indexer.index(dataDir); } catch (Exception e) { e.printStackTrace(); } finally { try { indexer.close(); } catch (Exception e) { e.printStackTrace(); 
} } long end = System.currentTimeMillis(); System.out.println("索引:" + numIndexed + "个文件花费了" + (end - start) + "毫秒"); } } 运行结果: 索引文件:E:\lucene4\data\CHANGES.txt 索引文件:E:\lucene4\data\LICENSE.txt 索引文件:E:\lucene4\data\NOTICE.txt 索引文件:E:\lucene4\data\SYSTEM_REQUIREMENTS.txt 索引:4个文件花费了6003毫秒
SearchTest.java代码 package com.andrew.lucene; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.junit.After; import org.junit.Before; import org.junit.Test; public class SearchTest { private Directory dir; private IndexReader reader; private IndexSearcher is; @Before public void setUp() throws Exception { dir = FSDirectory.open(Paths.get("E:\\lucene4")); reader = DirectoryReader.open(dir); is = new IndexSearcher(reader); } @After public void tearDown() throws Exception { reader.close(); } // 对特定项搜索 @Test public void testTermQuery() throws Exception { String searchField = "contents"; String q = "particular"; Term t = new Term(searchField, q); Query query = new TermQuery(t); TopDocs hits = is.search(query, 10); System.out.println("匹配 '" + q + "',总共查询到" + hits.totalHits + "个文档"); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } } // 解析查询表达式 @Test public void testQueryParser() throws Exception { Analyzer analyzer = new StandardAnalyzer(); // 标准分词器 String searchField = "contents"; String q = "abc~"; // String q = "particular AND benchmarks"; QueryParser parser = new QueryParser(searchField, analyzer); Query query = parser.parse(q); TopDocs hits = is.search(query, 100); System.out.println("匹配 " + q + "查询到" + hits.totalHits + "个记录"); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc 
= is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } } } 运行结果: 匹配 'particular',总共查询到4个文档 E:\lucene4\data\SYSTEM_REQUIREMENTS.txt E:\lucene4\data\LICENSE.txt E:\lucene4\data\NOTICE.txt E:\lucene4\data\CHANGES.txt // 匹配 particular AND benchmarks查询到1个记录 // E:\lucene4\data\SYSTEM_REQUIREMENTS.txt 匹配 abc~查询到4个记录 E:\lucene4\data\CHANGES.txt E:\lucene4\data\NOTICE.txt E:\lucene4\data\LICENSE.txt E:\lucene4\data\SYSTEM_REQUIREMENTS.txt
上一篇: 再论 Acegi 权限存储策略
下一篇: 癌症患者 补补身子