Lucene入门(一)
程序员文章站
2022-07-09 10:19:44
...
Lucene 简介
Lucene提供基于Java的索引和搜索技术,以及拼写检查,命中突出显示和高级分析/标记化功能。
直接撸代码
Maven依赖如下。IKAnalyzer分词器的jar包需要手动导入,位于项目的Lunece/lib/目录下
<dependencies>
<!-- Apache Commons IO -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<!-- Lucene core package -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.2.1</version>
</dependency>
<!-- Highlighting of matched search keywords -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>7.2.1</version>
</dependency>
<!-- High-performance single-document index for matching against queries; the highlighter needs this jar -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-memory</artifactId>
<version>7.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>7.2.1</version>
</dependency>
<!-- Lucene demo module -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-demo</artifactId>
<version>7.2.1</version>
</dependency>
<!-- General-purpose analyzers, suitable for English text -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>7.2.1</version>
</dependency>
<!-- Chinese analyzer -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>7.2.1</version>
</dependency>
</dependencies>
接下来就是入门Lucene应用程序
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Introductory Lucene example: indexes a handful of Chinese news headlines into an
 * in-memory directory and prints every headline that matches a search keyword.
 *
 * <p>Created: 2019/6/7 19:19
 */
public class LuneceTest {

    /**
     * Builds the in-memory index, runs one query against the {@code title} field and
     * prints the hit count followed by one tab-separated line
     * (document id, score, field value) per stored field of each hit.
     *
     * @param args unused
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // Sample data to search; in a real application this could come from a database.
        List<String> titleList = new ArrayList<String>();
        titleList.add("湖南省纪委通报6起违反*八项规定精神典型问题");
        titleList.add("赵步长:“脑心同治”力推实现健康中国梦");
        titleList.add("中国工程院院士增选第二轮评审名单公布 湖南9人入选");
        titleList.add("今年湖南退休人员每人每月增加基本养老金50元");
        titleList.add("高三学生22%近视600度以上 户外运动少睡眠不达标");

        // Keyword to look up.
        String searchTitle = "湖南";

        // The analyzer (IK Chinese tokenizer; its jar must be added manually) and the
        // RAM-backed directory are both closeable, so they are scoped with
        // try-with-resources to guarantee release even when an exception is thrown.
        try (IKAnalyzer ikAnalyzer = new IKAnalyzer();
             Directory ramDirectory = new RAMDirectory()) {

            // Write phase: one document per headline. The writer is closed (and thereby
            // committed) before the reader is opened so the reader sees every document.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(ikAnalyzer);
            try (IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig)) {
                for (String title : titleList) {
                    Document document = new Document();
                    // Store.YES keeps the original text so it can be printed from the hit.
                    document.add(new TextField("title", title, Field.Store.YES));
                    indexWriter.addDocument(document);
                }
            }

            // Parse the keyword with the same analyzer that was used for indexing.
            Query query = new QueryParser("title", ikAnalyzer).parse(searchTitle);

            // Read phase: the reader is closed automatically once the results are printed.
            try (IndexReader indexReader = DirectoryReader.open(ramDirectory)) {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                // Ask for at most as many hits as there are indexed headlines.
                ScoreDoc[] scoreDocs = indexSearcher.search(query, titleList.size()).scoreDocs;
                System.out.println("匹配个数:" + scoreDocs.length);

                for (ScoreDoc scoreDoc : scoreDocs) {
                    int doc = scoreDoc.doc;
                    float score = scoreDoc.score;
                    Document hit = indexSearcher.doc(doc);
                    for (IndexableField field : hit.getFields()) {
                        System.out.println(doc + "\t" + score + "\t" + field.stringValue());
                    }
                }
            }
        }
    }
}
码云地址:https://gitee.com/MyXiaoXiaoBin/elk/tree/master/lucene/chapter_01
上一篇: 别买定时炸弹 揭电源奸商三大雷人技巧
下一篇: 浏览器路径 隐藏