LUCENE入门案例
程序员文章站
2022-06-09 14:07:31
...
1.创建maven项目,在pom.xml中引入所需的依赖坐标
<dependencies>
<!-- Lucene core (index + search engine) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.3</version>
</dependency>
<!-- Lucene query parsing (QueryParser / MultiFieldQueryParser) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.3</version>
</dependency>
<!-- commons-io utilities for reading files from disk -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<!-- IK Chinese analyzer (tokenizer for Chinese text) -->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin pinning the JDK source/target level -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<target>1.8</target>
<source>1.8</source>
</configuration>
</plugin>
</plugins>
</build>
2.配置文件 使用ikanalyzer 中文分词器
IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!-- user-defined extension dictionary (extra words the tokenizer should keep together) -->
<entry key="ext_dict">ext.dic;</entry>
<!-- user-defined stop-word dictionary (words the tokenizer should drop) -->
<entry key="ext_stopwords">stopword.dic;</entry>
</properties>
ext.dic 和 stopword.dic 需与 IKAnalyzer.cfg.xml 放在同一 classpath 根目录(如 src/main/resources)下,注意文件路径与编码(UTF-8 无 BOM)
3.创建测试类
package com.itheima;
import java.io.File;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 4.10.3 getting-started examples: building an index from the text
 * files in a local directory, querying it (numeric-range / term / parsed
 * queries) and deleting documents, all tokenized with the IK Chinese analyzer.
 *
 * <p>Run each method as a JUnit 4 test. The index and data directories are
 * hard-coded below; adjust them to your machine.
 */
public class Lucence {

    /**
     * Index directory shared by every method. The original code used
     * "D:\\LUCENE" in deleteIndex() while all other methods used
     * "E:\\LUCENE" — almost certainly a typo, so a single constant is used.
     */
    private static final String INDEX_DIR = "E:\\LUCENE";

    /** Directory whose text files are read and indexed by CreateIndex(). */
    private static final String DATA_DIR = "E:\\findAllLUCENE";

    /**
     * Creates (or appends to) the index: one Document per file in DATA_DIR,
     * with fileName / fileContent / fileSize / filePath fields.
     *
     * @throws Exception on any I/O or Lucene failure
     */
    @Test
    public void CreateIndex() throws Exception {
        // 1. open the index directory on disk
        FSDirectory directory = FSDirectory.open(new File(INDEX_DIR));
        // 2. IK Chinese analyzer instead of the StandardAnalyzer
        //    (StandardAnalyzer splits Chinese text into single characters)
        Analyzer analyzer = new IKAnalyzer();
        // 3. writer config: Lucene version + analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        // 4. try-with-resources guarantees the writer (and its write lock)
        //    is released even when indexing throws
        try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
            File dataDir = new File(DATA_DIR);
            File[] files = dataDir.listFiles();
            // listFiles() returns null (not an empty array) when the
            // directory is missing or unreadable — fail with a clear message
            if (files == null) {
                throw new IllegalStateException("Cannot list files in: " + DATA_DIR);
            }
            for (File file : files) {
                // read the content once (the original read it twice per file);
                // explicit UTF-8 avoids depending on the platform charset
                String content = FileUtils.readFileToString(file, StandardCharsets.UTF_8);
                long size = FileUtils.sizeOf(file);
                System.out.println("文件名:" + file.getName());
                System.out.println("文件内容:" + content);
                System.out.println("文件大小:" + size);
                System.out.println("文件路径" + file.getPath());
                /*
                 * TextField  – analyzed text
                 * LongField  – numeric value (enables NumericRangeQuery)
                 * Field.Store.YES – also store the raw value so it can be
                 *                   read back from search results
                 */
                Document document = new Document();
                document.add(new TextField("fileName", file.getName(), Field.Store.YES));
                document.add(new TextField("fileContent", content, Field.Store.YES));
                document.add(new LongField("fileSize", size, Field.Store.YES));
                document.add(new TextField("filePath", file.getPath(), Field.Store.YES));
                indexWriter.addDocument(document);
            }
            // flush all added documents to the index
            indexWriter.commit();
        }
    }

    /**
     * Direct (non-analyzed) query examples; currently runs a numeric range
     * query on the stored fileSize field.
     *
     * @throws Exception on any I/O or Lucene failure
     */
    @Test
    public void queryIndex01() throws Exception {
        // the reader holds file handles — close it when the search is done
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(INDEX_DIR)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Alternatives kept for reference:
            // Query query = new MatchAllDocsQuery();                        // match everything
            // Query query = new TermQuery(new Term("fileName", "传智播客")); // exact term match
            // Numeric range: field, lower, upper, includeLower, includeUpper
            Query query = NumericRangeQuery.newLongRange("fileSize", 1L, 50L, true, true);
            // search(query, n): top n hits by score
            TopDocs docs = searcher.search(query, 10);
            printResults(searcher, docs);
        }
    }

    /**
     * Parsed query example: the search string itself is tokenized by the IK
     * analyzer before matching (so multi-word Chinese input works).
     *
     * @throws Exception on any I/O, parse or Lucene failure
     */
    @Test
    public void queryIndex() throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(INDEX_DIR)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // single-field parse: the query string is analyzed with the same
            // IK analyzer used at index time
            String searchStr = "传智播客的发射点发生";
            QueryParser parser = new QueryParser("fileName", new IKAnalyzer());
            Query query = parser.parse(searchStr);
            // Multi-field variant kept for reference:
            // String[] fields = {"fileName", "fileContent"};
            // MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new IKAnalyzer());
            // Query query = parser.parse(searchStr);
            TopDocs docs = searcher.search(query, 10);
            printResults(searcher, docs);
        }
    }

    /**
     * Prints every hit in {@code docs} (id plus stored fields) and the total
     * hit count. Shared by both query methods.
     *
     * @param searcher searcher used to resolve doc ids to stored documents
     * @param docs     search results to display
     * @throws Exception if a stored document cannot be loaded
     */
    private void printResults(IndexSearcher searcher, TopDocs docs) throws Exception {
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            // scoreDoc.doc is the internal Lucene document id
            System.out.println("文档id====" + scoreDoc.doc);
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println("名称" + doc.getField("fileName"));
            System.out.println("内容" + doc.getField("fileContent"));
            System.out.println("大小" + doc.getField("fileSize"));
            System.out.println("路径" + doc.getField("filePath"));
        }
        System.out.println("查询总记录数=" + docs.totalHits);
    }

    /**
     * Deletes all documents whose fileName field contains the term
     * "传智播客".
     *
     * @throws Exception on any I/O or Lucene failure
     */
    @Test
    public void deleteIndex() throws Exception {
        // NOTE(review): the original opened "D:\\LUCENE" here although every
        // other method uses "E:\\LUCENE"; fixed to the shared constant.
        FSDirectory directory = FSDirectory.open(new File(INDEX_DIR));
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            // delete by term: field name + indexed token to match
            writer.deleteDocuments(new Term("fileName", "传智播客"));
            // writer.deleteAll(); // would wipe the whole index
            writer.commit();
        }
    }
}