Lucene的简单用法
程序员文章站
2024-01-13 08:00:28
1.创建索引 2.查询索引 3.删除索引 4.修改索引 ......
1.创建索引
package com.dingyu.test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Builds a Lucene index from the files of a directory.
 *
 * <p>The goal is to store both the index terms and the documents in an index
 * directory. An {@link IndexWriter} writes {@link Document} objects into it;
 * each document carries the fields we define, and the analyzer tokenizes the
 * analyzed fields to produce the index terms.
 *
 * @author 丁宇
 */
public class lucenetest {

    /**
     * Creates one Lucene document per regular file found in
     * {@code d:\lucenetest} and writes them to the index in
     * {@code d:\luceneindex}.
     *
     * @throws IOException if the source directory cannot be listed or the
     *         index cannot be written
     */
    @Test
    public void createindex() throws IOException {
        // listFiles() returns null when the path is missing or not a directory.
        File[] files = new File("d:\\lucenetest").listFiles();
        if (files == null) {
            throw new IOException("Cannot list source directory d:\\lucenetest");
        }

        // Standard analyzer used to tokenize the analyzed fields.
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);

        // try-with-resources closes the writer first and then the directory,
        // even if indexing one of the files fails.
        try (Directory directory = FSDirectory.open(Paths.get("d:\\luceneindex"));
                IndexWriter writer = new IndexWriter(directory, config)) {
            for (File file : files) {
                // Sub-directories and special files have no content to index.
                if (!file.isFile()) {
                    continue;
                }

                Document document = new Document();
                // filepath: stored only, neither analyzed nor indexed.
                document.add(new StoredField("filepath", file.getPath()));
                // filename: analyzed, indexed and stored.
                document.add(new TextField("filename", file.getName(), Store.YES));
                // filecontent: analyzed and indexed, but the text itself is not stored.
                document.add(new TextField("filecontent", filecontent(file), Store.NO));
                // filesize: stored AND indexed as a numeric value. A plain
                // StoredField would not be searchable, so a numeric range
                // query on "filesize" could never match it.
                document.add(new LongField("filesize", file.length(), Store.YES));

                writer.addDocument(document);
            }
        }
    }

    /**
     * Reads the whole file and decodes it as UTF-8.
     *
     * @param file the file to read
     * @return the complete content of the file
     * @throws IllegalStateException if the file cannot be read; the
     *         underlying {@link IOException} is kept as the cause
     */
    public String filecontent(File file) {
        try {
            // readAllBytes reads until end of file and always closes the stream,
            // unlike a single InputStream.read(byte[]) call which may stop short.
            byte[] bytes = Files.readAllBytes(file.toPath());
            return new String(bytes, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new IllegalStateException("Cannot read file " + file, e);
        }
    }
}
2.查询索引
package com.dingyu.test;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Queries the index stored in {@code d:\luceneindex}.
 *
 * @author 丁宇
 */
public class lucenetest1 {

    /** Maximum number of hits requested from each search. */
    private static final int MAX_HITS = 2;

    /**
     * Opens the index, runs the query, prints the hits and closes the index.
     *
     * @param query the query to execute
     * @throws IOException if the index cannot be opened or read
     */
    private void searchAndPrint(Query query) throws IOException {
        // try-with-resources closes the reader and then the directory even
        // when searching or printing throws.
        try (Directory directory = FSDirectory.open(Paths.get("d:\\luceneindex"));
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(query, MAX_HITS);
            printIndex(docs, searcher);
        }
    }

    /**
     * Prints the fields of every hit to standard output.
     *
     * @param docs the search result
     * @param searcher the searcher that produced {@code docs}; used to load
     *        each document by its id
     * @throws IOException if a document cannot be loaded
     */
    private void printIndex(TopDocs docs, IndexSearcher searcher) throws IOException {
        for (ScoreDoc hit : docs.scoreDocs) {
            // hit.doc is the internal document id; use it to fetch the document.
            Document document = searcher.doc(hit.doc);
            System.out.println(document.get("filename"));
            // Document.get returns null for a field that was not stored.
            System.out.println(document.get("filecontent"));
            System.out.println(document.get("filepath"));
            System.out.println(document.get("filesize"));
            System.out.println("------------------------");
        }
    }

    /**
     * Exact term query on the {@code filename} field.
     *
     * @throws IOException if the index cannot be read
     */
    @Test
    public void termqueryindex() throws IOException {
        Query query = new TermQuery(new Term("filename", "txt"));
        searchAndPrint(query);
    }

    /**
     * Numeric range query. The five arguments are the field name, the lower
     * and upper bound, and whether the lower and the upper bound are
     * inclusive.
     *
     * <p>The query only matches documents whose {@code filesize} was indexed
     * as a numeric field; a value that is merely stored is not searchable.
     *
     * @throws IOException if the index cannot be read
     */
    @Test
    public void numrangequeryindex() throws IOException {
        Query query = NumericRangeQuery.newLongRange("filesize", 0L, 1000L, true, true);
        searchAndPrint(query);
    }

    /**
     * Combined query: both clauses are added with {@code MUST}, so a document
     * has to satisfy the term query AND the range query (an intersection).
     *
     * @throws IOException if the index cannot be read
     */
    @Test
    public void booleanqueryindex() throws IOException {
        Query nameQuery = new TermQuery(new Term("filename", "txt"));
        Query sizeQuery = NumericRangeQuery.newLongRange("filesize", 0L, 1000L, true, true);

        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(nameQuery, Occur.MUST);
        booleanQuery.add(sizeQuery, Occur.MUST);

        // Search with the combined query, not with the single term query.
        searchAndPrint(booleanQuery);
    }
}
3.删除索引
package com.dingyu.test;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Deletes entries from the index. Adding, deleting and updating all go
 * through the same object, an {@link IndexWriter}.
 *
 * @author 丁宇
 */
public class lucenetest3 {

    /**
     * Opens an {@link IndexWriter} on the index in {@code d:\luceneindex}.
     * The caller is responsible for closing the returned writer.
     *
     * @return a new writer configured with a {@link StandardAnalyzer}
     * @throws IOException if the index directory or the writer cannot be opened
     */
    public IndexWriter getindexwrite() throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        Directory directory = FSDirectory.open(Paths.get("d:\\luceneindex"));
        try {
            return new IndexWriter(directory, config);
        } catch (IOException | RuntimeException e) {
            // The writer was never created, so nobody else will close the directory.
            directory.close();
            throw e;
        }
    }

    /**
     * Deletes every document in the index.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void deleteallindex() throws IOException {
        // try-with-resources closes the writer even if the delete fails.
        try (IndexWriter writer = getindexwrite()) {
            writer.deleteAll();
        }
    }

    /**
     * Deletes the documents that match a query; here, every document whose
     * {@code filename} field contains the term {@code txt}.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void deletesomeindex() throws IOException {
        Query query = new TermQuery(new Term("filename", "txt"));
        try (IndexWriter writer = getindexwrite()) {
            writer.deleteDocuments(query);
        }
    }
}
4.修改索引
package com.dingyu.test;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Updates documents in the index.
 *
 * @author 丁宇
 */
public class lucenetest2 {

    /**
     * Opens an {@link IndexWriter} on the index in {@code d:\luceneindex}.
     * The caller is responsible for closing the returned writer.
     *
     * @return a new writer configured with a {@link StandardAnalyzer}
     * @throws IOException if the index directory or the writer cannot be opened
     */
    private IndexWriter openWriter() throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        Directory directory = FSDirectory.open(Paths.get("d:\\luceneindex"));
        try {
            return new IndexWriter(directory, config);
        } catch (IOException | RuntimeException e) {
            // The writer was never created, so nobody else will close the directory.
            directory.close();
            throw e;
        }
    }

    /**
     * Replaces every document whose {@code filecontent} field contains the
     * term {@code txt} with a single new document that only has a
     * {@code filename} field.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void updateindex() throws IOException {
        Document replacement = new Document();
        replacement.add(new StringField("filename", "think in java", Store.YES));

        // try-with-resources closes the writer even if the update fails.
        try (IndexWriter writer = openWriter()) {
            // An update is a delete of the documents matching the term
            // followed by an add of the new document.
            writer.updateDocument(new Term("filecontent", "txt"), replacement);
        }
    }
}
上一篇: python基础第一章
下一篇: Python最简编码规范