Lucene的简单用法

程序员文章站 2022-04-24 20:06:26

1.创建索引 2.查询索引 3.删除索引 4.修改索引 ......

1.创建索引

　　 Lucene的简单用法

package com.dingyu.test;

import java.io.file;
import java.io.fileinputstream;
import java.io.filenotfoundexception;
import java.io.ioexception;
import java.io.unsupportedencodingexception;
import java.nio.file.paths;

import org.apache.lucene.analysis.analyzer;
import org.apache.lucene.analysis.standard.standardanalyzer;
import org.apache.lucene.document.document;
import org.apache.lucene.document.field;
import org.apache.lucene.document.field.store;
import org.apache.lucene.document.storedfield;
import org.apache.lucene.index.indexwriter;
import org.apache.lucene.index.indexwriterconfig;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Builds a Lucene index from the files of a source folder.
 *
 * <p>The goal is to store documents and their index terms in an index
 * directory: open the index directory, create an index writer on it, and add
 * one document per file. Each document carries the fields set up below; the
 * index terms are produced by running the analyzer over the analyzed fields,
 * which is why an analyzer is needed.
 *
 * @author 丁宇
 */
public class lucenetest {
    /**
     * Creates the index: one document per regular file found directly inside
     * the source folder.
     *
     * @throws ioexception if the source folder cannot be listed, a file cannot
     *                     be read, or the index cannot be written
     */
    @test
    public void createindex() throws ioexception {
        // Standard analyzer, used to tokenize the analyzed fields.
        analyzer analyzer = new standardanalyzer();
        indexwriterconfig config = new indexwriterconfig(analyzer);

        // listfiles() yields null when the path is missing or is not a folder;
        // fail with a clear message instead of a NullPointerException in the loop.
        file[] files = new file("d:\\lucenetest").listfiles();
        if (files == null) {
            throw new ioexception("cannot list files in d:\\lucenetest");
        }

        // try-with-resources closes the writer first and the directory second,
        // even when adding a document fails, so the index lock is always released.
        try (directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
                indexwriter write = new indexwriter(directory, config)) {
            for (file file : files) {
                // Sub-folders cannot be opened as a file stream; skip them.
                if (!file.isfile()) {
                    continue;
                }
                document document = new document();
                // filepath: stored only (not analyzed, not indexed); holds the file path.
                field field1 = new storedfield("filepath", file.getpath());
                // filename: analyzed, indexed and stored.
                field field2 = new org.apache.lucene.document.textfield("filename", file.getname(), store.yes);
                // filecontent: analyzed and indexed, but the text itself is not stored.
                field field3 = new org.apache.lucene.document.textfield("filecontent", readcontent(file), store.no);
                // filesize: stored only (not analyzed, not indexed); holds the file length.
                // NOTE(review): a stored-only field is presumably not searchable, so a
                // range query on "filesize" would need an indexed numeric field as well
                // - confirm against the Lucene version in use.
                field field4 = new storedfield("filesize", file.length());
                document.add(field1);
                document.add(field2);
                document.add(field3);
                document.add(field4);
                write.adddocument(document);
            }
        }
    }

    /**
     * Reads a file and decodes it as UTF-8.
     *
     * @param file the file to read
     * @return the file content, or {@code null} when the file cannot be read
     *         (the failure is printed to standard error)
     */
    public string filecontent(file file)  {
        try {
            return readcontent(file);
        } catch (ioexception e) {
            // Covers a missing file, a read error and an unsupported charset.
            e.printstacktrace();
            return null;
        }
    }

    /**
     * Reads the whole file into memory and decodes it as UTF-8.
     *
     * @param file the file to read
     * @return the decoded content
     * @throws ioexception if the file cannot be opened or read
     */
    private string readcontent(file file) throws ioexception {
        byte[] buffer = new byte[(int) file.length()];
        try (fileinputstream in = new fileinputstream(file)) {
            int offset = 0;
            // A single read() may return fewer bytes than requested; keep
            // reading until the buffer is full or the stream ends.
            while (offset < buffer.length) {
                int count = in.read(buffer, offset, buffer.length - offset);
                if (count < 0) {
                    break;
                }
                offset += count;
            }
            return new string(buffer, 0, offset, "utf-8");
        }
    }
}

Lucene的简单用法

2.查询索引

package com.dingyu.test;

import java.io.ioexception;
import java.nio.file.path;
import java.nio.file.paths;

import org.apache.lucene.document.document;
import org.apache.lucene.index.directoryreader;
import org.apache.lucene.index.indexreader;
import org.apache.lucene.index.term;
import org.apache.lucene.search.booleanclause.occur;
import org.apache.lucene.search.booleanquery;
import org.apache.lucene.search.indexsearcher;
import org.apache.lucene.search.numericrangequery;
import org.apache.lucene.search.query;
import org.apache.lucene.search.scoredoc;
import org.apache.lucene.search.termquery;
import org.apache.lucene.search.topdocs;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * 查询索引
 * 
 * @author 丁宇
 *
 */
public class lucenetest1 {
    // 获得indexsearcher对象
    private indexsearcher getindexsearcher() throws ioexception {
        // 指定索引库
        directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
        // 打开索引库
        indexreader reader = directoryreader.open(directory);
        // 创建查询的对象
        indexsearcher searcher = new indexsearcher(reader);
        return searcher;
    }

    // 输出查到的内容
    private void printindex(topdocs docs,indexsearcher searcher) throws ioexception {
        // 获得顶部匹配记录
        scoredoc[] scoredocs = docs.scoredocs;
        // 获得在索引库中存着的文档的id,利用id去寻找文档
        for (scoredoc scoredoc : scoredocs) {
            // 获得id
            int doc = scoredoc.doc;
            // 获得文档
            document document = searcher.doc(doc);
            // 获得这个文档的域
            system.out.println(document.get("filename"));
            system.out.println(document.get("filecontent"));
            system.out.println(document.get("filepath"));
            system.out.println(document.get("filesize"));
            system.out.println("------------------------");
        }
    }
    /**
     * 精准查询
     * @throws ioexception
     */
    @test
    public void termqueryindex() throws ioexception {

        indexsearcher searcher = getindexsearcher();
        // 选择合适的查询方法，这里用最简单的，具体的看下图
        query query = new termquery(new term("filename", "txt"));
        // 执行查询
        topdocs docs = searcher.search(query, 2);
        //输出查询内容
        printindex(docs, searcher);
        // 关闭索引库
        searcher.getindexreader().close();
    }
    /**
     * 范围查询 五个参数 第一个域名，第二个第三个表示范围，第四个第五个表示是否包含最小值和最大值。
     * @throws ioexception
     */
    @test
    public void numrangequeryindex() throws ioexception {
        indexsearcher searcher = getindexsearcher();
        // 选择合适的查询方法，这里用最简单的，具体的看下图
        query query = numericrangequery.newlongrange("filesize", 0l, 1000l, true, true);
        // 执行查询
        topdocs docs = searcher.search(query, 2);
        //输出查询内容
        printindex(docs, searcher);
        // 关闭索引库
        searcher.getindexreader().close();
    }
    /**
     * 组合查询
     * @throws ioexception 
     */
    @test
    public void booleanqueryindex() throws ioexception {
        indexsearcher searcher = getindexsearcher();
        booleanquery booleanquery = new booleanquery();
        query query = new termquery(new term("filename","txt"));
        query query2 = numericrangequery.newlongrange("filesize", 0l, 1000l, true, true);
        //表示query是必须的 query2也是必须 相当于并集
        booleanquery.add(query,occur.must);
        booleanquery.add(query2, occur.must);
        // 执行查询
        topdocs docs = searcher.search(query, 2);
        //输出查询内容
        printindex(docs, searcher);
        // 关闭索引库
        searcher.getindexreader().close();
    }
}

Lucene的简单用法

3.删除索引

package com.dingyu.test;

import java.io.ioexception;
import java.nio.file.paths;

import org.apache.lucene.analysis.analyzer;
import org.apache.lucene.analysis.standard.standardanalyzer;
import org.apache.lucene.index.indexwriter;
import org.apache.lucene.index.indexwriterconfig;
import org.apache.lucene.index.term;
import org.apache.lucene.search.query;
import org.apache.lucene.search.termquery;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Deleting from the index. Adding, deleting and updating all go through the
 * same kind of object; here that is the index writer.
 *
 * @author 丁宇
 */
public class lucenetest3 {
    /**
     * Creates an index writer on the index directory, using the standard analyzer.
     * The caller is responsible for closing the returned writer.
     *
     * @return a new index writer
     * @throws ioexception if the index directory cannot be opened
     */
    public indexwriter getindexwrite() throws ioexception {
        analyzer analyzer = new standardanalyzer();
        directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
        indexwriterconfig config = new indexwriterconfig(analyzer);
        return new indexwriter(directory, config);
    }

    /**
     * Deletes everything in the index.
     *
     * @throws ioexception if the index cannot be opened or modified
     */
    @test
    public void deleteallindex() throws ioexception {
        // try-with-resources closes the writer even when the delete fails,
        // so the index is not left locked.
        try (indexwriter indexwrite = getindexwrite()) {
            indexwrite.deleteall();
        }
    }

    /**
     * Deletes the index entries, and with them the documents, that match a query.
     *
     * @throws ioexception if the index cannot be opened or modified
     */
    @test
    public void deletesomeindex() throws ioexception {
        query query = new termquery(new term("filename","txt"));
        // try-with-resources closes the writer even when the delete fails.
        try (indexwriter indexwrite = getindexwrite()) {
            indexwrite.deletedocuments(query);
        }
    }
}

4.修改索引

package com.dingyu.test;

import java.io.ioexception;
import java.nio.file.paths;

import org.apache.lucene.analysis.analyzer;
import org.apache.lucene.analysis.standard.standardanalyzer;
import org.apache.lucene.document.document;
import org.apache.lucene.document.field.store;
import org.apache.lucene.document.stringfield;
import org.apache.lucene.index.indexwriter;
import org.apache.lucene.index.indexwriterconfig;
import org.apache.lucene.index.indexablefield;
import org.apache.lucene.index.term;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Updating the index.
 *
 * @author 丁宇
 */
public class lucenetest2 {

    /**
     * Creates an index writer on the index directory, using the standard analyzer.
     * The caller is responsible for closing the returned writer.
     */
    private indexwriter getindexwriter() throws ioexception {
        analyzer analyzer = new standardanalyzer();
        directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
        indexwriterconfig config = new indexwriterconfig(analyzer);
        return new indexwriter(directory, config);
    }

    /**
     * Replaces the documents selected by a term with one new document.
     *
     * @throws ioexception if the index cannot be opened or modified
     */
    @test
    public void updateindex() throws ioexception {
        document replacement = new document();
        replacement.add(new stringfield("filename", "think in java", store.yes));
        // try-with-resources closes the writer even when the update fails,
        // so the index is not left locked.
        try (indexwriter indexwriter = getindexwriter()) {
            // An update is a delete followed by an add: the documents matching
            // the given term are removed and the new document is inserted.
            indexwriter.updatedocument(new term("filecontent","txt"), replacement);
        }
    }
}

上一篇： java发送http的get、post请求 javagetpost

下一篇： 2015黑帽大会人工智能新突破引全球黑客“关心”

Lucene的简单用法

1.创建索引

2.查询索引

3.删除索引

4.修改索引

php对数组排序的简单实例

JS用最简单的方法实现四舍五入

PHP数据流应用的一个简单实例

二岁宝宝食谱超级简单的做法，宝宝营养要跟上！

Android Retrofit的简单介绍和使用

使用CSS3制作一个简单的进度条(demo)

Python面向对象之类的定义与继承用法示例

matlab中分号、冒号、逗号等常用标点符号的功能和用法总结

推荐一款简单好用、功能强大的服务器共享文件管理软件、共享文件管理免客户端的软件

Java8新特性Lambda表达式的一些复杂用法总结