Lucene简介与实现 博客分类： Lucene lucene

程序员文章站 2024-03-18 20:42:40

...

1. Lucene简介

Lucene官网：http://lucene.apache.org/

2. Lucene实现

New maven project ->
Create a simple project ->
    Group Id: com.andrew.lucene
    Artifact Id: Lucene01
    Version: 0.0.1-SNAPSHOT
    Packaging: jar

Indexer.java代码

package com.andrew.lucene;

import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Each file becomes one document with three fields: {@code contents} (tokenized from the
 * file's text, not stored), {@code fileName} and {@code fullPath} (both stored).
 *
 * <p>Implements {@link AutoCloseable} so callers can use try-with-resources; the explicit
 * {@link #close()} method is unchanged.
 */
public class Indexer implements AutoCloseable {
    private final IndexWriter writer; // index writer instance

    /**
     * Opens (or creates) the index located at {@code indexDir} and prepares a writer that
     * uses the standard analyzer.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        Analyzer analyzer = new StandardAnalyzer(); // standard tokenizer/analyzer
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        writer = new IndexWriter(dir, iwc);
    }

    /**
     * Indexes every regular file located directly inside {@code dataDir}.
     * Subdirectories are skipped (they cannot be opened as text files).
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the number of documents reported by the writer after indexing
     * @throws IllegalArgumentException if {@code dataDir} is not a readable directory
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IllegalArgumentException("Not a readable directory: " + dataDir);
        }
        for (File f : files) {
            if (f.isFile()) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Indexes a single file.
     *
     * <p>The reader feeding the {@code contents} field is opened here with try-with-resources
     * so it is released even when {@code addDocument} fails. Note that {@link FileReader}
     * decodes with the platform default charset on Java versions before 18.
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件：" + f.getCanonicalPath());
        try (FileReader contents = new FileReader(f)) {
            writer.addDocument(getDocument(f, contents));
        }
    }

    /**
     * Builds the document for a file and sets each of its fields.
     *
     * @param f the file being indexed (supplies name and canonical path)
     * @param contents an open reader over the file's text; owned and closed by the caller
     */
    private Document getDocument(File f, FileReader contents) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", contents));
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws Exception if the writer fails to close
     */
    @Override
    public void close() throws Exception {
        writer.close();
    }

    /**
     * Demo entry point: indexes the files under a hard-coded data directory and prints how
     * many documents were indexed and how long it took.
     */
    public static void main(String[] args) {
        String indexDir = "E:\\lucene";
        String dataDir = "E:\\lucene\\data";
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources closes the indexer only if construction succeeded, so a failed
        // constructor no longer triggers a secondary NullPointerException in cleanup.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引：" + numIndexed + " 个文件,花费了" + (end - start) + "毫秒");
    }
}

执行结果

索引文件：E:\lucene\data\CHANGES.txt
索引文件：E:\lucene\data\JRE_VERSION_MIGRATION.txt
索引文件：E:\lucene\data\LICENSE.txt
索引文件：E:\lucene\data\MIGRATE.txt
索引文件：E:\lucene\data\NOTICE.txt
索引文件：E:\lucene\data\README.txt
索引文件：E:\lucene\data\SYSTEM_REQUIREMENTS.txt
索引：7 个文件,花费了901毫秒

Searcher.java代码

package com.andrew.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query against the {@code contents} field of a Lucene index and prints the
 * {@code fullPath} of each matching document.
 */
public class Searcher {
    /**
     * Searches the index at {@code indexDir} for {@code q} and prints up to 10 hits.
     *
     * <p>The directory and reader are opened with try-with-resources so they are released
     * even when query parsing or searching throws.
     *
     * @param indexDir file-system path of the index directory
     * @param q query string, parsed with the classic query parser
     * @throws Exception if the index cannot be opened, the query cannot be parsed, or the
     *     search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
                IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher is = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(); // standard tokenizer/analyzer
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);
            // Only the search call itself is timed, not index opening or query parsing.
            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + ",总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                System.out.println(doc.get("fullPath"));
            }
        }
    }

    /**
     * Demo entry point: searches a hard-coded index directory for a hard-coded query and
     * prints any failure's stack trace.
     */
    public static void main(String[] args) {
        String indexDir = "E:\\lucene";
        String q = "Zygmunt Saloni";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

执行结果

匹配 Zygmunt Saloni,总共花费24毫秒查询到1个记录
E:\lucene\data\LICENSE.txt

Lucene简介与实现博客分类： Lucene lucene

Lucene5学习之Facet(续) 博客分类： Lucene LuceneFacet

Annotation-Driven Indexing and Searching with Lucene (转载) 博客分类：工作 lucene.net

Lucene5学习之Facet简单入门博客分类： Lucene LuceneFacet

lucene 入门(转) 博客分类： Lucene lucene搜索引擎Apache全文检索Eclipse

Lucene 维度统计(facet) 博客分类： lucene facet

Lucene简介与实现博客分类： Lucene lucene

lucene中的docValue实现源码解读（一）——综述博客分类： lucene lucenedocValue存储格式

Jive学习心得博客分类： jive学习设计模式lucene网络应用数据结构全文检索

wsm-lucene 一个简单的Lucene工具类博客分类： lucene lucenemaven

Lucene JAVA全文检索引擎博客分类： Lucene Lucene

Lucene简介与实现 博客分类： Lucene lucene

Lucene5学习之Facet(续) 博客分类： Lucene LuceneFacet

Annotation-Driven Indexing and Searching with Lucene (转载) 博客分类： 工作 lucene.net

Lucene5学习之Facet简单入门 博客分类： Lucene LuceneFacet

lucene 入门(转) 博客分类： Lucene lucene搜索引擎Apache全文检索Eclipse

Lucene 维度统计(facet) 博客分类： lucene facet

Lucene简介与实现 博客分类： Lucene lucene

lucene中的docValue实现源码解读（一）——综述 博客分类： lucene lucenedocValue存储格式

Jive学习心得 博客分类： jive学习 设计模式lucene网络应用数据结构全文检索

wsm-lucene 一个简单的Lucene工具类 博客分类： lucene lucenemaven

Lucene JAVA全文检索引擎 博客分类： Lucene Lucene

Lucene简介与实现博客分类： Lucene lucene

Annotation-Driven Indexing and Searching with Lucene (转载) 博客分类：工作 lucene.net

Lucene5学习之Facet简单入门博客分类： Lucene LuceneFacet

Lucene简介与实现博客分类： Lucene lucene

lucene中的docValue实现源码解读（一）——综述博客分类： lucene lucenedocValue存储格式

Jive学习心得博客分类： jive学习设计模式lucene网络应用数据结构全文检索

wsm-lucene 一个简单的Lucene工具类博客分类： lucene lucenemaven

Lucene JAVA全文检索引擎博客分类： Lucene Lucene