lucene全文检索实例

程序员文章站 2022-07-13 16:10:58

...

在网上找了些关于lucene全文检索的资料，发现这些资料对初学者来说还是难以理解，于是自己总结了一些代码贴出来。首先在c:\source文件夹下创建两个txt文件，文件内容中可加入“测试”二字；然后执行TestFileIndexer.java创建索引文件，再运行TestQuery.java根据关键字读取索引信息。具体的使用还是要看个人需求而定。

我用的Lucene jar包版本是3.6.0，没有jar包的话可以到Lucene官网下载。

创建索引文件：

TestFileIndexer.java

package text;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene 3.6 index on disk from the files found in a source folder.
 *
 * <p>Each regular file becomes one document with three fields: {@code path}
 * (stored, not indexed), {@code body} (stored, analyzed, with term vectors)
 * and {@code modified} (numeric, stored and indexed).
 */
public class TestFileIndexer {
	/**
	 * Indexes every regular file in {@code c:\source} into {@code c:\index}.
	 *
	 * @param args unused
	 * @throws Exception if the source folder cannot be listed, a file cannot be
	 *         read, or the index cannot be written
	 */
	public static void main(String[] args) throws Exception {
		// Folder whose files are indexed (no trailing space in the path).
		File fileDir = new File("c:\\source");
		// Folder that receives the index files.
		File indexDir = new File("c:\\index");

		// listFiles() returns null when the folder is missing or unreadable.
		// Check before opening the writer so that a bad source folder does not
		// wipe an existing index (the writer is opened in CREATE mode below).
		File[] textFiles = fileDir.listFiles();
		if (textFiles == null) {
			throw new IOException("Cannot list source folder: " + fileDir.getPath());
		}

		// Keep the index on disk.
		Directory dir = FSDirectory.open(indexDir);
		try {
			Analyzer lucenAnalyzer = new StandardAnalyzer(Version.LUCENE_36);
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, lucenAnalyzer);
			// CREATE always starts a fresh index, replacing any existing one.
			iwc.setOpenMode(OpenMode.CREATE);
			IndexWriter indexWriter = new IndexWriter(dir, iwc);
			long startTime = System.currentTimeMillis();
			try {
				for (File textFile : textFiles) {
					// Sub-directories cannot be opened as a stream; skip them.
					if (!textFile.isFile()) {
						continue;
					}
					System.out.println(":;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
					System.out.println("File"+textFile.getCanonicalPath()+"正在被索引...");
					String temp = FileReaderAll(textFile.getCanonicalPath(), "GBK");
					System.out.println(temp);

					Document document = new Document();
					document.add(new Field("path", textFile.getPath(),
							Field.Store.YES, Field.Index.NO));
					document.add(new Field("body", temp, Field.Store.YES,
							Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
					// Stored so the value can be read back from a search hit;
					// the timestamp is the file's own, not the folder's.
					NumericField modifiField = new NumericField("modified", Field.Store.YES, true);
					modifiField.setLongValue(textFile.lastModified());
					document.add(modifiField);
					indexWriter.addDocument(document);
				}
			} finally {
				// Always release the index write lock, even if a file failed.
				indexWriter.close();
			}
			// Report how long indexing took.
			long endTime = System.currentTimeMillis();
			System.out.println("花了"+(endTime-startTime)+"毫秒把文档添加到索引里面去"+fileDir.getPath());
		} finally {
			dir.close();
		}
	}

	/**
	 * Reads a whole text file into a single string.
	 *
	 * <p>Lines are joined with {@code '\n'} so that the last word of one line
	 * and the first word of the next are not fused into one token when the
	 * text is analyzed. No trailing line separator is appended.
	 *
	 * @param FileName path of the file to read
	 * @param charset name of the charset used to decode the file
	 * @return the file content; empty if the file is empty
	 * @throws IOException if the file cannot be opened or read, or the charset
	 *         name is not supported
	 */
	public static String FileReaderAll(String FileName, String charset) throws IOException {
		FileInputStream in = new FileInputStream(FileName);
		try {
			BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
			StringBuilder content = new StringBuilder();
			boolean firstLine = true;
			String line;
			while ((line = reader.readLine()) != null) {
				if (!firstLine) {
					content.append('\n');
				}
				content.append(line);
				firstLine = false;
			}
			return content.toString();
		} finally {
			// Closing the underlying stream also covers the case where the
			// reader could not be constructed (unsupported charset).
			in.close();
		}
	}
}

根据关键字查询索引文件里的信息：

TestQuery.java

package text;

import java.io.File;
import java.io.IOException;



import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;



/**
 * Searches the Lucene 3.6 index under {@code c:\index} for a fixed keyword in
 * the {@code body} field and prints the stored fields of the top ten hits.
 */
public class TestQuery {

	/**
	 * Runs the keyword search and prints each hit's body and path, plus the
	 * {@code modified} value when the index stores one.
	 *
	 * @param args unused
	 * @throws ParseException if the keyword cannot be parsed into a query
	 * @throws IOException if the index cannot be opened or read
	 */
	public static void main(String[] args) throws ParseException, IOException {
		String index = "c:\\index"; // location of the index to search
		String queryString = "测试"; // keyword to search for

		// Parse first and let a ParseException propagate: swallowing it would
		// leave the query null and fail later inside the search call.
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
		QueryParser qp = new QueryParser(Version.LUCENE_36, "body", analyzer);
		Query query = qp.parse(queryString);

		FSDirectory directory = FSDirectory.open(new File(index));
		try {
			IndexReader reader = IndexReader.open(directory);
			try {
				IndexSearcher searcher = new IndexSearcher(reader);
				try {
					// Keep only the ten best-ranked results.
					TopDocs results = searcher.search(query, 10);
					ScoreDoc[] hits = results.scoreDocs;
					for (ScoreDoc hit : hits) {
						Document document = searcher.doc(hit.doc);
						String body = document.get("body");
						String path = document.get("path");
						// The field is indexed under the name "modified";
						// get() returns null when the value was not stored.
						String modifiedtime = document.get("modified");
						System.out.println(body+"        ");
						System.out.println(path);
						if (modifiedtime != null) {
							System.out.println(modifiedtime);
						}
					}
					if (hits.length > 0) {
						System.out.println("找到"+hits.length+"条结果");
					}
				} finally {
					searcher.close();
				}
			} finally {
				reader.close();
			}
		} finally {
			directory.close();
		}
	}

}

lucene全文检索实例

SQL Server 2005 中做全文检索的方法分享

Android仿微信朋友圈全文、收起功能的实例代码

I-team 博客全文检索 Elasticsearch 实战

Net Core使用Lucene.Net和盘古分词器实现全文检索

PHP+MySQL+sphinx+scws实现全文检索功能详解

SqlServer 2005 简单的全文检索

什么是全文检索

全文本检索的应用(3)

Oracle9i的全文检索技术开发者网络Oracle

干货 |《从Lucene到Elasticsearch全文检索实战》拆解实践

lucene全文检索实例

SQL Server 2005 中做全文检索的方法分享

Android仿微信朋友圈全文、收起功能的实例代码

I-team 博客全文检索 Elasticsearch 实战

Net Core使用Lucene.Net和盘古分词器 实现全文检索

PHP+MySQL+sphinx+scws实现全文检索功能详解

SqlServer 2005 简单的全文检索

什么是全文检索

全文本检索的应用(3)

Oracle9i的全文检索技术开发者网络Oracle

干货 |《从Lucene到Elasticsearch全文检索实战》拆解实践

Net Core使用Lucene.Net和盘古分词器实现全文检索