使用lucene进行数据库全文检索(新增排序功能)
程序员文章站
2022-07-01 15:32:06
...
这几天在做全文检索,花了我好大力气,一路上遇到了各种异常!!
经过我不懈努力终于搞出来了。。。 :lol: :lol: :lol:
废话不多说 附源码
新手第一次发帖,勿喷。。。
源代码如下
[b]实体类Content [/b]
数据库操作这里就不写了。。。
我的数据是从数据库中读出来的一个 List 集合
[b]IFullSearch接口[/b]
[b]IFullSearch实现类FullSearch [/b]
[b]在该实现类中,我加入了以下方法来创建索引和进行全文检索;觉得不符合规范的,可以自己新建一个类。[/b]
代码就这些了 有不懂可以留言。。。。。有可以优化的,也请留言。。。。。
经过我不懈努力终于搞出来了。。。 :lol: :lol: :lol:
废话不多说 附源码
新手第一次发帖,勿喷。。。
源代码如下
[b]实体类Content [/b]
/**
 * Content record mapped to table {@code TB_CONTENT} in schema {@code PROEDU}.
 * The accessor methods were omitted by the author of the post.
 */
@Entity
@Table(name = "TB_CONTENT", schema = "PROEDU")
public class Content implements Serializable {
/**
* Serialization version identifier.
*/
private static final long serialVersionUID = -7615375010821887584L;
private long id; // identifier
private Date createDate;// creation time
private String title;// title
private String keyWords;// keywords
private Date publishedDate;// publication date
private Date invalidDate;// expiry date
private String content;// body text
private String source;// source
private long groupId;// content category
private String authorCode;// author
private String verifierCode;// reviewer
private int status;// status
private int count;// number of views
// getters and setters omitted here (boilerplate)
}
数据库操作这里就不写了。。。
我的数据是从数据库中读出来的一个 List 集合
[b]IFullSearch接口[/b]
public interface IFullSearch {
/**
* Returns the total number of records.
*
* @return int total record count
*/
public int getCount();
/**
* Searches the given contents and returns the matching result set.
*
* @param key search keyword
* @param contents collection to search, loaded in whole or in part from the database
* @param searchDir index path
* @param pageIndex page number
* @param pageCount number of items shown per page
* @param content exact-search criteria; the full-text search is run against
* some of the fields of this object
* @param order sort order
* @return the search result set
* @throws Exception on failure
*/
public List<Content> getResult(String key, List<Content> contents,String searchDir,int pageIndex, int pageCount,Content content,String order) throws Exception;
}
[b]IFullSearch实现类FullSearch [/b]
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.stereotype.Service;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.bluedon.emis.common.usermanage.service.IFullSearch;
import com.bluedon.emis.common.usermanage.vo.Content;
/**
* @author wenb
* @version 1.0 全文检索
*/
@Service("IFullSearch")
public class FullSearch implements IFullSearch {
private int count;// 检索总记录数
/**
 * Returns the hit total stored in the {@code count} field, which is
 * written by {@code getScoreDoc} each time a search runs.
 *
 * @return the current value of {@code count}
 */
@Override
public int getCount() {
    return this.count;
}
/**
 * Rebuilds the index under {@code searchDir} from {@code contents}, runs the
 * search and returns the requested page of highlighted results.
 *
 * <p>When {@code key} is null or empty the query is built from the populated
 * fields of {@code content}; otherwise {@code key} is searched across the
 * indexed fields.
 *
 * @param key       search keyword; may be null or empty
 * @param contents  records to index and search
 * @param searchDir index directory path
 * @param pageIndex page number
 * @param pageCount number of items per page
 * @param content   field-level criteria used when {@code key} is blank
 * @param order     sort order passed through to the result lookup
 * @return the page of results, or {@code null} when the page is empty
 * @throws Exception if indexing, query parsing or searching fails
 */
@Override
public List<Content> getResult(String key, List<Content> contents,
        String searchDir, int pageIndex, int pageCount, Content content, String order)
        throws Exception {
    this.createIndex(contents, searchDir);
    Query query;
    if (key == null || "".equals(key)) {
        query = getQuery(content);
    } else {
        query = getQuery(key);
    }
    // The directory and reader hold file handles; release them even when the
    // search throws, otherwise every call leaks open files.
    Directory directory = FSDirectory.open(new File(searchDir));
    try {
        DirectoryReader reader = DirectoryReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<Content> list = getList(query, pageIndex, pageCount, searcher,
                    contents.size(), order);
            return list != null && list.size() > 0 ? list : null;
        } finally {
            reader.close();
        }
    } finally {
        directory.close();
    }
}
[b]在该实现类中,我加入了以下方法来创建索引和进行全文检索;觉得不符合规范的,可以自己新建一个类。[/b]
/**
 * Builds the index under {@code searchDir} from the given records. The
 * writer is opened with {@code OpenMode.CREATE}.
 *
 * <p>Each record contributes the stored text fields {@code content},
 * {@code title}, {@code keyWords}, {@code source}, {@code id} and
 * {@code publishDate} (epoch milliseconds as a decimal string).
 *
 * @param list      records to index
 * @param searchDir index directory path
 * @throws Exception if the index cannot be written; in that case the
 *                   uncommitted changes are rolled back
 */
private void createIndex(List<Content> list, String searchDir)
        throws Exception {
    Directory directory = FSDirectory.open(new File(searchDir));
    try {
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_42,
                analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        IndexWriter indexWriter = new IndexWriter(directory, iwc);
        boolean committed = false;
        try {
            for (Content content : list) {
                Document doc = new Document();
                doc.add(new TextField("content", content.getContent(),
                        Field.Store.YES));
                doc.add(new TextField("title", content.getTitle(),
                        Field.Store.YES));
                doc.add(new TextField("keyWords", content.getKeyWords(),
                        Field.Store.YES));
                doc.add(new TextField("source", content.getSource(),
                        Field.Store.YES));
                doc.add(new TextField("id", content.getId() + "",
                        Field.Store.YES));
                // NOTE(review): publishDate is indexed as analyzed text but
                // sorted as a long elsewhere; this presumably relies on the
                // analyzer keeping the digits as one term - confirm.
                doc.add(new TextField("publishDate",
                        content.getPublishedDate().getTime() + "", Field.Store.YES));
                indexWriter.addDocument(doc);
            }
            indexWriter.commit();
            committed = true;
        } finally {
            // Always release the writer (and its write lock). On failure,
            // discard the partial batch instead of committing it.
            if (committed) {
                indexWriter.close();
            } else {
                indexWriter.rollback();
            }
        }
    } finally {
        directory.close();
    }
}
/**
 * Parses a keyword into a query that targets every searchable index field.
 *
 * @param key keyword text handed to the query parser
 * @return the parsed multi-field query
 * @throws Exception if the keyword cannot be parsed
 */
private Query getQuery(String key) throws Exception {
    // Fields the keyword is matched against.
    final String[] searchableFields = { "content", "title", "keyWords", "source", "id" };
    final Analyzer ikAnalyzer = new IKAnalyzer();
    return new MultiFieldQueryParser(Version.LUCENE_42, searchableFields, ikAnalyzer)
            .parse(key);
}
/**
 * Runs the query and returns the hits, optionally sorted by publish date.
 * Also stores the number of returned hits in the {@code count} field.
 *
 * @param query    query to run
 * @param searcher searcher over the index
 * @param size     maximum number of hits to fetch; a value of zero or less
 *                 yields an empty result
 * @param order    {@code "desc"} for newest first, {@code "asc"} for oldest
 *                 first; {@code null}, {@code ""}, {@code "def"} or any
 *                 other value falls back to relevance order
 * @return the hits, never {@code null}
 * @throws Exception if the search fails
 */
private ScoreDoc[] getScoreDoc(Query query, IndexSearcher searcher, int size, String order)
        throws Exception {
    // Lucene rejects a non-positive hit limit, so short-circuit instead of
    // letting the search throw.
    if (size <= 0) {
        count = 0;
        return new ScoreDoc[0];
    }
    ScoreDoc[] hits;
    boolean newestFirst = "desc".equals(order);
    if (newestFirst || "asc".equals(order)) {
        // Third SortField argument: true = descending, false = ascending.
        SortField sortField = new SortField("publishDate", SortField.Type.LONG, newestFirst);
        Sort sort = new Sort();
        sort.setSort(sortField);
        hits = searcher.search(query, null, size, sort).scoreDocs;
    } else {
        // Relevance order; also used for unrecognised order values, which
        // previously left the result null and caused a NullPointerException.
        hits = searcher.search(query, null, size).scoreDocs;
    }
    count = hits.length;
    return hits;
}
/**
 * Runs the query and converts the requested page of hits back into
 * {@code Content} objects, highlighting the text fields.
 *
 * @param query     query to run
 * @param pageIndex page number, starting at 1
 * @param pageCount number of items per page
 * @param searcher  searcher over the index
 * @param size      maximum number of hits to fetch
 * @param order     sort order passed to the hit lookup
 * @return the page of results; empty when the page lies outside the hits
 *         or when {@code pageIndex} or {@code pageCount} is less than 1
 * @throws Exception if searching, highlighting or field parsing fails
 */
private List<Content> getList(Query query, int pageIndex, int pageCount,
        IndexSearcher searcher, int size, String order) throws Exception {
    List<Content> list = new ArrayList<Content>();
    // Run the search first so the hit total is recorded even for a bad page.
    ScoreDoc[] hits = getScoreDoc(query, searcher, size, order);
    if (pageIndex < 1 || pageCount < 1) {
        // Would otherwise produce a negative start index.
        return list;
    }
    // Use the local hit array rather than the shared count field, and long
    // arithmetic so pageIndex * pageCount cannot overflow.
    int total = hits.length;
    long first = ((long) pageIndex - 1) * pageCount;
    long last = Math.min((long) pageIndex * pageCount, (long) total);
    if (first >= last) {
        return list;
    }
    for (int i = (int) first; i < last; i++) {
        Document doc = searcher.doc(hits[i].doc);
        Content content = new Content();
        content.setId(Long.parseLong(doc.get("id")));
        content.setKeyWords(highLighter(query, "keyWords", doc.get("keyWords")));
        content.setContent(highLighter(query, "content", doc.get("content")));
        content.setTitle(highLighter(query, "title", doc.get("title")));
        content.setSource(highLighter(query, "source", doc.get("source")));
        // publishDate is stored as epoch milliseconds.
        content.setPublishedDate(new Date(Long.parseLong(doc.get("publishDate"))));
        list.add(content);
    }
    return list;
}
/**
 * Builds a combined query from the populated text fields of {@code content}.
 * Every non-empty field among content, title, keyWords and source adds a
 * mandatory clause, so the clauses combine like a logical AND.
 *
 * @param content field-level criteria; may be {@code null}
 * @return the combined query; it has no clauses when {@code content} is
 *         {@code null} or all four fields are blank
 *         (NOTE(review): a clause-less BooleanQuery is expected to match
 *         nothing - confirm)
 * @throws Exception if a field value cannot be parsed
 */
private Query getQuery(Content content) throws Exception {
    BooleanQuery booleanQuery = new BooleanQuery();
    if (content == null) {
        // Reached when the caller supplies neither a keyword nor criteria.
        return booleanQuery;
    }
    Analyzer analyzer = new IKAnalyzer();
    addRequiredClause(booleanQuery, analyzer, "content", content.getContent());
    addRequiredClause(booleanQuery, analyzer, "title", content.getTitle());
    addRequiredClause(booleanQuery, analyzer, "keyWords", content.getKeyWords());
    addRequiredClause(booleanQuery, analyzer, "source", content.getSource());
    return booleanQuery;
}

/**
 * Adds a mandatory clause for {@code field} when {@code text} is non-empty.
 */
private void addRequiredClause(BooleanQuery target, Analyzer analyzer,
        String field, String text) throws Exception {
    if (text == null || "".equals(text)) {
        return;
    }
    QueryParser queryParser = new QueryParser(Version.LUCENE_42, field, analyzer);
    target.add(queryParser.parse(text), Occur.MUST);
}
/**
 * Wraps the query matches found in a field value in highlight markup.
 *
 * @param query     query whose terms are highlighted
 * @param fieldName index field the text belongs to
 * @param cont      field text; may be {@code null}
 * @return the first highlighted fragment, the unchanged text when the
 *         highlighter produces no fragment, or {@code null} when
 *         {@code cont} is {@code null}
 * @throws Exception if highlighting fails
 */
private String highLighter(Query query, String fieldName, String cont)
        throws Exception {
    if (cont == null) {
        // Nothing to highlight; previously this hit a NullPointerException.
        return null;
    }
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
            "<font color='red' style='background: yellow;'><strong>",
            "</strong></font>");
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
            new QueryScorer(query));
    Analyzer analyzer = new IKAnalyzer();
    // The text length is deliberately kept as the fragment limit; changing
    // it would change which text comes back.
    String[] str = highlighter.getBestFragments(analyzer, fieldName, cont,
            cont.length());
    return str == null || str.length <= 0 ? cont : str[0];
}
代码就这些了 有不懂可以留言。。。。。有可以优化的,也请留言。。。。。