Lucene查询结果高亮
程序员文章站
2022-07-09 09:35:30
...
代码:
package com.baifan.lucene.index;
import com.baifan.lucene.ik.IKAnalyzer6x;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Demo that searches an on-disk Lucene index and prints each hit together with
 * its best highlighted fragment, where matched terms are wrapped in a red
 * {@code <span>} tag.
 *
 * @author: baifan
 * @date: 2021/6/17
 */
public class HighlighterTest {

    /**
     * Opens the index in the {@code indexdir} directory, searches the
     * {@code title} field for "北大", and prints the top 10 hits with the
     * highlighted title fragment.
     *
     * @param args unused
     * @throws IOException                   if the index cannot be opened or read
     * @throws ParseException                if the query string cannot be parsed
     * @throws InvalidTokenOffsetsException  if the highlighter meets a token whose
     *                                       offsets fall outside the text
     */
    public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
        String field = "title";
        Path indexDir = Paths.get("indexdir");

        // try-with-resources closes the reader first and the directory second,
        // and does so even when parsing, searching or highlighting throws.
        try (FSDirectory directory = FSDirectory.open(indexDir);
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new IKAnalyzer6x();

            QueryParser parser = new QueryParser(field, analyzer);
            Query query = parser.parse("北大");
            System.out.println("Query:" + query.toString());

            QueryScorer scorer = new QueryScorer(query, field);
            // Custom highlight tags wrapped around each matched term.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            // The fragmenter does not depend on the hit, so configure it once
            // instead of rebuilding it for every document.
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            highlighter.setTextFragmenter(fragmenter);

            // Fetch the top 10 hits.
            TopDocs topDocs = searcher.search(query, 10);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                String text = doc.get(field);
                System.out.println("DocID:" + hit.doc);
                System.out.println("id:" + doc.get("id"));
                System.out.println("title:" + text);
                if (text == null) {
                    // Field value is not stored for this document, so there is
                    // nothing to highlight.
                    continue;
                }

                // NOTE(review): getAnyTokenStream appears to be deprecated in newer
                // Lucene releases; confirm against the Lucene version in use.
                TokenStream tokenStream =
                        TokenSources.getAnyTokenStream(reader, hit.doc, field, analyzer);
                String fragment = highlighter.getBestFragment(tokenStream, text);
                // getBestFragment returns null when no fragment scores; fall back
                // to the plain text rather than printing "null".
                System.out.println("高亮的片段:" + (fragment != null ? fragment : text));
            }
        }
    }
}
运行结果:
加载扩展词典:ext.dic
加载扩展停止词典:stopword.dic
加载扩展停止词典:ext_stopword.dic
Query:title:北大
DocID:1
id:2
title:北大迎4380名新生 农村学生700多人近年最多
高亮的片段:<span style="color:red;">北大</span>迎4380名新生 农村学生700多人近年最多
上一篇: java 基于lucene 如何创建index【索引】
下一篇: lucene使用hanlp分词