欢迎您访问程序员文章站，本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

Lucene查询结果高亮

程序员文章站 2022-07-09 09:35:30
...

代码

package com.baifan.lucene.index;

import com.baifan.lucene.ik.IKAnalyzer6x;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Demo that searches a Lucene index and prints each hit with the matched
 * query terms wrapped in an HTML highlight tag.
 *
 * <p>The index is read from the relative directory {@code indexdir}; the
 * {@code title} field is both the search field and the highlighted field.
 *
 * @author: baifan
 * @date: 2021/6/17
 */
public class HighlighterTest {

	/**
	 * Runs the query {@code 北大} against the {@code title} field, fetches the
	 * top 10 hits and prints, for each hit, its doc id, the stored {@code id}
	 * and {@code title} values, and the best highlighted fragment of the title.
	 *
	 * @param args unused
	 * @throws IOException if the index cannot be opened or read
	 * @throws ParseException if the query string cannot be parsed
	 * @throws InvalidTokenOffsetsException if the highlighter meets a token
	 *         whose offsets do not fit the stored text
	 */
	public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
		String field = "title";
		Path indexdir = Paths.get("indexdir");
		// try-with-resources closes analyzer, reader and directory (in that
		// order) even when parsing, searching or highlighting throws.
		try (FSDirectory directory = FSDirectory.open(indexdir);
				IndexReader reader = DirectoryReader.open(directory);
				Analyzer analyzer = new IKAnalyzer6x()) {
			IndexSearcher searcher = new IndexSearcher(reader);
			QueryParser parser = new QueryParser(field, analyzer);
			Query query = parser.parse("北大");
			System.out.println("Query:" + query.toString());
			QueryScorer score = new QueryScorer(query, field);
			// Custom highlight tag wrapped around every matched term.
			SimpleHTMLFormatter fors = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
			// Highlighter that marks up the terms scored by the query scorer.
			Highlighter highlighter = new Highlighter(fors, score);
			// The fragmenter does not depend on the hit, so install it once
			// instead of rebuilding it on every loop iteration.
			Fragmenter fragments = new SimpleSpanFragmenter(score);
			highlighter.setTextFragmenter(fragments);
			// Return the top 10 hits.
			TopDocs tds = searcher.search(query, 10);
			for (ScoreDoc sd : tds.scoreDocs) {
				Document doc = searcher.doc(sd.doc);
				System.out.println("DocID:" + sd.doc);
				System.out.println("id:" + doc.get("id"));
				System.out.println("title:" + doc.get("title"));
				// Obtain a token stream for this document's field.
				TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, field, analyzer);
				// Best-scoring fragment with the matched terms wrapped in the tag.
				String str = highlighter.getBestFragment(tokenStream, doc.get(field));
				System.out.println("高亮的片段:" + str);
			}
		}
	}
}

运行结果:

加载扩展词典:ext.dic
加载扩展停止词典:stopword.dic
加载扩展停止词典:ext_stopword.dic
Query:title:北大
DocID:1
id:2
title:北大迎4380名新生 农村学生700多人近年最多
高亮的片段:<span style="color:red;">北大</span>4380名新生 农村学生700多人近年最多