Lucene 最新版本为 1.9,从 Apache SVN 中 checkout 出来的源码已经包含了 Highlighter。
下面测试一下:
package org.apache.lucene.search.highlight;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.io.StringReader;
/**
* @author martin
*/
public class WordsHighlighterTest extends TestCase {
private IndexReader reader;
RAMDirectory ramDirectory;
final private static String FIELD_NAME = "contents";
final private static String queryString = "索引";
String [] words = {
"1:索引内容结构:Document,以及包含于Document的多个Field索",
"2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
"3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
"4:索引创建速度的调整"
};
protected void setUp() throws Exception {
ramDirectory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
for (String s : words) {
addDoc(indexWriter, s);
}
indexWriter.optimize();
indexWriter.close();
reader = IndexReader.open(ramDirectory);
}
private void addDoc(IndexWriter indexWriter, String s) throws IOException {
Document doc = new Document();
doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
indexWriter.addDocument(doc);
}
public void testSimpleWords() throws Exception {
Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Searcher searcher = new IndexSearcher(ramDirectory);
Hits hits = searcher.search(query);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color="red">", "</font>"), new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20));
Analyzer analyzer = new StandardAnalyzer();
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, 2, "");
System.out.println(" " + result);
}
}
protected void tearDown() throws Exception {
super.tearDown();
}
}
输出(结果中的 <font color="red">…</font> 高亮标签在网页上被渲染,下面只显示文本):
Searching for: "索 引"
4:索引创建速度的调整
3:索引的写入IndexWriter,索引的写入目标Directory,实现包
1:索引内容结构:Document,以索
2:索引内容优先性调整因子,boost(可对整个Document或Field指
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.io.StringReader;
/**
* @author martin
*/
public class WordsHighlighterTest extends TestCase {
private IndexReader reader;
RAMDirectory ramDirectory;
final private static String FIELD_NAME = "contents";
final private static String queryString = "索引";
String [] words = {
"1:索引内容结构:Document,以及包含于Document的多个Field索",
"2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
"3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
"4:索引创建速度的调整"
};
protected void setUp() throws Exception {
ramDirectory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
for (String s : words) {
addDoc(indexWriter, s);
}
indexWriter.optimize();
indexWriter.close();
reader = IndexReader.open(ramDirectory);
}
private void addDoc(IndexWriter indexWriter, String s) throws IOException {
Document doc = new Document();
doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
indexWriter.addDocument(doc);
}
public void testSimpleWords() throws Exception {
Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Searcher searcher = new IndexSearcher(ramDirectory);
Hits hits = searcher.search(query);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color="red">", "</font>"), new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20));
Analyzer analyzer = new StandardAnalyzer();
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, 2, "");
System.out.println(" " + result);
}
}
protected void tearDown() throws Exception {
super.tearDown();
}
}
输出(结果中的 <font color="red">…</font> 高亮标签在网页上被渲染,下面只显示文本):
Searching for: "索 引"
4:索引创建速度的调整
3:索引的写入IndexWriter,索引的写入目标Directory,实现包
1:索引内容结构:Document,以索
2:索引内容优先性调整因子,boost(可对整个Document或Field指