欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

lucene入门实例三 (index索引)

程序员文章站 2022-07-09 09:34:42
...

copy《lucene in action》的一个索引的例子:

 

 

package com.s.lucene.LIA.index;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class IndexingTest extends TestCase {
	protected String[] ids = { "1", "2" };
	protected String[] unindexed = { "Netherlands", "Italy" };
	protected String[] unstored = { "Amsterdam has lots of bridges",
			"Venice has lots of canals" };

	protected String[] text = { "Amsterdam", "Venice" };

	private Directory directory;

	protected void setUp() throws Exception {
		directory = new RAMDirectory();

		IndexWriter writer = getWriter();
		//Field.Store.YES 可以retrieved找到值
		//Field.Index.ANALYZED 保存为index,可以被检索到
		for (int i = 0; i < ids.length; i++) {
			Document doc = new Document();
			doc.add(new Field("id", ids[i], Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			doc.add(new Field("country", unindexed[i], Field.Store.YES,
					Field.Index.NO));
			doc.add(new Field("contents", unstored[i], Field.Store.NO,
					Field.Index.ANALYZED));
			doc.add(new Field("city", text[i],
					Field.Store.YES,
					Field.Index.ANALYZED));
			writer.addDocument(doc);
		}

		writer.close();
	}

	private IndexWriter getWriter() throws Exception {
		return new IndexWriter(directory, new WhitespaceAnalyzer(),
				IndexWriter.MaxFieldLength.UNLIMITED);
	}

	protected int getHitCount(String fieldName, String searchString)
			throws IOException {
		IndexSearcher search = new IndexSearcher(directory);
		Term t = new Term(fieldName, searchString);
		Query query = new TermQuery(t);
		int hitcount = search.search(query, 1).totalHits;
		search.close();
		return hitcount;
	}

	public void testIndexWriter() throws Exception {
		IndexWriter writer = getWriter();
		assertEquals(ids.length, writer.numDocs());
		writer.close();
	}

	public void testIndexReader() throws IOException {
		IndexReader reader = IndexReader.open(directory);
		assertEquals(ids.length, reader.maxDoc());
		assertEquals(ids.length, reader.numDocs());
		reader.close();
	}

	public void testDeleteBeforeOptimize() throws Exception {
		IndexWriter writer = getWriter();
		assertEquals(2, writer.numDocs());
		writer.deleteDocuments(new Term("id", "1"));
		writer.commit();
		assertTrue(writer.hasDeletions());
		assertEquals(2, writer.maxDoc());
		assertEquals(1, writer.numDocs());
		writer.close();
	}

	public void testDeleteAfterOptimize() throws Exception {
		IndexWriter writer = getWriter();
		assertEquals(2, writer.numDocs());
		writer.deleteDocuments(new Term("id", "1"));
		writer.optimize();
		writer.commit();
		assertFalse(writer.hasDeletions());
		assertEquals(1, writer.maxDoc());
		assertEquals(1, writer.numDocs());
		writer.close();
	}

	public void testUpdate() throws Exception {
		assertEquals(1, getHitCount("city", "Amsterdam"));
		IndexWriter writer = getWriter();
		Document doc = new Document();
		doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
		doc.add(new Field("country", "Netherlands", Field.Store.YES,
				Field.Index.NO));
		doc.add(new Field("contents", "Den Haag has a lot of museums",
				Field.Store.NO, Field.Index.ANALYZED));
		doc.add(new Field("city", "Den Haag", Field.Store.YES,
				Field.Index.ANALYZED));
		writer.updateDocument(new Term("id", "1"), doc);
		writer.close();
		assertEquals(0, getHitCount("city", "Amsterdam"));
		assertEquals(1, getHitCount("city", "Den Haag"));//有空格,无法找到TODO
	}

}
相关标签: lucene 索引