欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

Lucene.Net 全文检索:创建索引、查询、分页

程序员文章站 2022-03-21 08:41:58
#region 创建、更新词库 /// /// 创建、更新词库 /// private void Create...
 #region 创建、更新词库
        /// <summary>
        /// Creates the Lucene index under <c>path</c> when none exists yet, otherwise refreshes
        /// it, using the records returned by <c>LuceneBll.Instance.Get()</c>.
        /// </summary>
        /// <remarks>
        /// Every record is written with <c>UpdateDocument</c> keyed on its "Id" term, so running
        /// this method again replaces the existing document of a record instead of appending a
        /// duplicate. The writer and the directory are closed in <c>finally</c> blocks so they
        /// are released even when indexing throws.
        /// </remarks>
        private void CreateIndexData()
        {
            // Index directory on disk.
            FSDirectory dir = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
            try
            {
                // Does an index already exist in that directory?
                bool has = IndexReader.IndexExists(dir);
                if (has && IndexWriter.IsLocked(dir))
                {
                    // Forcibly release a lock left on the existing index.
                    // NOTE(review): this would also break the lock of a writer that is still
                    // active elsewhere - confirm only one writer can run at a time.
                    IndexWriter.Unlock(dir);
                }

                // Index writer: creates a new index only when none exists (create == !has).
                IndexWriter iw = new IndexWriter(dir, new PanGuAnalyzer(), !has, IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    var records = LuceneBll.Instance.Get();
                    if (records != null)
                    {
                        foreach (var o in records)
                        {
                            // Field.Store.YES:      store the original (un-analyzed) field value
                            // Field.Store.NO:       do not store; storing is independent of indexing
                            // Field.Store.COMPRESS: store compressed, for long text or binary, at a performance cost
                            //
                            // Field.Index.ANALYZED:              analyze (tokenize) and index
                            // Field.Index.ANALYZED_NO_NORMS:     analyze and index, without norms
                            // Field.Index.NO:                    do not index
                            // Field.Index.NOT_ANALYZED:          index without analyzing
                            // Field.Index.NOT_ANALYZED_NO_NORMS: index without analyzing, without norms
                            //
                            // Field.TermVector.NO:                     no term vector
                            // Field.TermVector.WITH_OFFSETS:           store offsets
                            // Field.TermVector.WITH_POSITIONS:         store positions
                            // Field.TermVector.WITH_POSITIONS_OFFSETS: store positions and offsets
                            // Field.TermVector.YES:                    store the field's term vector for each document
                            string id = o.Id.ToString();

                            Document d = new Document();
                            d.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
                            // The Field constructor rejects null values, so index missing text as empty.
                            d.Add(new Field("Title", o.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                            d.Add(new Field("Message", o.Message ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                            // Delete-then-add keyed on the un-analyzed "Id" term: on a fresh index
                            // this is a plain add, on an existing one it replaces the old document.
                            iw.UpdateDocument(new Term("Id", id), d);
                        }
                    }

                    iw.Optimize();
                }
                finally
                {
                    // Release the writer (and its write lock) even if indexing failed.
                    iw.Close();
                }
            }
            finally
            {
                dir.Close();
            }
        }
        #endregion
#region 查询
        /// <summary>
        /// Searches the "Message" field of the index under <c>path</c> for the given keywords
        /// and returns one page of matching records.
        /// </summary>
        /// <param name="str">Keywords to search for; they are segmented into terms before querying.</param>
        /// <param name="index">Page index, starting at 1.</param>
        /// <param name="count">Number of records per page.</param>
        /// <returns>
        /// The records on the requested page, or <c>null</c> when there is nothing to return
        /// (no hits on that page, blank keywords, or a page index / page size below 1).
        /// </returns>
        private List<LuceneModel> Search(string str, int index = 1, int count = 3)
        {
            // Nothing sensible can be searched or paged with these inputs; report "no results"
            // the same way an empty result page is reported.
            if (str == null || str.Trim().Length == 0 || index < 1 || count < 1)
            {
                return null;
            }

            List<LuceneModel> list = null;
            // Index directory on disk.
            FSDirectory dir = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
            try
            {
                // Build the index first when it does not exist yet.
                if (!IndexReader.IndexExists(dir))
                {
                    CreateIndexData();
                }

                // Read-only index reader.
                IndexReader ir = IndexReader.Open(dir, true);
                try
                {
                    IndexSearcher searcher = new IndexSearcher(ir);
                    try
                    {
                        // Phrase query over the segmented keywords.
                        PhraseQuery query = new PhraseQuery();
                        Segment segment = new Segment();
                        foreach (var word in segment.DoSegment(str))
                        {
                            query.Add(new Term("Message", word.ToString()));
                        }
                        // Maximum distance allowed between the keywords.
                        query.SetSlop(100);

                        // Collect just enough top hits to cover the requested page.
                        int start = count * (index - 1);
                        TopScoreDocCollector collector = TopScoreDocCollector.create(index * count, false);
                        searcher.Search(query, null, collector);

                        // TopDocs(start, howMany): the second argument is the page size, not an end offset.
                        ScoreDoc[] docs = collector.TopDocs(start, count).scoreDocs;
                        if (docs.Length > 0)
                        {
                            list = new List<LuceneModel>(docs.Length);
                            foreach (var hit in docs)
                            {
                                Document document = searcher.Doc(hit.doc);
                                LuceneModel m = new LuceneModel();
                                m.Id = Convert.ToInt32(document.Get("Id"));
                                m.Title = document.Get("Title");
                                m.Message = document.Get("Message");
                                list.Add(m);
                            }
                        }
                    }
                    finally
                    {
                        searcher.Close();
                    }
                }
                finally
                {
                    ir.Close();
                }
            }
            finally
            {
                // Release the directory even when the search failed.
                dir.Close();
            }
            return list;
        }
        #endregion