使用Lucene.NET实现站内搜索
程序员文章站
2024-02-21 14:01:28
导入lucene.net 开发包
lucene 是apache软件基金会一个开放源代码的全文检索引擎工具包,是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分...
导入lucene.net 开发包
Lucene 是 Apache 软件基金会的一个开放源代码的全文检索引擎工具包,是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎。Lucene 的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者以此为基础建立起完整的全文检索引擎。Lucene.NET 是 .NET 版的 Lucene。
你可以在这里下载到最新的 Lucene.NET。
创建索引、更新索引、删除索引
搜索,根据索引查找
indexhelper 添加、更新、删除索引
using system;
using lucene.net.store;
using lucene.net.index;
using lucene.net.analysis.pangu;
using lucene.net.documents;

namespace bll
{
    /// <summary>
    /// Adds, updates and deletes documents in the on-disk Lucene index.
    /// </summary>
    class indexhelper
    {
        /// <summary>
        /// Logger for this class.
        /// </summary>
        static common.loghelper logger = new common.loghelper(typeof(indexhelper));

        /// <summary>
        /// Location of the index, read from the "indexpath" application setting.
        /// </summary>
        static string indexpath = common.configurationhelper.appsettingmappath("indexpath");

        /// <summary>
        /// Creates the index entry for <paramref name="item"/>, replacing any
        /// existing entry that has the same id.
        /// </summary>
        /// <param name="item">The file information to index.</param>
        /// <remarks>
        /// Any failure is logged and rethrown. On failure the pending changes
        /// are rolled back, so a delete without its matching add is never committed.
        /// </remarks>
        public static void createindex(model.helpermodel.indexfilehelper item)
        {
            fsdirectory directory = null;
            indexwriter writer = null;
            try
            {
                directory = opendirectory();
                writer = openwriter(directory);

                string title = item.filetitle;
                string body = item.filecontent;

                // Remove any previous document with this id first, so that an
                // update does not leave a duplicate entry behind.
                writer.deletedocuments(new term("id", item.filename));

                // Only the field that needs full-text search is analyzed.
                document document = new document();
                document.add(new field("id", item.filename, field.store.yes, field.index.not_analyzed));
                document.add(new field("title", title, field.store.yes, field.index.not_analyzed));
                document.add(new field("body", body, field.store.yes, field.index.analyzed, lucene.net.documents.field.termvector.with_positions_offsets));
                document.add(new field("url", item.filepath, field.store.yes, field.index.not_analyzed));
                writer.adddocument(document);

                // Closing the writer commits the changes; without it the new
                // document cannot be found by searches.
                writer.close();
                writer = null;
                directory.close();
                directory = null;

                logger.debug(string.format("索引{0}创建成功", item.filename));
            }
            catch (exception ex)
            {
                logger.error(ex);
                throw;
            }
            finally
            {
                // Non-null here only when something above failed.
                release(writer, directory);
            }
        }

        /// <summary>
        /// Deletes the index entry with the given id.
        /// </summary>
        /// <param name="guid">Id of the index entry to delete.</param>
        /// <remarks>Any failure is logged and rethrown.</remarks>
        public static void deleteindex(string guid)
        {
            fsdirectory directory = null;
            indexwriter writer = null;
            try
            {
                directory = opendirectory();
                writer = openwriter(directory);

                writer.deletedocuments(new term("id", guid));

                // Closing the writer commits the deletion.
                writer.close();
                writer = null;
                directory.close();
                directory = null;

                logger.debug(string.format("删除索引{0}成功", guid));
            }
            catch (exception ex)
            {
                logger.error(ex);
                throw;
            }
            finally
            {
                // Non-null here only when something above failed.
                release(writer, directory);
            }
        }

        /// <summary>
        /// Opens the index directory configured in <see cref="indexpath"/>.
        /// </summary>
        private static fsdirectory opendirectory()
        {
            return fsdirectory.open(new system.io.directoryinfo(indexpath), new nativefslockfactory());
        }

        /// <summary>
        /// Opens a writer on <paramref name="directory"/>, creating a new index
        /// when none exists yet.
        /// </summary>
        private static indexwriter openwriter(fsdirectory directory)
        {
            bool indexexists = indexreader.indexexists(directory);

            // A lock can be left behind when a previous indexing run exited
            // abnormally; clear it so the writer can be opened.
            // NOTE(review): this also removes a lock held by a writer that is
            // still alive - confirm that indexing is never run concurrently.
            if (indexexists && indexwriter.islocked(directory))
            {
                indexwriter.unlock(directory);
            }

            return new indexwriter(directory, new panguanalyzer(), !indexexists, lucene.net.index.indexwriter.maxfieldlength.unlimited);
        }

        /// <summary>
        /// Cleans up after a failed operation: discards the writer's uncommitted
        /// changes and closes the directory. Errors raised during cleanup are
        /// logged but not thrown, so they cannot hide the original failure.
        /// </summary>
        private static void release(indexwriter writer, fsdirectory directory)
        {
            if (writer != null)
            {
                try
                {
                    writer.rollback();
                }
                catch (exception ex)
                {
                    logger.error(ex);
                }
            }

            if (directory != null)
            {
                try
                {
                    directory.close();
                }
                catch (exception ex)
                {
                    logger.error(ex);
                }
            }
        }
    }
}
search 通过查找索引实现搜索
using lucene.net.analysis;
using lucene.net.analysis.pangu;
using lucene.net.documents;
using lucene.net.index;
using lucene.net.search;
using lucene.net.store;
using model.helpermodel;
using system;
using system.collections.generic;

namespace bll
{
    /// <summary>
    /// Full-text search over the "body" field of the Lucene index.
    /// </summary>
    public static class searchbll
    {
        /// <summary>
        /// Logger for this class, shared by all of its methods.
        /// </summary>
        static common.loghelper logger = new common.loghelper(typeof(searchbll));

        /// <summary>
        /// Location of the index, read from the "indexpath" application setting.
        /// </summary>
        static string indexpath = common.configurationhelper.appsettingmappath("indexpath");

        /// <summary>
        /// Searches the index for documents whose body matches the keywords.
        /// </summary>
        /// <param name="keywords">Keywords typed by the user.</param>
        /// <returns>
        /// The matching results, or <c>null</c> when nothing matches, when
        /// <paramref name="keywords"/> is null or blank, or when the search
        /// fails (the failure is logged, not thrown).
        /// </returns>
        public static list<searchresult> search(string keywords)
        {
            // Nothing to search for: skip the index entirely instead of
            // failing inside the tokenizer and logging a spurious error.
            if (keywords == null || keywords.trim().length == 0)
            {
                return null;
            }

            fsdirectory directory = null;
            indexreader reader = null;
            indexsearcher searcher = null;
            try
            {
                directory = fsdirectory.open(new system.io.directoryinfo(indexpath), new nolockfactory());
                // Read-only reader.
                reader = indexreader.open(directory, true);
                searcher = new indexsearcher(reader);

                // Build a phrase query from the segmented keywords.
                phrasequery query = new phrasequery();
                foreach (string word in splitword(keywords))
                {
                    query.add(new term("body", word));
                }
                // Phrase slop of 100 between the terms.
                query.setslop(100);

                topscoredoccollector collector = topscoredoccollector.create(1000, true);
                searcher.search(query, null, collector);
                scoredoc[] docs = collector.topdocs(0, collector.gettotalhits()).scoredocs;

                list<searchresult> listresult = new list<searchresult>();
                foreach (scoredoc hit in docs)
                {
                    // A hit only carries the Lucene-assigned document number;
                    // the stored fields are loaded on demand from the searcher.
                    document doc = searcher.doc(hit.doc);

                    searchresult result = new searchresult();
                    result.number = doc.get("id");
                    result.title = doc.get("title");
                    result.bodypreview = preview(doc.get("body"), keywords);
                    result.url = doc.get("url");
                    listresult.add(result);
                }

                if (listresult.count == 0)
                {
                    return null;
                }
                return listresult;
            }
            catch (exception ex)
            {
                logger.error(ex);
                return null;
            }
            finally
            {
                closeresources(searcher, reader, directory);
            }
        }

        /// <summary>
        /// Closes the search resources that were opened. Errors raised while
        /// closing are logged but not thrown, so <see cref="search"/> keeps
        /// its contract of never throwing.
        /// </summary>
        private static void closeresources(indexsearcher searcher, indexreader reader, fsdirectory directory)
        {
            try
            {
                if (searcher != null)
                {
                    searcher.close();
                }
            }
            catch (exception ex)
            {
                logger.error(ex);
            }

            try
            {
                if (reader != null)
                {
                    reader.close();
                }
            }
            catch (exception ex)
            {
                logger.error(ex);
            }

            try
            {
                if (directory != null)
                {
                    directory.close();
                }
            }
            catch (exception ex)
            {
                logger.error(ex);
            }
        }

        /// <summary>
        /// Builds the highlighted preview of a document body.
        /// </summary>
        /// <param name="body">Document content.</param>
        /// <param name="keyword">Keywords to highlight.</param>
        /// <returns>The best matching fragment, as returned by the PanGu highlighter.</returns>
        private static string preview(string body, string keyword)
        {
            // The formatter arguments are the prefix and suffix wrapped around each highlighted word.
            pangu.highlight.simplehtmlformatter simplehtmlformatter = new pangu.highlight.simplehtmlformatter("<font color=\"red\">", "</font>");
            pangu.highlight.highlighter highlighter = new pangu.highlight.highlighter(simplehtmlformatter, new pangu.segment());
            // Number of characters per summary fragment.
            highlighter.fragmentsize = 100;
            return highlighter.getbestfragment(keyword, body);
        }

        /// <summary>
        /// Segments the user's keywords with the PanGu analyzer.
        /// </summary>
        /// <param name="str">Keywords typed by the user.</param>
        /// <returns>The terms produced by the analyzer, in order.</returns>
        private static string[] splitword(string str)
        {
            list<string> list = new list<string>();
            analyzer analyzer = new panguanalyzer();
            tokenstream tokenstream = analyzer.tokenstream("", new system.io.stringreader(str));
            try
            {
                lucene.net.analysis.token token = null;
                while ((token = tokenstream.next()) != null)
                {
                    list.add(token.termtext());
                }
            }
            finally
            {
                tokenstream.close();
            }
            return list.toarray();
        }
    }
}
searchresult 模型
namespace model.helpermodel
{
    /// <summary>
    /// One entry of a search result list.
    /// </summary>
    public class searchresult
    {
        /// <summary>
        /// Identifier of the matched entry.
        /// </summary>
        public string number
        {
            get;
            set;
        }

        /// <summary>
        /// Title of the matched entry.
        /// </summary>
        public string title
        {
            get;
            set;
        }

        /// <summary>
        /// Preview text of the matched body.
        /// </summary>
        public string bodypreview
        {
            get;
            set;
        }

        /// <summary>
        /// Address of the matched entry.
        /// </summary>
        public string url
        {
            get;
            set;
        }
    }
}
以上所述就是本文的全部内容了,希望大家能够喜欢。