
A hands-on Lucene project: IK word segmentation and highlighted search results


A small Lucene demo project: it uses Spring Boot to pull food records from MySQL, index them with the IK Chinese analyzer, and expose a search endpoint whose results highlight the matched keywords.


1. Configure the pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.et</groupId>
  <artifactId>Lucene_Project</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  
  <!-- Parent POM that the Spring Boot project inherits from -->
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.9.RELEASE</version>
  </parent>

  <dependencies>
    <!-- Web starter: embedded Tomcat and Spring MVC -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- Data-access starter: with this in place the project can talk to the database (JPA / JdbcTemplate) -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-data-jpa</artifactId>
    </dependency>
    <!-- MySQL JDBC driver -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.44</version>
    </dependency>
    <!-- IK analyzer (Chinese word segmentation) -->
    <dependency>
      <groupId>com.janeluo</groupId>
      <artifactId>ikanalyzer</artifactId>
      <version>2012_u6</version>
    </dependency>
    <!-- Lucene highlighter, needed to highlight the matched terms -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.7.2</version>
    </dependency>
  </dependencies>
	
</project>

2. Edit application.properties to configure the database connection

The datasource points at a local MySQL schema named food; the DAO and controller below assume it contains a food table with foodid, foodname, price and imagepath columns.

spring.datasource.url=jdbc:mysql://localhost/food
spring.datasource.username=root
spring.datasource.password=root
spring.datasource.driver-class-name=com.mysql.jdbc.Driver


3. Write the tokenizer utility class to implement word segmentation and search

package cn.et.food.utils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class LuceneUtils {

	static String dir = "F:\\index";
	static Analyzer analyzer = new IKAnalyzer();

	/**
	 * Search the index and highlight the matched terms
	 * @throws IOException
	 * @throws ParseException
	 * @throws InvalidTokenOffsetsException
	 */
	public static List<Map> search(String field, String value) throws IOException, ParseException, InvalidTokenOffsetsException {
		// open the directory where the index is stored
		Directory directory = FSDirectory.open(new File(dir));
		DirectoryReader ireader = DirectoryReader.open(directory);
		// the search class
		IndexSearcher isearcher = new IndexSearcher(ireader);
		// Lucene query parser: binds the default query field and the analyzer
		QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
		// build the query
		Query query = parser.parse(value);
		// formatter that wraps every matched term with a prefix and suffix, here <font color=red>...</font>
		SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
		// hand the query terms to the highlighter
		Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

		// run the search, limited to the top 10 documents
		ScoreDoc[] hits = isearcher.search(query, null, 10).scoreDocs;
		List<Map> list = new ArrayList<Map>();
		for (int i = 0; i < hits.length; i++) {
			int id = hits[i].doc;
			Document hitDoc = isearcher.doc(id);
			Map map = new HashMap();
			map.put("foodid", hitDoc.get("foodid"));
			String foodname = hitDoc.get("foodname");
			// re-tokenize the stored foodname so the highlighter can mark the fragments that match the query
			TokenStream tokenStream = TokenSources.getAnyTokenStream(isearcher.getIndexReader(), id, "foodname", analyzer);
			// the second argument is the stored text to highlight
			TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, foodname, false, 200);
			// fall back to the plain foodname if no fragment scored
			String foodnameHign = foodname;
			for (int j = 0; j < frag.length; j++) {
				if ((frag[j] != null) && (frag[j].getScore() > 0)) {
					foodnameHign = frag[j].toString();
				}
			}
			map.put("foodname", foodnameHign);
			map.put("price", hitDoc.get("price"));
			map.put("imagepath", hitDoc.get("imagepath"));
			list.add(map);
		}
		ireader.close();
		directory.close();
		return list;
	}

	/**
	 * Write one document into the index; searching this index replaces queries such as
	 * select * from table where userdesc like '%兰州%'
	 * @throws IOException
	 */
	public static void write(Document doc) throws IOException {
		// directory where the index is stored
		Directory directory = FSDirectory.open(new File(dir));
		// bind the Lucene version and the current analyzer
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
		// note: a new IndexWriter is opened and closed for every call
		IndexWriter iwriter = new IndexWriter(directory, config);

		iwriter.addDocument(doc);
		iwriter.commit();
		iwriter.close();
	}

}
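
To see what the IK analyzer actually does to a Chinese phrase before it reaches the index, here is a minimal standalone sketch (assuming only the jars declared above are on the classpath; the sample text and class name are arbitrary) that prints the tokens produced for one food name:

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class IkTokenDemo {

	public static void main(String[] args) throws Exception {
		Analyzer analyzer = new IKAnalyzer();
		// tokenize a sample food name; each incrementToken() call yields one term
		TokenStream ts = analyzer.tokenStream("foodname", new StringReader("兰州拉面"));
		CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
		ts.reset();
		while (ts.incrementToken()) {
			System.out.println(term.toString());
		}
		ts.end();
		ts.close();
	}
}

Each printed term is what QueryParser matches against at search time, which is why a query for a single word can hit a longer stored foodname.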

4. Write the DAO layer

package cn.et.food.dao.impl;

import java.util.List;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
@Repository
public class FoodDaoImpl {
	@Autowired
	private JdbcTemplate jdbc;
	/**
	 * Get the total number of rows in the food table
	 * @return total row count
	 */
	public int queryFoodCount(){
		String sql="select count(*) as foodCount from food";
		return Integer.parseInt(jdbc.queryForList(sql).get(0).get("foodCount").toString());
	}
	/**
	 * Fetch one page of data
	 * @param start offset of the first row
	 * @param rows number of rows to fetch
	 * @return the rows of this page
	 */
	public List<Map<String, Object>> queryFood(int start,int rows){
		String sql="select * from food limit "+start+","+rows;
		return jdbc.queryForList(sql);
	}
}
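
One possible refinement, sketched here as an alternative rather than a required change: the LIMIT clause above is built by string concatenation, which is fine while start and rows are ints supplied by the controller, but JdbcTemplate also accepts ? placeholders and binds the arguments itself, which is the safer habit if the paging values ever come from a request:

	public List<Map<String, Object>> queryFood(int start, int rows) {
		// same query as above, but the paging arguments are bound instead of concatenated
		return jdbc.queryForList("select * from food limit ?,?", start, rows);
	}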

5. Write the controller


package cn.et.food.dao.impl;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

import cn.et.food.utils.LuceneUtils;

@RestController
public class FoodController {

	@Autowired
	FoodDaoImpl dao;
	
	@GetMapping("/searchFood")
	public List<Map> getFood(String keyWord) throws Exception {
		return LuceneUtils.search("foodname", keyWord);
	}
	
	@GetMapping("/createIndex")
	public String createIndex() {
		try {
			// count the rows so the table can be pulled from the database in batches
			int queryFoodCount = dao.queryFoodCount();
			// e.g. with rows=5: first batch covers rows 0-4, second batch rows 5-9, ...
			int startIndex = 0;
			int rows = 5;
			while (startIndex < queryFoodCount) {
				// pull one batch of rows
				List<Map<String, Object>> queryFood = dao.queryFood(startIndex, rows);
				for (int i = 0; i < queryFood.size(); i++) {
					Map<String, Object> mso = queryFood.get(i);
					Document doc = new Document();
					Field field1 = new Field("foodid", mso.get("foodid").toString(), TextField.TYPE_STORED);
					Field field2 = new Field("foodname", mso.get("foodname").toString(), TextField.TYPE_STORED);
					Field field3 = new Field("price", mso.get("price").toString(), TextField.TYPE_STORED);
					Field field4 = new Field("imagepath", mso.get("imagepath").toString(), TextField.TYPE_STORED);
					doc.add(field1);
					doc.add(field2);
					doc.add(field3);
					doc.add(field4);
					// write the document into the Lucene index
					LuceneUtils.write(doc);
				}
				// advance to the next batch
				startIndex += rows;
			}
		} catch (IOException e) {
			e.printStackTrace();
			return "0";
		}
		return "1";
	}
	
}
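
A possible improvement, shown only as a sketch: createIndex calls LuceneUtils.write(doc) once per row, and write() opens and closes a fresh IndexWriter on every call, so indexing a large table is slow and re-running /createIndex appends duplicate documents. A hypothetical writeAll helper added next to write() in LuceneUtils (reusing its existing dir and analyzer fields) could index a whole batch with one writer and rebuild the index from scratch:

	/** Hypothetical batch helper: rebuilds the index in a single IndexWriter session. */
	public static void writeAll(List<Document> docs) throws IOException {
		Directory directory = FSDirectory.open(new File(dir));
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
		// CREATE discards any existing index, so rebuilding does not accumulate duplicates
		config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
		IndexWriter iwriter = new IndexWriter(directory, config);
		try {
			for (Document doc : docs) {
				iwriter.addDocument(doc);
			}
			iwriter.commit();
		} finally {
			iwriter.close();
			directory.close();
		}
	}

The controller could then collect the Document objects of each batch into a list and make one writeAll call per batch instead of one write call per row.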

6. Write the HTML page

Place search.html, jquery-3.1.0.min.js and the logo image under src/main/resources/static so Spring Boot serves them as static resources.

<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<script type="text/javascript" src="jquery-3.1.0.min.js"></script>

<script type="text/javascript">
  $(function(){
	  $("#searchBtn").click(function(){
		  $.ajax({
			  url:'searchFood',
			  // pass the keyword as an object so jQuery URL-encodes Chinese input
			  data:{keyWord:$("input[name='keyWord']").val()},
			  dataType:'json',
			  success:function(jd){
				  $("div[name='c']").remove()
				  for(var i=0;i<jd.length;i++){
					  var html="<div name='c'>"+
					    "   <h3>"+jd[i].foodname+"</h3> "+
				       "<span>"+jd[i].foodname+"价格是:"+jd[i].price+"</span>"+
				       "<hr>"+
				     "</div>";
				     $("#foodDiv").append(html);
				  }
			  }
			  
		  })
		  
	  })
	  
  })
 
</script>

</head>
<body>
  <div>
     <img  src="bd_logo1.png" width="280px" ><br/>
     <input type="text" name="keyWord"  style="border:1px solid grey;width:200px;height:25px">
     <input id="searchBtn" type="button" value="搜索一下"  style="background-color: rgb(51,133,255);color: white;border:0px;height:28px ">
     <div id="foodDiv">
     
     </div>
  </div>
</body>
</html>

7. Run the project

package cn.et.food.dao.impl;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

// @SpringBootApplication enables component scanning from this class's package (cn.et.food.dao.impl),
// which is also where FoodController and FoodDaoImpl live, so both beans are registered automatically.
@SpringBootApplication
public class FoodMain {

	public static void main(String[] args) {
		SpringApplication.run(FoodMain.class, args);
	}
}

8. Screenshots

(The original article shows screenshots of the search page and the highlighted results here.)

9. Result

First open http://localhost:8080/createIndex to build the Lucene index from the food table, then open http://localhost:8080/search.html and search; the matched keyword comes back wrapped in <font color=red>...</font> and shows up in red in the result list.