欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

lucene搜索

程序员文章站 2022-07-08 19:32:00
...

这里做的lucene是根据一个表里的网址链接抓取网页生成索引。

线程配置文件

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">
<!-- blogindextop -->
<beans>
 <!-- (装载定时器)-->
 <bean class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
    <property name="triggers">     
      <list> 
     	<ref bean="TaskStatTrigger" />	
     	<ref bean="TranslateCheckInfoTrigger" />		
      </list>
    </property>
  </bean>
  <!-- (定时器)每日定时执行 --> 
		<bean id="TaskStatTrigger" class="org.springframework.scheduling.quartz.CronTriggerBean">
		<property name="jobDetail">
			<bean class="org.springframework.scheduling.quartz.MethodInvokingJobDetailFactoryBean">
				<property name="targetObject" ref="TranslateLoadAttendanceInfoService" />
				<property name="targetMethod" value="translate" />
				<property name="concurrent" value="false" />
			</bean>
		</property>
		<!-- 0 代表秒,27代表分,15代表小时(24小时制)
			?不代表任何值(也就是对日期不做要求),* 在这代表1-12月的每一个月,

			FRI 是星期五
			如:"0 15 10 * * ? *" 每天上午10:15执行
			   "0 15 10 ? * MON-FRI" 周一至周五的上午10:15
			   "34 10 9 ? * SUN" 每周星期日的上午9:10:34 执行
			   "34 10 9 5 * ?" 每个月的5号上午9:10:34 执行
			   "0/10 * * * * ?" 每天每隔10秒执行一次

		 -->
		<property name="cronExpression" value="0 00 00 * * ?" />
	</bean>
	<!-- (定时器)定时类 加入考勤人员 --> 
	<bean id="TranslateLoadAttendanceInfoService" parent="txProxyTemplate">
    	<property name="target">
      		<bean class="com.sdfxw.office.service.TranslateLoadAttendanceInfoServiceImp">
		        <property name="attendancelDAO">
		          <ref bean="AttendancelDAO" />
		        </property>
		        <property name="personNelInfoDAO">
		          <ref bean="PersonNelInfoDAO" />
		        </property>
		       <property name="attendanceRuleDAO">
					<ref bean="AttendanceRuleDAO" />
				</property>
      		</bean>
    	</property>
	</bean> 

 struts配置文件

	 <!--搜索引擎--> 
	<action parameter="actionName" path="/search" type="com.sdfxw.search.action.SearchAction" >
            <forward name="searchview"   path="/jsp/search/search.jsp" />
            <forward name="customview"   path="/jsp/search/custom.jsp" />
       </action> 
 

 

建表语句

-- Create table
create table SEARCHLINK
(
  LINKID VARCHAR2(50) not null,
  URL    VARCHAR2(200),
  MODEL  VARCHAR2(50)
)
 

 

 

先做action

package com.sdfxw.search.action;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import org.apache.struts.actions.DispatchAction;
import org.carf.common.spring.ApplicationFactory;
import org.carf.util.common.ParameterUtil;
import org.carf.util.page.PageViewContext;

import com.sdfxw.search.service.SearchService;




/**
 * Struts dispatch action exposing the two search entry points.
 *
 * Both handlers read the "keyword" and "page" request parameters, build a
 * Lucene query string from the keyword, delegate to the "SearchService"
 * bean and publish the resulting page context as request attribute "PP".
 * The keyword is concatenated into the query string as-is (no blank check
 * and no query-syntax escaping is performed here).
 */
public class SearchAction extends DispatchAction
{
    /** Page size handed to the search service by both handlers. */
    private static final int PAGE_SIZE = 20;

    /**
     * Searches the "Content" field of the main index and forwards to
     * "searchview".
     *
     * @param mapping  Struts mapping used to resolve the forward
     * @param form     unused
     * @param request  supplies "keyword" and "page"; receives attribute "PP"
     * @param response unused
     * @return the "searchview" forward
     * @throws Exception whatever the search service throws
     */
    public ActionForward search(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response) throws Exception
    {
        final String keyword = ParameterUtil.getParameter(request, "keyword");
        final String page = ParameterUtil.getParameter(request, "page");

        final PageViewContext result =
                searchService().query("Content:" + keyword, PAGE_SIZE, page);
        request.setAttribute("PP", result);

        return mapping.findForward("searchview");
    }

    /**
     * Searches the "Content" or "Caption" field of the custom index and
     * forwards to "customview".
     *
     * @param mapping  Struts mapping used to resolve the forward
     * @param form     unused
     * @param request  supplies "keyword" and "page"; receives attribute "PP"
     * @param response unused
     * @return the "customview" forward
     * @throws Exception whatever the search service throws
     */
    public ActionForward custom(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response) throws Exception
    {
        final String keyword = ParameterUtil.getParameter(request, "keyword");
        final String page = ParameterUtil.getParameter(request, "page");

        final String luceneQuery = "Content:" + keyword + " OR Caption:" + keyword;
        final PageViewContext result =
                searchService().querycustom(luceneQuery, PAGE_SIZE, page);
        request.setAttribute("PP", result);

        return mapping.findForward("customview");
    }

    /** Looks up the "SearchService" bean through the application factory. */
    private SearchService searchService()
    {
        return (SearchService) ApplicationFactory.getService("SearchService");
    }
}
 

service代码(生成索引,搜索索引)

package com.sdfxw.search.service;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.html.HTMLParser;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.carf.util.common.WebFormatter;
import org.carf.util.page.PageViewContext;
import org.carf.util.page.PageViewUtil;
import org.springframework.core.io.Resource;

import com.sdfxw.search.dao.URLDao;

public class SearchService
{
    public final String MODEL_SAFE = "1";

    public final String MODEL_EXPERT = "13";

    public final String MODEL_PRODUCT = "2";

    public final String MODEL_ING = "31";

    public final String MODEL_CHANCE = "41";

    public final String MODEL_STORY = "42";

    public final String MODEL_ANGEL = "43";

    public final String MODEL_MONTHLY = "53";

    public final String MODEL_JOB = "75";

    public final String MODEL_CUSTOM = "16";
    
    public final String MODEL_MEDIUM = "76";
    
    public final String MODEL_AGENCY = "77";
    
    public final String MODEL_DOWNLOAD = "78";

    private Resource indexDir;

    private String indexPrefix;    

    private URLDao urldao;

    private static final String[] specialChar = new String[] { "\\", "+", "-", "&&",
            "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":" };

    public PageViewContext query(String queryStr, int pageSize, String pageNum)
            throws IOException, ParseException
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexDir.getFile().getPath());
        QueryParser queryParser = new QueryParser("Content", new StandardAnalyzer());
        Query query = queryParser.parse(queryStr);
//        Query query = MultiFieldQueryParser.parse(new String[]{"Content","Model"}, new String[]{queryStr,MODEL_SAFE}, new StandardAnalyzer());

//        BooleanQuery query2 = new BooleanQuery();
//        query2.add(arg0, arg1)
        
        Hits hits = indexSearcher.search(query);
        PageViewContext pp = PageViewUtil.getPageViewContext(pageSize, pageNum, hits);
        indexSearcher.close();
        return pp;
    }

    public PageViewContext querycustom(String queryStr, int pageSize, String pageNum)
            throws IOException, ParseException
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexDir.getFile().getPath()
                + "_custom");
        QueryParser queryParser = new QueryParser("Content", new StandardAnalyzer());
        Query query = queryParser.parse(queryStr);
        Hits hits = indexSearcher.search(query);
        PageViewContext pp = PageViewUtil.getPageViewContext(pageSize, pageNum, hits);
        indexSearcher.close();
        return pp;
    }

    public void createIndex()
    {
        List list = urldao.getURL();
        for (int i = 0; i < list.size(); i++)
        {
            Map map = (Map) list.get(i);
            String urlstr = (String) map.get("URL");
            String model = (String) map.get("Model");
            try
            {
                createOneItem(urlstr, model);
            }
            catch (Exception e)
            {
                e.printStackTrace(System.out);
            }
        }
        try
        {
            String path = indexDir.getFile().getPath();
            File f = new File(path + "_tmp");
            try
            {
                FileUtils.forceDelete(indexDir.getFile());
            }
            catch (Exception ee)
            {
            }
            f.renameTo(indexDir.getFile());
        }
        catch (Exception e)
        {
            e.printStackTrace(System.out);
        }
    }

    private void createOneItem(String urlstr, String model) throws Exception
    {
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        String path = indexDir.getFile().getPath() + "_tmp";
        boolean create = !indexExist(path);

        IndexWriter indexWriter = new IndexWriter(path, luceneAnalyzer, create);
        try
        {
            indexWriter.setMergeFactor(1500);
            Document doc = new Document();

            if (!urlstr.startsWith("\\") && !urlstr.startsWith("/"))
                urlstr = "/" + urlstr;
            urlstr = StringUtils.replace(urlstr, "\\", "/");

            Field f_url = new Field("URL", urlstr, Field.Store.YES,
                    Field.Index.UN_TOKENIZED);
            try
            {
                InputStream is = getHtmlStream(urlstr);
                String content = IOUtils.toString(is);
                content = WebFormatter.html2text(content);
                content = StringUtils.deleteWhitespace(content);
                String summary = StringUtils.abbreviate(content, 200); 

                is = getHtmlStream(urlstr);
                HTMLParser parser = new HTMLParser(is);
                Field f_title = new Field("Title", parser.getTitle(), Field.Store.YES,
                        Field.Index.TOKENIZED);

                Field f_model = new Field("Model", model, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                Field f_summary = new Field("Summary", summary, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);

                Field f_content = new Field("Content", content, Field.Store.NO,
                        Field.Index.TOKENIZED);
                doc.add(f_url);
                doc.add(f_title);
                doc.add(f_summary);
                doc.add(f_content);
                indexWriter.addDocument(doc);
                indexWriter.optimize();
            }
            catch (Exception e)
            {
                e.printStackTrace(System.out);
            }
        }
        finally
        {
            indexWriter.close();
        }
        Thread t = Thread.currentThread();
    }

    private InputStream getHtmlStream(String urlstr) throws Exception
    {
        HttpClient httpclient = new HttpClient();
        System.out.println(indexPrefix + urlstr);
        
        PostMethod httppost = new PostMethod(indexPrefix + urlstr);
        httpclient.executeMethod(httppost);
        InputStream is = httppost.getResponseBodyAsStream();
        return is;
    }

    public boolean indexExist(String indexDir)
    {
        return IndexReader.indexExists(indexDir);
    }

    private String EscapSpecialChar(String str)
    {
        for (int i = 0; i < specialChar.length; i++)
        {
            StringUtils.replace(str, specialChar[i], "\\" + specialChar[i]);
        }
        return str;
    }

    public String genFiled(String fName, String fValue)
    {
        String str = fName + ":\"" + EscapSpecialChar(fValue) + "\"";
        return str;
    }

    public String getIndexPrefix()
    {
        return indexPrefix;
    }

    public void setIndexPrefix(String indexPrefix)
    {
        this.indexPrefix = indexPrefix;
    }  

    public URLDao getUrldao()
    {
        return urldao;
    }

    public void setUrldao(URLDao urldao)
    {
        this.urldao = urldao;
    }

    public void setIndexDir(Resource indexDir)
    {
        this.indexDir = indexDir;
    }

    public void insertSearchLink(String modelID, String url)
    {
        Map map = new HashMap();
        map.put("Model", modelID);
        map.put("URL", url);
        this.urldao.insert(map);
    }

    public void deleteSearchLink(String modelID)
    {
        this.urldao.deleteByModelID(modelID);
    }

    public void createCustomIndex(List list) throws Exception
    {
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        String temppath = indexDir.getFile().getPath() + "_customtmp";
        String path = indexDir.getFile().getPath() + "_custom";
        boolean create = !indexExist(temppath);

        IndexWriter indexWriter = new IndexWriter(temppath, luceneAnalyzer, create);
        try
        {
            indexWriter.setMergeFactor(1500);
            for (int i = 0; i < list.size(); i++)
            {

                Map map = (Map) list.get(i);
                String Caption = (String) map.get("Caption");
                Object IssueDate = map.get("IssueDate");
                String IssueDateStr = "";
                if (IssueDate != null)
                {
                    IssueDateStr = IssueDate.toString().substring(0, 10);
                }
                else
                {
                    IssueDateStr = "";
                }
                String ID = (String) map.get("ID");
                String Content = (String) map.get("Content");// 内容
                Document doc = new Document();

                Field f_ID = new Field("ID", ID, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                Field f_Caption = new Field("Caption", Caption, Field.Store.YES,
                        Field.Index.TOKENIZED);
                Field f_content = new Field("Content", Content, Field.Store.NO,
                        Field.Index.TOKENIZED);
                Field f_IssueDate = new Field("IssueDate", IssueDateStr, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                doc.add(f_ID);
                doc.add(f_Caption);
                doc.add(f_content);
                doc.add(f_IssueDate);
                indexWriter.addDocument(doc);
                indexWriter.optimize();
            }
        }
        finally
        {
            indexWriter.close();
        }
        try
        {
            File ftemp = new File(temppath);
            File f = new File(path);
            try
            {
                FileUtils.forceDelete(f);
            }
            catch (Exception ee)
            {
            }
            ftemp.renameTo(f);
        }
        catch (Exception e)
        {
            e.printStackTrace(System.out);
        }
    }
    
    public List getURL(String url)
    {
    	return urldao.getURL();
    }
    
    public void update(Map map)
    {
    	urldao.update(map);
    }
    
    public void insertOrupdateByUrl(String url,String model)
    {
    	//String searchurl = url.replace("&", "'||chr(38)||'");
    	String searchurl = StringUtils.replace(url, "&", "'||chr(38)||'");
    	List results = urldao.getURL(searchurl);
    	if((results == null || results.size()==0) && StringUtils.isNotBlank(model))
    	{
    		Map map = new HashMap();
    		map.put("URL", url);
    		map.put("MODEL", model);
    		urldao.insert(map);
    	}else if(results != null && results.size()>0 && StringUtils.isNotBlank(url)){
    		Map map = (Map)results.get(0);
    		map.put("URL", url);
    		urldao.update(map);
    	}
    }
    
    public void deleteByUrl(String url)
    {
    	url = StringUtils.replace(url, "&", "'||chr(38)||'");
    	urldao.deleteByUrl(url);
    }
}

 dao层代码

package com.sdfxw.search.dao;

import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringEscapeUtils;
import org.carf.common.jdbc.JdbcDaoSupportEx;
import org.carf.common.jdbc.JdbcTemplateEx;

public class URLDao extends JdbcDaoSupportEx 
{
	public List getURL()
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "select * from SEARCHLINK";
		return template.queryForList(sql);
	}
	
	public List getURL(String url)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "select * from SEARCHLINK where URL ='"+ StringEscapeUtils.escapeSql(url) +"'";
		return template.queryForList(sql);
	}
	
	public void insert(Map map)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		map.put("LINKID", "-2");
		template.insertMap(map, "SEARCHLINK", "LINKID");
	}
	
	public void update(Map map)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		template.updateMap(map, "SEARCHLINK", "LINKID");
	}
	
	public void deleteByUrl(String url)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "DELETE FROM SEARCHLINK WHERE URL='"+url + "'";
		template.update(sql);
	}
	
	public void deleteByModelID(String modelID)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "DELETE FROM SEARCHLINK WHERE Model=?";
		template.update(sql, new Object[] { modelID });
	}
}