lucene-分页查询
程序员文章站
2022-07-09 09:52:56
...
- 两种方式
分页查询是很常见的需求,比如要查询第10页,每页10条数据。
Lucene 分页通常来讲有两种方式:
第一种是把100条数据查出来,然后取最后10条。 优点是快,缺点是对内存消耗大。
第二种是把第90条查询出来,然后基于这一条,通过searchAfter方法查询10条数据。 优点是内存消耗小,缺点是比第一种更慢
- 第一种
一共查出 pageNow*pageSize条,然后取最后pageSize条private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { TopDocs topDocs = searcher.search(query, pageNow*pageSize); System.out.println("查询到的总条数\t"+topDocs.totalHits); ScoreDoc [] alllScores = topDocs.scoreDocs; List<ScoreDoc> hitScores = new ArrayList<>(); int start = (pageNow -1)*pageSize ; int end = pageSize*pageNow; for(int i=start;i<end;i++) hitScores.add(alllScores[i]); ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{}); return hits; }
public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 查询器 String keyword = "手机"; System.out.println("当前关键字是:"+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int pageNow = 1; int pageSize = 10; ScoreDoc[] hits = pageSearch1(query, searcher, pageNow, pageSize); // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 关闭查询 reader.close(); } private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { TopDocs topDocs = searcher.search(query, pageNow*pageSize); System.out.println("查询到的总条数\t"+topDocs.totalHits); ScoreDoc [] alllScores = topDocs.scoreDocs; List<ScoreDoc> hitScores = new ArrayList<>(); int start = (pageNow -1)*pageSize ; int end = pageSize*pageNow; for(int i=start;i<end;i++) hitScores.add(alllScores[i]); ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{}); return hits; } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println("<br>"); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n",total,per); } if(per>10) break; } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); } }
- 第二种
首先是边界条件,如果是第一页,就直接查询了。private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { int start = (pageNow - 1) * pageSize; if(0==start){ TopDocs topDocs = searcher.search(query, pageNow*pageSize); return topDocs.scoreDocs; } // 查询数据, 结束页面自前的数据都会查询到,但是只取本页的数据 TopDocs topDocs = searcher.search(query, start); //获取到上一页最后一条 ScoreDoc preScore= topDocs.scoreDocs[start-1]; //查询最后一条后的数据的一页数据 topDocs = searcher.searchAfter(preScore, query, pageSize); return topDocs.scoreDocs; }
如果不是第一页,那么就取start-1那一条,然后再根据它通过searchAfter 来查询public class TestLucene { public static void main(String[] args) throws Exception { // 1. 准备中文分词器 IKAnalyzer analyzer = new IKAnalyzer(); // 2. 索引 Directory index = createIndex(analyzer); // 3. 查询器 String keyword = "手机"; System.out.println("当前关键字是:"+keyword); Query query = new QueryParser( "name", analyzer).parse(keyword); // 4. 搜索 IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher=new IndexSearcher(reader); int pageNow = 1; int pageSize = 10; ScoreDoc[] hits = pageSearch2(query, searcher, pageNow, pageSize); // 5. 显示查询结果 showSearchResults(searcher, hits,query,analyzer); // 6. 关闭查询 reader.close(); } private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { TopDocs topDocs = searcher.search(query, pageNow*pageSize); System.out.println("查询到的总条数\t"+topDocs.totalHits); ScoreDoc [] alllScores = topDocs.scoreDocs; List<ScoreDoc> hitScores = new ArrayList<>(); int start = (pageNow -1)*pageSize ; int end = pageSize*pageNow; for(int i=start;i<end;i++) hitScores.add(alllScores[i]); ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{}); return hits; } private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { int start = (pageNow - 1) * pageSize; if(0==start){ TopDocs topDocs = searcher.search(query, pageNow*pageSize); return topDocs.scoreDocs; } // 查询数据, 结束页面自前的数据都会查询到,但是只取本页的数据 TopDocs topDocs = searcher.search(query, start); //获取到上一页最后一条 ScoreDoc preScore= topDocs.scoreDocs[start-1]; //查询最后一条后的数据的一页数据 topDocs = searcher.searchAfter(preScore, query, pageSize); return topDocs.scoreDocs; } private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception { System.out.println("找到 " + hits.length + " 个命中."); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); System.out.println("找到 " + hits.length + " 个命中."); System.out.println("序号\t匹配度得分\t结果"); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc= hits[i]; int docId = scoreDoc.doc; Document d = searcher.doc(docId); List<IndexableField> fields= d.getFields(); System.out.print((i + 1) ); System.out.print("\t" + scoreDoc.score); for (IndexableField f : fields) { if("name".equals(f.name())){ TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name()))); String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name())); System.out.print("\t"+fieldContent); } else{ System.out.print("\t"+d.get(f.name())); } } System.out.println("<br>"); } } private static Directory createIndex(IKAnalyzer analyzer) throws IOException { Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); String fileName = "140k_products.txt"; List<Product> products = ProductUtil.file2list(fileName); int total = products.size(); int count = 0; int per = 0; int oldPer =0; for (Product p : products) { addDoc(writer, p); count++; per = count*100/total; if(per!=oldPer){ oldPer = per; System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n",total,per); } if(per>10) break; } writer.close(); return index; } private static void addDoc(IndexWriter w, Product p) throws IOException { Document doc = new Document(); doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES)); doc.add(new TextField("name", p.getName(), Field.Store.YES)); doc.add(new TextField("category", p.getCategory(), Field.Store.YES)); doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES)); doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); doc.add(new TextField("code", p.getCode(), Field.Store.YES)); w.addDocument(doc); } }
上一篇: Apple Watch购买技巧 苹果机密文件告诉你
下一篇: 华为智能手表什么时候上市?价格多少?