Java实现读取Doxygen查询功能的索引文件
用Doxygen生成源代码的文档需要配置文件,配置文件内有search选项:
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = YES
如果设为YES,则在生成文档时会同时生成search.idx索引文件和search.php查询页面。
search.php会把要查询的字符串作为输入参数,调用其中用PHP实现的查询函数 function search($file,$word,&$statsList)。
我将该方法以及该方法调用的其他方法翻译成java语言,如下:
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.TreeMap;
/**
 * Looks up words in a Doxygen {@code search.idx} index file.
 *
 * <p>The whole file is loaded into memory by the constructor. Lookups then
 * walk the byte array using a mutable read cursor, so a single instance must
 * not be used from several threads at once.
 *
 * <p>Layout as read by this class: a 4-byte header, then a table of 4-byte
 * big-endian offsets addressed by a hash of the first two characters of the
 * query word. A non-zero table entry points at a list of
 * (NUL-terminated word, 4-byte statistics offset) pairs that ends with an
 * empty string. Each statistics record holds a document count followed by
 * (document offset, frequency) pairs, and each document offset points at two
 * NUL-terminated strings: the document name and its URL.
 *
 * @author tyrone
 */
public class Search {

    /** Number of bytes skipped at the start of the file before the hash table. */
    private static final int HEADER_SIZE = 4;

    /** Size in bytes of every integer stored in the index. */
    private static final int INT_SIZE = 4;

    /** Complete contents of the index file. */
    private final byte[] content;

    /** Offset of the next unread byte; advanced by every read helper. */
    private int cursor;

    /**
     * Loads the given index file completely into memory.
     *
     * @param fp the {@code search.idx} file to read
     * @throws IllegalArgumentException if the file cannot be read completely
     */
    public Search(File fp) {
        this.content = readFully(fp);
    }

    /** Reads every byte of {@code fp}, closing the stream in all cases. */
    private static byte[] readFully(File fp) {
        long size = fp.length();
        if (size > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Index file too large: " + fp);
        }
        byte[] data = new byte[(int) size];
        try {
            BufferedInputStream in = new BufferedInputStream(new FileInputStream(fp));
            try {
                // A single read() may return fewer bytes than requested, so loop.
                int filled = 0;
                while (filled < data.length) {
                    int n = in.read(data, filled, data.length - filled);
                    if (n < 0) {
                        throw new IOException("Unexpected end of file after " + filled
                                + " of " + data.length + " bytes");
                    }
                    filled += n;
                }
            } finally {
                in.close();
            }
        } catch (IOException ex) {
            throw new IllegalArgumentException("Cannot read index file " + fp, ex);
        }
        return data;
    }

    /**
     * Searches the index for all words that start with {@code word} and
     * appends one entry per matching word to {@code statsList}.
     *
     * <p>Each appended entry is a {@code TreeMap} with the keys
     * {@code "word"} (the query), {@code "match"} (the indexed word),
     * {@code "index"} (Integer offset of its statistics record),
     * {@code "full"} ({@code "true"} when the match has the same length as the
     * query, otherwise {@code "false"}) and {@code "docs"} (a
     * {@code TreeMap[]} with the keys {@code "idx"}, {@code "freq"},
     * {@code "hi"}, {@code "rank"}, {@code "name"} and {@code "url"}).
     *
     * @param word      the word to look up; words shorter than two characters
     *                  never match
     * @param statsList the list that receives the results; existing entries
     *                  are left untouched
     * @return {@code statsList}, for call chaining
     * @throws IllegalStateException if the index data is truncated or corrupt
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw collections are part of the public signature
    public ArrayList Searching(String word, ArrayList statsList) {
        int hash = computeIndex(word);
        if (hash == -1) {
            return statsList;
        }
        int slot = hash * INT_SIZE + HEADER_SIZE;
        if (slot + INT_SIZE > content.length) {
            // The file has no table entry for this hash: treat as "no match".
            return statsList;
        }
        int wordListOffset = readInt(slot);
        if (wordListOffset <= 0) {
            return statsList;
        }

        // Pass 1: collect every indexed word that has the query as a prefix.
        int start = statsList.size();
        String candidate = readString(wordListOffset);
        while (candidate.length() != 0) {
            int statIdx = readInt(cursor);
            if (candidate.startsWith(word)) {
                // A fresh map per match; sharing one map would make all
                // results aliases of the last match.
                TreeMap stat = new TreeMap();
                stat.put("word", word);
                stat.put("match", candidate);
                stat.put("index", Integer.valueOf(statIdx));
                stat.put("full", candidate.length() == word.length() ? "true" : "false");
                statsList.add(stat);
            }
            candidate = readString(cursor);
        }

        // Pass 2: read the per-document statistics of every match.
        int totalHi = 0;
        int totalFreqHi = 0;
        int totalFreqLo = 0;
        for (int s = start; s < statsList.size(); s++) {
            TreeMap stat = (TreeMap) statsList.get(s);
            int multiplier = weightOf(stat);
            int statOffset = ((Integer) stat.get("index")).intValue();
            int numDocs = readInt(statOffset);
            // Each document entry occupies two ints; reject counts that
            // cannot possibly fit in the remaining data.
            if (numDocs < 0 || numDocs > (content.length - cursor) / (2 * INT_SIZE)) {
                throw new IllegalStateException("Corrupt index: invalid document count "
                        + numDocs + " at offset " + statOffset);
            }
            TreeMap[] docs = new TreeMap[numDocs];
            for (int i = 0; i < numDocs; i++) {
                int idx = readInt(cursor);
                int freq = readInt(cursor);
                TreeMap doc = new TreeMap();
                doc.put("idx", Integer.valueOf(idx));
                // The lowest bit flags a high priority document; the rest is the count.
                doc.put("freq", Integer.valueOf(freq >> 1));
                doc.put("rank", Double.valueOf(0.0));
                doc.put("hi", Integer.valueOf(freq & 1));
                if ((freq & 1) > 0) { // word occurs in high priority doc
                    totalHi++;
                    totalFreqHi += freq * multiplier;
                } else { // word occurs in low priority doc
                    totalFreqLo += freq * multiplier;
                }
                docs[i] = doc;
            }
            // Name and URL are stored back to back at the document offset.
            for (int i = 0; i < numDocs; i++) {
                int docOffset = ((Integer) docs[i].get("idx")).intValue();
                docs[i].put("name", readString(docOffset));
                docs[i].put("url", readString(cursor));
            }
            stat.put("docs", docs);
        }

        // Pass 3: turn the frequencies into a relative rank per document.
        int totalFreq = (totalHi + 1) * totalFreqLo + totalFreqHi;
        for (int s = start; s < statsList.size(); s++) {
            TreeMap stat = (TreeMap) statsList.get(s);
            int multiplier = weightOf(stat);
            TreeMap[] docs = (TreeMap[]) stat.get("docs");
            for (int i = 0; i < docs.length; i++) {
                int freq = ((Integer) docs[i].get("freq")).intValue();
                boolean highPriority = ((Integer) docs[i].get("hi")).intValue() > 0;
                int weighted = freq * multiplier + (highPriority ? totalFreqLo : 0);
                // Divide as double: integer division would collapse every rank
                // to 0 or 1, and would throw when totalFreq is 0.
                double rank = totalFreq == 0 ? 0.0 : (double) weighted / totalFreq;
                docs[i].put("rank", Double.valueOf(rank));
            }
        }
        return statsList;
    }

    /** Returns 2 for a whole-word match and 1 for a prefix-only match. */
    @SuppressWarnings("rawtypes")
    private static int weightOf(TreeMap stat) {
        return "true".equals(stat.get("full")) ? 2 : 1;
    }

    /**
     * Reads a 4-byte big-endian integer at {@code offset} and leaves the
     * cursor just behind it.
     *
     * @throws IllegalStateException if fewer than four bytes are available
     */
    private int readInt(int offset) {
        if (offset < 0 || offset > content.length - INT_SIZE) {
            throw new IllegalStateException(
                    "Corrupt index: cannot read integer at offset " + offset);
        }
        // Java bytes are signed; mask each one so it is treated as an
        // unsigned 8-bit value before shifting.
        int value = ((content[offset] & 0xff) << 24)
                | ((content[offset + 1] & 0xff) << 16)
                | ((content[offset + 2] & 0xff) << 8)
                | (content[offset + 3] & 0xff);
        cursor = offset + INT_SIZE;
        return value;
    }

    /**
     * Reads a NUL-terminated string at {@code offset} and leaves the cursor
     * just behind the terminator.
     *
     * @throws IllegalStateException if the offset is invalid or no terminator
     *                               is found before the end of the data
     */
    private String readString(int offset) {
        if (offset < 0 || offset >= content.length) {
            throw new IllegalStateException(
                    "Corrupt index: cannot read string at offset " + offset);
        }
        int end = offset;
        while (end < content.length && content[end] != 0) {
            end++;
        }
        if (end == content.length) {
            throw new IllegalStateException(
                    "Corrupt index: unterminated string at offset " + offset);
        }
        cursor = end + 1;
        // Decode the whole range at once so multi-byte characters are never
        // split across chunks.
        // NOTE(review): uses the platform default charset, as before; confirm
        // which encoding the index is written in.
        return new String(content, offset, end - offset);
    }

    /**
     * Computes the hash table slot for a word from its first two characters.
     *
     * @param word the query word
     * @return {@code first * 256 + second}, or -1 when the word cannot be
     *         looked up (null, shorter than two characters, or one of the two
     *         characters is 0 or does not fit in one byte)
     */
    private static int computeIndex(String word) {
        if (word == null || word.length() < 2) {
            return -1;
        }
        int hi = word.charAt(0);
        int lo = word.charAt(1);
        if (hi == 0 || lo == 0) {
            return -1;
        }
        // The formula leaves one byte per character; larger values would
        // alias the slot of an unrelated character pair.
        if (hi > 0xff || lo > 0xff) {
            return -1;
        }
        return hi * 256 + lo;
    }

    /**
     * Command line entry point: {@code args[0]} is the path of search.idx and
     * {@code args[1]} is the query in the form {@code "word1+word2+..."}.
     * Prints the name and URL of every matching document; how the results
     * are presented is not the point of this example.
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java Search <search.idx> <word1+word2+...>");
            System.exit(1);
        }
        Search se = new Search(new File(args[0]));
        StringTokenizer st = new StringTokenizer(args[1], "+");
        ArrayList result = new ArrayList();
        while (st.hasMoreTokens()) {
            result = se.Searching(st.nextToken(), result);
        }
        for (int i = 0; i < result.size(); i++) {
            TreeMap tm = (TreeMap) result.get(i);
            TreeMap[] docs = (TreeMap[]) tm.get("docs");
            for (int l = 0; l < docs.length; l++) {
                System.out.println((String) docs[l].get("name"));
                System.out.println((String) docs[l].get("url"));
            }
        }
    }
}
推荐阅读
-
纯JS实现的读取excel文件内容功能示例【支持所有浏览器】
-
Android编程实现通讯录中联系人的读取,查询,添加功能示例
-
在SQL Server中,索引是一种增强式的存在,这意味着,即使没有索引,SQL Server仍然可以实现应有的功能。但索引可以在大多数情况下大大提升查询性能,
-
几种读取属性文件的JAVA实现方式(2005年)
-
Java利用反射实现文件的读取操作
-
几种读取属性文件的JAVA实现方式(2005年)
-
jsp+java类+servlet实现文件读取、写入的功能(一)
-
java 实现切割文件和合并文件的功能
-
PHP实现类似于C语言的文件读取及解析功能
-
java实现文件的上传功能