欢迎您访问程序员文章站！本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

lucene-使用Digester分析XML索引

程序员文章站 2022-05-15 18:19:01
...

1、Digester是隶属于Jakarta Commons项目的一个子项目，它提供了一个简单的上层接口，用来将XML类型的文档映射为Java对象。

2、DigesterXMLHandler类用来解析XML格式的文档。

public class DigesterXMLHandler implements DocumentHandler{

private Digester dig;

private static Documentdoc;

publicDigesterXMLHandler(){

dig=new Digester();

dig.setValidating(false);

//首先创建DigesterXMLHandler实例

dig.addObjectCreate("address-book",DigesterXMLHandler.class);

//找到contact子元素,创建一个contact类实例

dig.addObjectCreate("address-book/contact",Contact.class);

//当Digester找到<contact>参数的type属性时我们就需要设置Contact实例的type属性。

dig.adSetProperties("address-book","type","type");

//设置几个规则,用来设置contact属性。

dig.addCallMethod("address-book/contact/name","setName",0);

dig.addCallMethod("address-book/contact/address","setAddress",0);

dig.addCallMethod("address-book/contact/city","setCity",0);

dig.addCallMethod("address-book/contact/province","setProvince",0);

dig.addCallMethod("address-book/contact/postalcode","setPostalcode",0);

dig.addCallMethod("address-book/contact/country","setCountry",0);

dig.addCallMethod("address-book/contact/telephone","setTelephone",0);

dig.addSetNext("address-book/contact","populateDocument");

}

public sychronized DocumentgetDocument(InputStream is) throws DocumentHandlerException{

try{

dig.parse(is);//开始解析XML格式的InputStream 输入流
}

catch (IOException e){

throw new DocumentHandlerException("cannot parse XMLdocument",e);

}

catch (SAXExceptoin e){

throw new DocumentHandlerException("cannot parse XMLdocument",e);

}

return doc;

}

 

public voidpopulateDocument(Contact contact){

//将已经取出的各Field组装到Lucene的Document对象里

doc=new Document();

doc.add(Field.Keyword("type",contact.getType()));

doc.add(Field.Keyword("name",contact.getName()));

doc.add(Field.Keyword("address",contact.getAddress()));

doc.add(Field.Keyword("city",contact.getCity()));

doc.add(Field.Keyword("province",contact.getProvince()));

doc.add(Field.Keyword("postalcode",contact.getPostalcode()));

doc.add(Field.Keyword("country",contact.getCountry()));

doc.add(Field.Keyword("telephone",contact.getTelephone()));

}

//重载了每个contact实例入口的JAVABEAN类

public static class Contact{

private String type;

private String name;

private String address;

private String city;

private String province;

private String postalcode;

private String country;

private String telephone;

public void setType(String newType){

type=newType();

}

public String getType(){

return type;

}

public String setName(String newName){

name=newName;

}

public String getName(){

returnname;

}

..........//依次设置city、province、postalcode、country、telephone

..........

}

public static voidmain(String[] args) throws Exception{

DigesterXMLHandler handler=new DigesterXMLHandler();

Document doc=

handler.getDocument(new FileInputStream(new File(args[0]));

System.out.println(doc);

}

}

相关标签: XML lucene