使用dom4j 和xpath 通过url读取xml数据的实例
程序员文章站
2022-03-03 16:14:00
...
我这个实例是在我开发过程中用到的,本人是菜鸟,希望高手勿喷!
首先,使用 dom4j 需要在项目中添加 dom4j 的 jar 包;另外,调用 XPath 还需要添加 jaxen 的 jar 包(dom4j 的 XPath 功能依赖 jaxen)。具体的添加方法这里就不多说了,直接说一下我这个代码的具体思路:
1、根据一定的规则,创建读取xml的url (由于我这个代码是根据用户不同的选择读取不同的xml)
2、检查url的合法性,主要是看url是否是有效链接
3、创建saxReader实例,接着创建doc实例
4、通过 selectNodes 方法配合 XPath 表达式读取出数据(selectNodes 其实是 dom4j 的一个方法,只是需要 XPath 的支持)
5、打包数据,挂在arraylist中,最后写入数据库
代码如下:
/**
 * Reads scholar records from a REST/XML endpoint with dom4j + XPath and exposes them either
 * as a list of {@code Academic} objects ({@link #xml2arraylist()}) or as an HTML fragment of
 * checkbox rows ({@link #xml2string()}).
 *
 * <p>The record category is selected by the {@code type} passed to the constructor:
 * 1 = Paper, 2 = Publication, 3 = Project, 4 = Patent.
 *
 * <p>NOTE(review): {@code getPublicationXML}, {@code getProjectXML} and {@code getPatentXML}
 * are invoked by {@link #parsing(int)} but are not defined in this listing; they must be
 * supplied before the class compiles.
 */
public class ParseXML {

    /** How many times {@link #isConnect(String)} tries a URL before giving up. */
    private static final int MAX_CONNECT_ATTEMPTS = 5;
    /** Upper bound for establishing the probe connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    /** Upper bound for waiting on the probe response, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 10000;

    private ImportNameForm dataform;
    private int type;
    // Most recently built record; kept as a field as in the original listing.
    private Academic ac;
    private int count = 0; // number of records produced by the latest parse
    private StringBuffer finalStrBuffer = new StringBuffer();
    ArrayList<Academic> ar = new ArrayList<Academic>();

    /**
     * @param dataform form carrying the selected accounts, display names and year range
     * @param type     record category (1 = Paper, 2 = Publication, 3 = Project, 4 = Patent)
     */
    public ParseXML(ImportNameForm dataform, int type) {
        this.dataform = dataform;
        this.type = type;
    }

    /** Returns the number of records produced by the most recent {@link #parsing(int)} run. */
    public int getCount() {
        return count;
    }

    /** Parses the remote XML and returns the records as a list (intended for persistence). */
    public ArrayList<Academic> xml2arraylist() {
        this.parsing(1);
        return ar;
    }

    /** Parses the remote XML and returns the records rendered as HTML table rows. */
    public String xml2string() {
        this.parsing(2);
        return finalStrBuffer.toString();
    }

    /**
     * Dispatches to the reader matching {@code type}.
     *
     * <p>The counter and the output selected by {@code flag} are reset first, so calling
     * {@link #xml2arraylist()} and {@link #xml2string()} on the same instance no longer
     * duplicates rows or inflates {@link #getCount()}.
     *
     * @param flag 1 collects {@code Academic} objects into the list; any other value renders
     *             HTML rows into the buffer (2 additionally emits one header row per scholar)
     */
    public void parsing(int flag) {
        count = 0;
        if (flag == 1) {
            // A fresh list, so a list handed out by an earlier call is left untouched.
            ar = new ArrayList<Academic>();
        } else {
            finalStrBuffer.setLength(0);
        }
        switch (type) {
        case 1:
            this.getPaperXML(flag);
            break;
        case 2:
            this.getPublicationXML(flag);
            break;
        case 3:
            this.getProjectXML(flag);
            break;
        case 4:
            this.getPatentXML(flag);
            break;
        default:
            // Unknown category: nothing to parse.
            break;
        }
    }

    /**
     * Builds the header row naming the scholar at position {@code i} of the form's name array.
     * The name is HTML-escaped because it originates from the submitted form.
     */
    public String xzmTemplate(int i) {
        String[] xzm = dataform.getXzm();
        return "<tr><td bgcolor='#dddddd'>学者:" + escapeHtml(xzm[i]) + " 的相关信息</td></tr>";
    }

    /**
     * Builds one checkbox row for a record. The title is HTML-escaped because it comes from
     * the remote XML document.
     */
    public String checkboxTemplate(Academic ac) {
        return "<tr><td><input type='checkbox' name='ckb' value='" + ac.getId() + "'/>"
                + escapeHtml(ac.getTitle()) + "</td></tr>";
    }

    /**
     * Builds one request URL per selected account from the category and the year range.
     *
     * @return an array parallel to the form's selection; empty when nothing is selected, with
     *         a {@code null} entry wherever the selection itself is {@code null} (such entries
     *         are rejected by {@link #isConnect(String)})
     */
    public String[] createURL() {
        String[] ckb = dataform.getCkb();
        if (ckb == null) {
            // No account selected: nothing to fetch.
            return new String[0];
        }
        String year1 = dataform.getYear1();
        String year2 = dataform.getYear2();
        String strType;
        switch (type) {
        case 2:
            strType = "Publication";
            break;
        case 3:
            strType = "Project";
            break;
        case 4:
            strType = "Patent";
            break;
        case 1:
        default:
            strType = "Paper";
            break;
        }
        String[] urls = new String[ckb.length];
        for (int i = 0; i < ckb.length; i++) {
            if (ckb[i] == null) {
                continue; // keep the slot so indexes stay aligned with the name array
            }
            urls[i] = "http://www.XXXXXX.com/rest/" + strType + "/" + ckb[i].trim() + "/"
                    + year1 + "-" + year2;
        }
        return urls;
    }

    /**
     * Checks whether {@code url} answers with HTTP 200.
     *
     * <p>I/O failures are retried up to {@link #MAX_CONNECT_ATTEMPTS} times. Each attempt is
     * bounded by timeouts and the connection is always released, so a stalled server can no
     * longer block the caller indefinitely or leak sockets.
     */
    private boolean isConnect(String url) {
        if (url == null || url.length() == 0) {
            return false;
        }
        final URL target;
        try {
            target = new URL(url);
        } catch (MalformedURLException e) {
            return false; // retrying cannot repair a malformed address
        }
        for (int attempt = 0; attempt < MAX_CONNECT_ATTEMPTS; attempt++) {
            HttpURLConnection connection = null;
            try {
                Object opened = target.openConnection();
                if (!(opened instanceof HttpURLConnection)) {
                    return false; // not an HTTP(S) resource
                }
                connection = (HttpURLConnection) opened;
                connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
                connection.setReadTimeout(READ_TIMEOUT_MS);
                return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
            } catch (Exception ex) {
                // Transient failure: fall through and try again.
            } finally {
                if (connection != null) {
                    connection.disconnect();
                }
            }
        }
        return false;
    }

    /**
     * Fetches the paper XML for every selected account and converts each
     * {@code /scholarPapers/scholarPaper} element into an {@code Academic}.
     *
     * <p>Each record is read from its own element instead of from parallel node lists, so a
     * paper that lacks an optional child no longer shifts the values of the papers after it.
     * A record without a numeric id or a four-digit year prefix is skipped, and a URL that
     * fails to parse no longer aborts the remaining URLs.
     *
     * @param flag 1 adds records to the list; otherwise rows are appended to the HTML buffer
     */
    public void getPaperXML(int flag) {
        System.out.println("Paper_parsing");
        String[] strURL = this.createURL();
        SAXReader saxReader = newHardenedReader();
        for (int i = 0; i < strURL.length; i++) {
            if (!isConnect(strURL[i])) {
                continue;
            }
            try {
                Document doc = saxReader.read(new URL(strURL[i]));
                if (flag == 2) {
                    finalStrBuffer.append(xzmTemplate(i));
                }
                for (Object node : doc.selectNodes("/scholarPapers/scholarPaper")) {
                    Academic record = toAcademic((Element) node);
                    if (record == null) {
                        continue;
                    }
                    count++;
                    ac = record;
                    if (flag == 1) {
                        ar.add(record);
                    } else {
                        finalStrBuffer.append(checkboxTemplate(record));
                    }
                }
            } catch (MalformedURLException e) {
                e.printStackTrace();
            } catch (DocumentException e) {
                e.printStackTrace();
            }
        }
    }

    /** Creates a reader that does not resolve external entities or DTDs (XXE hardening). */
    private SAXReader newHardenedReader() {
        SAXReader reader = new SAXReader();
        try {
            reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
            reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            reader.setFeature(
                    "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        } catch (org.xml.sax.SAXException e) {
            // The underlying parser rejected a feature; continue with whatever was applied.
            e.printStackTrace();
        }
        return reader;
    }

    /** Converts one {@code scholarPaper} element; returns null when id or date is unusable. */
    private Academic toAcademic(Element paper) {
        String idText = childText(paper, "id").trim();
        String dateText = childText(paper, "date").trim();
        if (dateText.length() < 4) {
            System.err.println("Skipping scholarPaper with unusable date: id='" + idText
                    + "', date='" + dateText + "'");
            return null;
        }
        int id;
        int year;
        try {
            id = Integer.parseInt(idText);
            year = Integer.parseInt(dateText.substring(0, 4));
        } catch (NumberFormatException e) {
            System.err.println("Skipping scholarPaper with non-numeric id/year: id='" + idText
                    + "', date='" + dateText + "'");
            return null;
        }
        Academic record = new Academic();
        record.setAcademic_class(String.valueOf(type));
        record.setId(id);
        record.setYear(year);
        record.setTitle(childText(paper, "title"));
        record.setAuthor(childText(paper, "authors"));
        record.setPaper(childText(paper, "source"));
        record.setPtype(childText(paper, "type"));
        record.setContent(childText(paper, "citation"));
        return record;
    }

    /** Returns the text of the first child matching {@code name}, or "" when it is absent. */
    private static String childText(Element parent, String name) {
        Element child = (Element) parent.selectSingleNode(name);
        return child == null ? "" : child.getText();
    }

    /** Escapes the five HTML-significant characters; {@code null} becomes the empty string. */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                out.append("&amp;");
                break;
            case '<':
                out.append("&lt;");
                break;
            case '>':
                out.append("&gt;");
                break;
            case '"':
                out.append("&quot;");
                break;
            case '\'':
                out.append("&#39;");
                break;
            default:
                out.append(c);
                break;
            }
        }
        return out.toString();
    }
}
上一篇: 通过 DOM4J 操作 XML
下一篇: 使用dom4j查询xml