Android 封装抓取网页信息的实例代码
程序员文章站
2023-12-01 20:08:34
复制代码 代码如下:package cn.mypic; import java.io.bufferedinp...
复制代码 代码如下:
package cn.mypic;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads the images referenced by a web page.
 *
 * <p>{@link #get(String)} fetches the page source, looks for {@code src=} and
 * {@code background=} attributes that point at jpg/png/gif files, and passes each image URL to
 * {@link #gethtmlpicture(String)}. Images are stored under {@code d:/image/} and named with a
 * running sequence number that is loaded and saved through {@code filenumber}.
 */
public class getcontentpicture {

    /** Directory the downloaded images are stored in. */
    private static final String IMAGE_DIR = "d:/image/";

    /** Chunk size used when copying an image from the network to disk. */
    private static final int COPY_BUFFER_SIZE = 8192;

    /**
     * Matches a quoted, site-relative image path. Group 3 is the path, including an optional
     * leading slash so it can be resolved correctly against the page URL.
     */
    private static final Pattern RELATIVE_IMAGE = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")(/?([\\w-]+/)*[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF))('|\")");

    /** Matches a quoted absolute http/https image URL. Group 3 is the complete URL. */
    private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")(https?://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

    /**
     * Downloads one image and saves it as {@code <sequence number><extension>} in the image
     * directory.
     *
     * <p>The sequence number is only advanced when the download succeeds. Failures are reported
     * with a stack trace and otherwise ignored, so one bad link does not abort a whole page.
     *
     * @param httpurl absolute URL of the image to download
     */
    public void gethtmlpicture(String httpurl) {
        filenumber num = new filenumber(); // loads and stores the running file-name counter
        int count = num.numberreadfromfile();
        try {
            System.out.println("获取网络图片");
            String filename = String.valueOf(count).concat(extensionOf(httpurl));
            File dir = new File(IMAGE_DIR);
            dir.mkdirs(); // the target directory may not exist on first use

            URL url = new URL(httpurl);
            // try-with-resources closes both streams even when the copy fails midway.
            try (BufferedInputStream in = new BufferedInputStream(url.openStream());
                 FileOutputStream file = new FileOutputStream(new File(dir, filename))) {
                byte[] buffer = new byte[COPY_BUFFER_SIZE];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    file.write(buffer, 0, read);
                }
            }
            System.out.println("图片获取成功");
            num.numberwritetofile(count + 1); // persist the next free sequence number
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the file extension (including the dot) of the last path segment of a URL, or an
     * empty string when that segment has no dot.
     */
    private static String extensionOf(String httpurl) {
        String lastSegment = httpurl.substring(httpurl.lastIndexOf('/') + 1);
        int dot = lastSegment.lastIndexOf('.');
        return dot < 0 ? "" : lastSegment.substring(dot);
    }

    /**
     * Fetches the source of a web page as a single string.
     *
     * <p>Lines are concatenated without their line terminators, so the result is one continuous
     * string. The stream is decoded with the platform default charset.
     *
     * @param httpurl address of the page to fetch
     * @return the page source with line breaks removed
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public String gethtmlcode(String httpurl) throws IOException {
        URL uu = new URL(httpurl);
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(uu.openStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Scans a page for image references and downloads every match.
     *
     * <p>Relative image paths are resolved against the page URL before downloading; absolute
     * image URLs are downloaded as they appear. Each image URL is printed before it is fetched.
     *
     * @param url address of the page to scan
     * @throws IOException if the page itself cannot be fetched
     */
    public void get(String url) throws IOException {
        String content = this.gethtmlcode(url);
        URL page = new URL(url);

        Matcher matcher = RELATIVE_IMAGE.matcher(content);
        while (matcher.find()) {
            // A bare path is not a valid URL on its own; resolve it against the page address.
            String imageUrl = new URL(page, matcher.group(3)).toString();
            System.out.println(imageUrl);
            this.gethtmlpicture(imageUrl);
        }

        matcher = ABSOLUTE_IMAGE.matcher(content);
        while (matcher.find()) {
            String imageUrl = matcher.group(3);
            System.out.println(imageUrl);
            this.gethtmlpicture(imageUrl);
        }
    }

    /**
     * Entry point: downloads the images found on a fixed demo page.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.baidu.com";
        getcontentpicture gcp = new getcontentpicture();
        gcp.get(url);
    }
}
复制代码 代码如下:
package cn.mypic;
import java.io.*;
/**
 * Persists the running image sequence number in {@code d:/image/number.txt}.
 *
 * <p>The value is stored in binary form via {@link DataOutputStream#writeInt(int)} and read
 * back with {@link DataInputStream#readInt()}; the file is not human-readable text.
 */
public class filenumber {

    /** Directory that holds the counter file. */
    private static final File NUMBER_DIR = new File("d:/image");

    /** File the sequence number is stored in. */
    private static final File NUMBER_FILE = new File(NUMBER_DIR, "number.txt");

    /**
     * Saves the sequence number, replacing any previously stored value.
     *
     * <p>I/O failures are reported on {@code System.err} and otherwise ignored.
     *
     * @param x the sequence number to store
     */
    public void numberwritetofile(int x) {
        NUMBER_DIR.mkdirs(); // the directory may not exist on first use
        // try-with-resources releases the file handle once the value has been written.
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(NUMBER_FILE))) {
            out.writeInt(x);
        } catch (FileNotFoundException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        }
    }

    /**
     * Loads the stored sequence number and echoes it to {@code System.out}.
     *
     * @return the stored number, or {@code 0} when the file is missing or cannot be read
     */
    public int numberreadfromfile() {
        int c1 = 0;
        try (DataInputStream in = new DataInputStream(new FileInputStream(NUMBER_FILE))) {
            c1 = in.readInt();
            System.out.println(c1);
        } catch (FileNotFoundException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        }
        return c1;
    }

    /** Empty entry point kept so the class can still be launched on its own. */
    public static void main(String args[]) {
    }
}