// Java 抓取网页内容实现代码 (Java sample code for fetching web page content)
package test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
/**
 * Demonstrates fetching the text of a web page, either directly via
 * {@link #getcontent(String)} or through an authenticating HTTP proxy
 * configured with {@link #initproxy(String, int, String, String)}.
 */
public class urltest {

    /** Terminator appended after every line read from a response. */
    private static final String LINE_SEPARATOR = "\r\n";

    /**
     * Reads the resource at {@code strurl} and returns its text.
     *
     * <p>The body is decoded as UTF-8 (the response's declared charset is not
     * consulted) and every line is terminated with {@code "\r\n"}.
     *
     * @param strurl the URL to open
     * @return the text of the resource, or {@code "error open url:" + strurl}
     *         if the URL is malformed or cannot be read
     */
    public static String getcontent(String strurl) {
        try {
            URL url = new URL(strurl);
            // try-with-resources closes the stream even when reading fails midway.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
                return readAll(br);
            }
        } catch (IOException | RuntimeException e) {
            // Contract of this method: every failure is reported through the
            // returned string instead of being propagated to the caller.
            return "error open url:" + strurl;
        }
    }

    /**
     * Configures the JVM-wide HTTP proxy and installs a default
     * {@link Authenticator} that supplies the given credentials to it.
     *
     * <p>This mutates global state: system properties and the default
     * authenticator are shared by every connection opened afterwards.
     *
     * @param host     proxy host name or address (not a URL)
     * @param port     proxy port
     * @param username user name presented to the proxy
     * @param password password presented to the proxy
     */
    public static void initproxy(String host, int port, final String username,
            final String password) {
        Authenticator.setDefault(new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                // Only answer proxy challenges, so the proxy password is not
                // handed to an arbitrary origin server that asks for credentials.
                if (getRequestorType() != RequestorType.PROXY) {
                    return null;
                }
                return new PasswordAuthentication(username, password.toCharArray());
            }
        });
        // System property keys are case-sensitive; the HTTP protocol handler
        // reads "http.proxyHost" and "http.proxyPort".
        // NOTE(review): "http.proxyType" and "http.proxySet" are retained from the
        // original code; they do not appear to be documented networking
        // properties - confirm whether they can be dropped.
        System.setProperty("http.proxyType", "4");
        System.setProperty("http.proxyPort", Integer.toString(port));
        System.setProperty("http.proxyHost", host);
        System.setProperty("http.proxySet", "true");
    }

    /**
     * Fetches a sample page through the proxy using an explicit
     * {@link HttpURLConnection}, prints it, then fetches and prints it again
     * via {@link #getcontent(String)}.
     *
     * @param args unused
     * @throws IOException if the explicit connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // An absolute URL (with scheme) is required; "//host" is rejected by URL.
        String url = "http://www.jb51.net";
        // http.proxyHost expects a bare host name or address, not a URL.
        String proxy = "192.168.22.81";
        int port = 80;
        String username = "username";
        String password = "password";

        URL server = new URL(url);
        initproxy(proxy, port, username, password);
        HttpURLConnection connection = (HttpURLConnection) server.openConnection();
        String content;
        try {
            connection.connect();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(),
                            StandardCharsets.UTF_8))) {
                content = readAll(reader);
            }
        } finally {
            connection.disconnect();
        }
        System.out.println("content= " + content);
        System.out.println(getcontent(url));
    }

    /** Drains {@code reader} line by line, terminating each line with CRLF. */
    private static String readAll(BufferedReader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            text.append(line).append(LINE_SEPARATOR);
        }
        return text.toString();
    }
}