欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  Java

JAVA抓取一个HTML源代码

程序员文章站 2022-04-14 20:52:49
...
package com.hyq.src;
import java.io.InputStream;
import java.net.URL;
/**
 * Small demo that downloads a resource over a URL and prints its raw
 * content (typically HTML source) to standard output.
 */
public class Test {

    /** Address fetched by {@link #testNetStream()}. */
    private static final String DEFAULT_URL = "http://www.imust.cn/";

    /** Size of the scratch buffer used while draining the response stream. */
    private static final int CHUNK_SIZE = 8192;

    /**
     * Program entry point: runs {@link #testNetStream()} and prints the
     * stack trace of any failure instead of letting it propagate.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            Test.testNetStream();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches the default page and prints its content to standard output.
     *
     * @throws Exception if the URL cannot be opened or read
     */
    public static void testNetStream() throws Exception {
        System.out.println(readPage(DEFAULT_URL));
    }

    /**
     * Reads the complete content behind {@code address} and returns it as text.
     *
     * <p>The stream is drained in a loop because a single
     * {@code InputStream.read(byte[])} call may return fewer bytes than are
     * available in total. Only the bytes actually received are decoded, so the
     * result carries no padding from an oversized buffer. Decoding uses the
     * platform default charset.
     *
     * @param address URL of the resource to read, in any scheme supported by
     *                {@link URL}
     * @return the full content of the resource
     * @throws Exception if the address is malformed or the resource cannot be
     *                   opened or read
     */
    public static String readPage(String address) throws Exception {
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        // try-with-resources closes the stream even when read() fails midway.
        try (InputStream in = new URL(address).openStream()) {
            byte[] chunk = new byte[CHUNK_SIZE];
            int count;
            while ((count = in.read(chunk)) != -1) {
                collected.write(chunk, 0, count);
            }
        }
        return new String(collected.toByteArray());
    }
}
package com.hyq.src;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
/**
 * Small demo that downloads a page line by line through a character reader
 * and prints its HTML source to standard output.
 */
public class Test {

    /** Charset used by {@link #getHtmlSource(String)} to decode the response. */
    private static final String DEFAULT_CHARSET = "gb2312";

    /**
     * Program entry point: prints the source of a fixed sample page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println(Test.getHtmlSource("http://sports.163.com/zc/"));
    }

    /**
     * Downloads the resource at {@code url}, decoding it as gb2312.
     *
     * @param url address of the resource to read
     * @return the content with every line terminated by {@code "\n"}; on
     *         failure, whatever was read before the error (possibly empty)
     */
    public static String getHtmlSource(String url) {
        return getHtmlSource(url, DEFAULT_CHARSET);
    }

    /**
     * Downloads the resource at {@code url}, decoding it with the given charset.
     *
     * <p>This is best-effort: any exception (malformed address, unsupported
     * charset name, I/O failure) has its stack trace printed and the text
     * collected so far is returned instead of propagating the error.
     *
     * @param url         address of the resource to read
     * @param charsetName name of the charset used to decode the response bytes
     * @return the content with every line terminated by {@code "\n"}; on
     *         failure, whatever was read before the error (possibly empty)
     */
    public static String getHtmlSource(String url, String charsetName) {
        StringBuilder text = new StringBuilder();
        try {
            URLConnection connection = new URL(url).openConnection();
            // The raw stream is declared as its own resource so it is closed even
            // when constructing the reader fails (e.g. unsupported charset name).
            try (java.io.InputStream raw = connection.getInputStream();
                 BufferedReader reader =
                         new BufferedReader(new InputStreamReader(raw, charsetName))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    text.append(line).append("\n");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return text.toString();
    }
}