简单的java网络爬虫
程序员文章站
2022-06-05 18:55:32
...
代码中已尽可能详细地添加了注释。
需要用到的知识包括 Java 网络编程（HTTP 这一块）和 I/O 流编程。
直接上代码:
package com.org.socket;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.URLConnection;
/**
 * Minimal page fetcher: reads the content behind a URL line by line, echoes each
 * line to standard output and writes the same lines to a UTF-8 text file.
 */
public class HttpTest {

    /** URL fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL = "http://www.baidu.com";

    /** File written when no output path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT = "d:/java_/text.txt";

    /**
     * Program entry point.
     *
     * <p>I/O failures are reported with a stack trace on standard error and do not
     * propagate out of this method.
     *
     * @param args optional arguments: {@code args[0]} is the URL to fetch and
     *             {@code args[1]} is the output file path; any argument that is
     *             missing falls back to {@link #DEFAULT_URL} / {@link #DEFAULT_OUTPUT}
     */
    public static void main(String[] args) {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;
        try {
            download(address, outputPath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the text behind {@code address} into {@code outputPath}, decoding and
     * encoding as UTF-8, and prints every line to standard output as it goes.
     *
     * @param address    URL of the resource to read
     * @param outputPath path of the file to (over)write
     * @throws IOException if the URL is malformed, the resource cannot be read, or
     *                     the output file cannot be written
     */
    private static void download(String address, String outputPath) throws IOException {
        URL url = new URL(address);
        URLConnection connection = url.openConnection();
        // try-with-resources closes every stream on all paths (success or failure).
        // The input side is opened first, so a failed fetch never creates or
        // truncates the output file.
        try (InputStream in = connection.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
             FileOutputStream out = new FileOutputStream(new File(outputPath));
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
                // Echo the content to the console as well.
                System.out.println(line);
            }
        }
    }
}
上一篇: SEOer的三流货色:失败的个性
下一篇: Linux学习笔记(二)—文件与目录管理