欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

简单的java网络爬虫

程序员文章站 2022-06-05 18:55:32
...

注释都尽可能地写好一点了
需要用到的知识包含java网络编程(http这一块)和I/O流编程
直接上代码:

package com.org.socket;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.URLConnection;

/**
 * Minimal web-fetch example: downloads a single page over HTTP and saves it to a local file.
 *
 * <p>The page at {@code http://www.baidu.com} is read line by line as UTF-8. Every line is
 * written to {@code d:/java_/text.txt} (also UTF-8, followed by the platform line separator)
 * and echoed to standard output.
 */
public class HttpTest {
	/** Connect/read timeout in milliseconds, so an unresponsive server cannot block forever. */
	private static final int TIMEOUT_MILLIS = 10000;

	/**
	 * Fetches the page and copies it to the output file and to the console.
	 *
	 * <p>I/O failures (bad URL, network error, unwritable output file) are reported by
	 * printing the stack trace; they do not propagate out of this method.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		// Address of the page to download.
		String url2 = "http://www.baidu.com";
		try {
			URL url = new URL(url2);
			// Prepare the connection; it is actually opened when the input stream is requested.
			URLConnection connection = url.openConnection();
			connection.setConnectTimeout(TIMEOUT_MILLIS);
			connection.setReadTimeout(TIMEOUT_MILLIS);

			// try-with-resources closes both streams on every path, including failures, and
			// never touches a stream that was not successfully opened. The reader is declared
			// first so the output file is not created/truncated when the download cannot start.
			try (BufferedReader br = new BufferedReader(
						new InputStreamReader(connection.getInputStream(), "UTF-8"));
					BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
						new FileOutputStream(new File("d:/java_/text.txt")), "UTF-8"))) {
				String line;
				while ((line = br.readLine()) != null) {
					writer.write(line);
					writer.newLine();
					// Also echo the content to the console.
					System.out.println(line);
				}
			}
		} catch (IOException e) {
			// Covers malformed URLs, network failures and file errors alike.
			e.printStackTrace();
		}
	}

}

相关标签: JAVA