欢迎您访问程序员文章站,本站旨在为大家提供并分享程序员计算机编程知识!
您现在的位置是: 首页

Java 简单的网络爬虫

程序员文章站 2022-06-05 19:50:33
...

网络爬虫简单实现 + 模拟浏览器

简单步骤:

        1、获取URL

        2、下载资源

注:url.openStream() 返回一个 InputStream,可以像读取普通输入流(例如文件输入流)一样读取网络资源。

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
/**
 * Minimal web-crawler demo that imitates a browser request.
 *
 * <ol>
 *   <li>Build the URL and open an HTTP connection with a browser-like User-Agent.</li>
 *   <li>Download the resource and store it in a local file.</li>
 * </ol>
 *
 * <p>{@link #basicSpider1()} saves an HTML page as text; {@link #basicSpider2()} saves an
 * image as raw bytes. Output files are written relative to the working directory.
 */
public class 网络爬虫 {
    /** User-Agent header sent with every request so the server treats us like a desktop browser. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0";

    /** Connect/read timeout in milliseconds; without one a stalled server blocks forever. */
    private static final int TIMEOUT_MILLIS = 15000;

    /** Size of the byte buffer used when copying binary resources. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Runs the text-page download demo.
     *
     * @param args ignored
     * @throws Exception if the download or the file write fails
     */
    public static void main(String[] args) throws Exception {
        basicSpider1();
    }

    /**
     * Downloads a CSDN blog page and writes it to {@code bbb.txt} as UTF-8 text.
     *
     * <p>The page is copied line by line, so every line in the output ends with the
     * platform line separator regardless of the terminator the server used.
     *
     * @throws Exception if the connection cannot be opened, the server reports an error,
     *         or the output file cannot be written
     */
    static void basicSpider1() throws Exception {// txt
        HttpURLConnection conn = openBrowserLikeConnection(
                "https://blog.csdn.net/qq_57389269/article/details/118994372?spm=1001.2014.3001.5501");
        // The response stream is opened before the file so a failed request leaves no empty file.
        try (BufferedReader in = new BufferedReader(
                     new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8));
             BufferedWriter out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream("bbb.txt"), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                out.write(line);
                out.newLine();
            }
            // No explicit flush: closing the writer flushes it.
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Downloads the CSDN anonymous-user avatar and writes it to {@code web.png} byte for byte.
     *
     * @throws Exception if the connection cannot be opened, the server reports an error,
     *         or the output file cannot be written
     */
    static void basicSpider2() throws Exception {// picture
        HttpURLConnection conn = openBrowserLikeConnection(
                "https://g.csdnimg.cn/static/user-img/anonymous-User-img.png");
        try (InputStream in = conn.getInputStream();
             OutputStream out = new FileOutputStream("web.png")) {
            // Copying through our own buffer makes extra Buffered* wrappers unnecessary.
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Opens a GET connection to {@code address} configured with the browser User-Agent
     * and with connect/read timeouts. The caller is responsible for calling
     * {@link HttpURLConnection#disconnect()} when done.
     *
     * @param address absolute http/https URL of the resource
     * @return the configured, not yet connected, connection
     * @throws IOException if the URL is malformed or the connection cannot be created
     */
    private static HttpURLConnection openBrowserLikeConnection(String address) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(address).openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("User-Agent", USER_AGENT);
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        return conn;
    }
}

相关标签: Java