欢迎您访问程序员文章站！本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

爬取爱奇艺首页的图片(Java网络爬虫简易实现)

程序员文章站 2022-06-05 18:47:29
...
package com.peter.Crawel01;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Minimal image crawler: fetches the iQIYI home page, selects every
 * {@code <img>} whose {@code src} contains a png/jpg/jpeg/gif extension and
 * saves each image into the current working directory as
 * {@code <index><extension>} (for example {@code 0.png}, {@code 1.jpg}).
 */
public class App {
	/**
	 * Downloads all matching images from the home page.
	 *
	 * @param args unused
	 * @throws IOException if the page cannot be fetched, or an image cannot be
	 *                     downloaded or written to disk
	 */
	public static void main(String[] args) throws IOException {
		Document document = Jsoup.connect("https://www.iqiyi.com/").get();
		Elements images = document.select("img[src~=(?i)\\.(png|jpe?g|gif)]");
		// Counts successfully saved images; also used as the output file name stem.
		int saved = 0;
		for (var image : images) {
			String src = image.attr("src");
			System.out.println(src);

			// Resolve against the page's base URI so protocol-relative ("//host/a.png"),
			// path-relative and already-absolute values all yield a usable URL.
			String absolute = image.absUrl("src");
			if (absolute.isEmpty()) {
				System.err.println("Skipping unresolvable image URL: " + src);
				continue;
			}

			URL url;
			try {
				url = new URL(absolute);
			} catch (MalformedURLException e) {
				// One bad URL must not abort the remaining downloads.
				System.err.println("Skipping malformed image URL " + absolute + ": " + e.getMessage());
				continue;
			}

			File file = new File(saved + extensionOf(absolute));
			// try-with-resources closes both streams even when the copy fails;
			// FileOutputStream creates the file itself if it does not exist.
			try (InputStream in = url.openStream();
					FileOutputStream out = new FileOutputStream(file)) {
				in.transferTo(out);
			}
			saved++;
		}
	}

	/**
	 * Extracts the file extension (including the leading dot) from the path part
	 * of a URL, ignoring any query string or fragment.
	 *
	 * @param url the URL text to inspect
	 * @return the extension such as {@code ".jpeg"}, or an empty string when the
	 *         last path segment contains no dot
	 */
	private static String extensionOf(String url) {
		int end = url.length();
		int query = url.indexOf('?');
		if (query >= 0) {
			end = Math.min(end, query);
		}
		int fragment = url.indexOf('#');
		if (fragment >= 0) {
			end = Math.min(end, fragment);
		}
		String path = url.substring(0, end);
		int dot = path.lastIndexOf('.');
		// A dot that appears before the last '/' belongs to the host or a directory.
		if (dot < 0 || dot < path.lastIndexOf('/')) {
			return "";
		}
		return path.substring(dot);
	}
}