爬取爱奇艺首页的图片（Java网络爬虫简易实现）

程序员文章站 2022-06-05 18:47:29

...

package com.peter.Crawel01;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Minimal image crawler: fetches one HTML page, selects every {@code <img>}
 * whose {@code src} looks like a PNG/JPEG/GIF, and saves each image into the
 * current working directory as {@code <index><extension>} (for example
 * {@code 0.png}, {@code 1.jpg}).
 */
public class App {
	/** Page that is crawled; also the base used to resolve non-absolute image sources. */
	private static final String PAGE_URL = "https://www.iqiyi.com/";

	/**
	 * Downloads every matching image referenced by {@link #PAGE_URL}.
	 *
	 * @param args unused
	 * @throws IOException if the page cannot be fetched or an image cannot be
	 *                     read from the network or written to disk
	 */
	public static void main(String[] args) throws IOException {
		URL pageUrl = new URL(PAGE_URL);
		Document document = Jsoup.connect(PAGE_URL).get();
		Elements images = document.select("img[src~=(?i)\\.(png|jpe?g|gif)]");
		int i = 0;
		for (var image : images) {
			String src = image.attr("src");
			System.out.println(src);
			try {
				// Resolving against the page URL handles protocol-relative
				// ("//host/a.png"), absolute and page-relative sources alike.
				URL url = new URL(pageUrl, src);
				download(url, new File(i + extensionOf(src)));
				i++;
			} catch (MalformedURLException e) {
				// One unusable src must not abort the remaining downloads.
				System.err.println("Skipping image with malformed URL: " + src + " (" + e.getMessage() + ")");
			}
		}
	}

	/**
	 * Copies the content behind {@code url} into {@code target}, creating or
	 * overwriting the file. Both streams are closed even if the copy fails.
	 */
	private static void download(URL url, File target) throws IOException {
		try (InputStream in = url.openStream();
				FileOutputStream out = new FileOutputStream(target)) {
			in.transferTo(out);
		}
	}

	/**
	 * Returns the file extension of {@code src} including the leading dot
	 * (e.g. {@code ".jpeg"}), ignoring any query string or fragment, or an
	 * empty string when the last path segment has no dot.
	 */
	private static String extensionOf(String src) {
		int end = src.length();
		int query = src.indexOf('?');
		if (query >= 0) {
			end = query;
		}
		int fragment = src.indexOf('#');
		if (fragment >= 0 && fragment < end) {
			end = fragment;
		}
		String path = src.substring(0, end);
		int dot = path.lastIndexOf('.');
		// A dot before the last '/' belongs to the host or a directory, not the file name.
		return dot > path.lastIndexOf('/') ? path.substring(dot) : "";
	}
}

上一篇：长沙吃的特色，让你爱上长沙美食

下一篇：历史上声名显赫的五匹马，除了赤兔你还知道其他吗？