JAVA爬虫撸图片
程序员文章站
2023-11-13 22:19:58
JAVA爬虫撸图片:HttpClient + jsoup。package cn.fu.spider; import org.apache.http.HttpEntity; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import o...
JAVA爬虫撸图片
HttpClient + jsoup
package cn.fu.spider;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
/**
 * Small image scraper.
 *
 * <p>Fetches a series of numbered listing pages with Apache HttpClient, extracts
 * the lazy-loaded image URLs from each page with jsoup, and saves every image
 * into the local directory named by {@link #file}.
 */
public class HttpclientPoolDemo {
    /** Target directory for the downloaded images. */
    static String file = "F://paqu";

    /** Total number of images in the gallery being scraped. */
    private static final int TOTAL_IMAGES = 193;

    /** Number of images shown on each listing page. */
    private static final int IMAGES_PER_PAGE = 9;

    /** Size of the chunk used when copying an image stream to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Entry point: collects the image URLs from every listing page and downloads them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Listing pages are addressed by appending the page number, e.g.
        // http://www.8888.com/xx/1/, http://www.8888.com/xx/2/, http://www.8888.com/xx/3/
        String url = "http://**********.html" + "/";
        // Round up so that the final, partially filled page is counted as well.
        int pages = (TOTAL_IMAGES + IMAGES_PER_PAGE - 1) / IMAGES_PER_PAGE;

        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // Maximum number of connections in the whole pool.
        cm.setMaxTotal(50);
        // Maximum number of connections per host.
        cm.setDefaultMaxPerRoute(10);

        List<String> list;
        try {
            list = doGet(cm, url, pages);
        } finally {
            // The pool is not needed for the downloads below; release its sockets.
            cm.shutdown();
        }

        for (String s : list) {
            String name = fileNameOf(s);
            if (name.isEmpty()) {
                System.err.println("Skipping image URL without a file name: " + s);
                continue;
            }
            downImage(s, name);
        }
    }

    /**
     * Fetches listing pages {@code 1..pages} (inclusive) and collects the image URLs
     * found on them.
     *
     * <p>A page that cannot be fetched is reported on stderr and skipped, so one
     * failing request does not abort the whole run.
     *
     * @param cm    connection manager backing the HTTP client
     * @param url   base URL; the page number is appended to it
     * @param pages number of the last page to fetch
     * @return the non-empty {@code data-lazy-src} values of all elements with the
     *         CSS class {@code alignnone}, in page order
     */
    private static List<String> doGet(PoolingHttpClientConnectionManager cm, String url, int pages) {
        List<String> imageUrls = new ArrayList<>();
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        for (int i = 1; i <= pages; i++) {
            String html = fetchPage(httpClient, url + i);
            if (html == null) {
                continue;
            }
            Document document = Jsoup.parse(html);
            Elements elements = document.getElementsByClass("alignnone");
            for (Element element : elements) {
                String src = element.attr("data-lazy-src");
                // Elements without the attribute yield an empty value; drop those.
                if (src != null && !src.isEmpty()) {
                    imageUrls.add(src);
                }
            }
        }
        return imageUrls;
    }

    /**
     * Downloads one page and returns its body decoded as UTF-8.
     *
     * @param httpClient client used to execute the request
     * @param pageUrl    address of the page
     * @return the page body, or {@code null} when the request failed or had no body
     */
    private static String fetchPage(CloseableHttpClient httpClient, String pageUrl) {
        // try-with-resources closes the response (and frees its pooled connection)
        // on both the success and the failure path.
        try (CloseableHttpResponse response = httpClient.execute(new HttpGet(pageUrl))) {
            HttpEntity entity = response.getEntity();
            return entity == null ? null : EntityUtils.toString(entity, "UTF-8");
        } catch (IOException e) {
            System.err.println("Failed to fetch page: " + pageUrl);
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Derives a local file name from an image URL: the last path segment, with any
     * query string or fragment removed.
     *
     * @param imgUrl image URL
     * @return the file name, or an empty string when the URL path ends with {@code /}
     */
    private static String fileNameOf(String imgUrl) {
        int end = imgUrl.length();
        int query = imgUrl.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = imgUrl.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = imgUrl.substring(0, end);
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /**
     * Downloads a single image into the target directory.
     *
     * <p>Failures are reported on stderr; the method never throws for I/O problems
     * so the caller can continue with the next image.
     *
     * @param imgurl   address of the image
     * @param fileName name of the file to create inside the target directory
     */
    public static void downImage(String imgurl, String fileName) {
        // Make sure the target directory exists.
        File dir = new File(file);
        if (!dir.exists() && !dir.mkdirs()) {
            System.err.println("Cannot create target directory: " + dir);
            return;
        }

        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(imgurl).openConnection();
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            File target = new File(dir, fileName);
            // Both streams are closed even when the copy fails part-way through.
            try (InputStream in = connection.getInputStream();
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
        } catch (IOException e) {
            // Also covers MalformedURLException and FileNotFoundException.
            System.err.println("Failed to download image: " + imgurl);
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
}
本文地址:https://blog.csdn.net/fu4562018/article/details/112098690
上一篇: 网络编程基础之TCP编程学习(一)