Http协议客户端工具之Httpclient
程序员文章站
2022-07-15 09:27:58
...
HttpClient 是 Apache Jakarta Commons 下的子项目,它提供了一个高效的、最新的、功能丰富的支持 HTTP 协议的客户端编程工具包,并且支持 HTTP 协议最新的版本和建议。
爬取某些页面时,如果部分链接经过 JavaScript 加密,则需要使用 HtmlUnit,这个工具可以直接执行 JavaScript。
个人写的工具类,仅供参考:
爬取某些页面时,如果部分链接经过 JavaScript 加密,则需要使用 HtmlUnit,这个工具可以直接执行 JavaScript。
个人写的工具类,仅供参考:
package com.xyj.com.tool.util; import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.nio.charset.Charset; import java.security.cert.CertificateException; import java.util.List; import java.util.Map; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLHandshakeException; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.apache.http.HttpEntity; import org.apache.http.HttpEntityEnclosingRequest; import org.apache.http.HttpHost; import org.apache.http.HttpRequest; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.NoHttpResponseException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpRequestRetryHandler; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.params.ClientPNames; import org.apache.http.client.params.CookiePolicy; import org.apache.http.client.utils.URLEncodedUtils; import org.apache.http.conn.params.ConnRoutePNames; import org.apache.http.conn.scheme.Scheme; import org.apache.http.conn.ssl.SSLSocketFactory; import org.apache.http.entity.StringEntity; import org.apache.http.entity.mime.MultipartEntity; import org.apache.http.entity.mime.content.ByteArrayBody; import org.apache.http.entity.mime.content.FileBody; import org.apache.http.entity.mime.content.StringBody; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.params.CoreProtocolPNames; import org.apache.http.protocol.ExecutionContext; import org.apache.http.protocol.HttpContext; import org.apache.http.util.EntityUtils; /** * @className:HttpClientUtil.java * @classDescription:HttpClient工具类//待完善模拟登录,cookie,证书登录 * @author:xiayingjie * @createTime:2011-8-31 */ public class HttpClientUtil { public static String CHARSET_ENCODING = "UTF-8"; // private static String // USER_AGENT="Mozilla/4.0 
(compatible; MSIE 6.0; Win32)";//ie6 public static String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Win32)";// ie7 // private static String // USER_AGENT="Mozilla/4.0 (compatible; MSIE 8.0; Win32)";//ie8 /** * 获取DefaultHttpClient对象 * * @param charset * 字符编码 * @return DefaultHttpClient对象 */ private static DefaultHttpClient getDefaultHttpClient(final String charset) { DefaultHttpClient httpclient = new DefaultHttpClient(); // 模拟浏览器,解决一些服务器程序只允许浏览器访问的问题 httpclient.getParams().setParameter(CoreProtocolPNames.USER_AGENT, USER_AGENT); httpclient.getParams().setParameter( CoreProtocolPNames.USE_EXPECT_CONTINUE, Boolean.FALSE); httpclient.getParams().setParameter( CoreProtocolPNames.HTTP_CONTENT_CHARSET, charset == null ? CHARSET_ENCODING : charset); // 浏览器兼容性 httpclient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY); // 定义重试策略 httpclient.setHttpRequestRetryHandler(requestRetryHandler); return httpclient; } /** * 访问https的网站 * @param httpclient */ private static void enableSSL(DefaultHttpClient httpclient){ //调用ssl try { SSLContext sslcontext = SSLContext.getInstance("TLS"); sslcontext.init(null, new TrustManager[] { truseAllManager }, null); SSLSocketFactory sf = new SSLSocketFactory(sslcontext); sf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); Scheme https = new Scheme("https", sf, 443); httpclient.getConnectionManager().getSchemeRegistry() .register(https); } catch (Exception e) { e.printStackTrace(); } } /** * 重写验证方法,取消检测ssl */ private static TrustManager truseAllManager = new X509TrustManager(){ public void checkClientTrusted( java.security.cert.X509Certificate[] arg0, String arg1) throws CertificateException { // TODO Auto-generated method stub } public void checkServerTrusted( java.security.cert.X509Certificate[] arg0, String arg1) throws CertificateException { // TODO Auto-generated method stub } public java.security.cert.X509Certificate[] getAcceptedIssuers() { // TODO Auto-generated method stub 
return null; } } ; /** * 异常自动恢复处理, 使用HttpRequestRetryHandler接口实现请求的异常恢复 */ private static HttpRequestRetryHandler requestRetryHandler = new HttpRequestRetryHandler() { // 自定义的恢复策略 public boolean retryRequest(IOException exception, int executionCount, HttpContext context) { // 设置恢复策略,在发生异常时候将自动重试3次 if (executionCount >= 3) { // 如果连接次数超过了最大值则停止重试 return false; } if (exception instanceof NoHttpResponseException) { // 如果服务器连接失败重试 return true; } if (exception instanceof SSLHandshakeException) { // 不要重试ssl连接异常 return false; } HttpRequest request = (HttpRequest) context .getAttribute(ExecutionContext.HTTP_REQUEST); boolean idempotent = (request instanceof HttpEntityEnclosingRequest); if (!idempotent) { // 重试,如果请求是考虑幂等 return true; } return false; } }; /** * 使用ResponseHandler接口处理响应,HttpClient使用ResponseHandler会自动管理连接的释放,解决了对连接的释放管理 */ private static ResponseHandler<String> responseHandler = new ResponseHandler<String>() { // 自定义响应处理 public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException { HttpEntity entity = response.getEntity(); if (entity != null) { String charset = EntityUtils.getContentCharSet(entity) == null ? 
CHARSET_ENCODING : EntityUtils.getContentCharSet(entity); return new String(EntityUtils.toByteArray(entity), charset); } else { return null; } } }; /** * 使用post方法获取相关的数据 * * @param url * @param paramsList * @return */ public static String post(String url, List<NameValuePair> paramsList) { return httpRequest(url, paramsList, "POST", null); } /** * 使用post方法并且通过代理获取相关的数据 * * @param url * @param paramsList * @param proxy * @return */ public static String post(String url, List<NameValuePair> paramsList, HttpHost proxy) { return httpRequest(url, paramsList, "POST", proxy); } /** * 使用get方法获取相关的数据 * * @param url * @param paramsList * @return */ public static String get(String url, List<NameValuePair> paramsList) { return httpRequest(url, paramsList, "GET", null); } /** * 使用get方法并且通过代理获取相关的数据 * * @param url * @param paramsList * @param proxy * @return */ public static String get(String url, List<NameValuePair> paramsList, HttpHost proxy) { return httpRequest(url, paramsList, "GET", proxy); } /** * 提交数据到服务器 * * @param url * @param params * @param authenticated * @throws IOException * @throws ClientProtocolException */ public static String httpRequest(String url, List<NameValuePair> paramsList, String method, HttpHost proxy) { String responseStr = null; // 判断输入的值是是否为空 if (null == url || "".equals(url)) { return null; } // 创建HttpClient实例 DefaultHttpClient httpclient = getDefaultHttpClient(CHARSET_ENCODING); //判断是否是https请求 if(url.startsWith("https")){ enableSSL(httpclient); } String formatParams = null; // 将参数进行utf-8编码 if (null != paramsList && paramsList.size() > 0) { formatParams = URLEncodedUtils.format(paramsList, CHARSET_ENCODING); } // 如果代理对象不为空则设置代理 if (null != proxy) { httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy); } try { // 如果方法为Get if ("GET".equalsIgnoreCase(method)) { if (formatParams != null) { url = (url.indexOf("?")) < 0 ? (url + "?" 
+ formatParams) : (url.substring(0, url.indexOf("?") + 1) + formatParams); } HttpGet hg = new HttpGet(url); responseStr = httpclient.execute(hg, responseHandler); // 如果方法为Post } else if ("POST".equalsIgnoreCase(method)) { HttpPost hp = new HttpPost(url); if (formatParams != null) { StringEntity entity = new StringEntity(formatParams); entity.setContentType("application/x-www-form-urlencoded"); hp.setEntity(entity); } responseStr = httpclient.execute(hp, responseHandler); } } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return responseStr; } /** * 提交数据到服务器 * * @param url * @param params * @param authenticated * @throws IOException * @throws ClientProtocolException */ public static String httpFileRequest(String url, Map<String, String> fileMap,Map<String, String> stringMap,int type, HttpHost proxy) { String responseStr = null; // 判断输入的值是是否为空 if (null == url || "".equals(url)) { return null; } // 创建HttpClient实例 DefaultHttpClient httpclient = getDefaultHttpClient(CHARSET_ENCODING); //判断是否是https请求 if(url.startsWith("https")){ enableSSL(httpclient); } // 如果代理对象不为空则设置代理 if (null != proxy) { httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy); } //发送文件 HttpPost hp = new HttpPost(url); MultipartEntity multiEntity = new MultipartEntity(); try { //type=0是本地路径,否则是网络路径 if(type==0){ for (String key : fileMap.keySet()) { multiEntity.addPart(key, new FileBody(new File(fileMap.get(key)))); } }else{ for (String key : fileMap.keySet()) { multiEntity.addPart(key,new ByteArrayBody(getUrlFileBytes(fileMap.get(key)),key)); } } // 加入相关参数 默认编码为utf-8 for (String key : stringMap.keySet()) { multiEntity.addPart(key, new StringBody(stringMap.get(key),Charset.forName(CHARSET_ENCODING))); } hp.setEntity(multiEntity); responseStr = httpclient.execute(hp, responseHandler); } catch (Exception e) { // TODO Auto-generated catch block 
e.printStackTrace(); } return responseStr; } /** * 将相关文件和参数提交到相关服务器 * @param url * @param fileMap * @param StringMap * @return */ public static String postFile(String url, Map<String, String> fileMap, Map<String, String> stringMap) { return httpFileRequest( url,fileMap,stringMap,0, null); } /** * 将相关文件和参数提交到相关服务器 * @param url * @param fileMap * @param StringMap * @return */ public static String postUrlFile(String url, Map<String, String> urlMap, Map<String, String> stringMap) { return httpFileRequest( url,urlMap,stringMap,1, null); } /** * 获取网络文件的字节数组 * * @param url * @return * @throws IOException * @throws ClientProtocolException * @throws ClientProtocolException * @throws IOException */ public static byte[] getUrlFileBytes(String url) throws ClientProtocolException, IOException { byte[] bytes = null; // 创建HttpClient实例 DefaultHttpClient httpclient = getDefaultHttpClient(CHARSET_ENCODING); // 获取url里面的信息 HttpGet hg = new HttpGet(url); HttpResponse hr = httpclient.execute(hg); bytes = EntityUtils.toByteArray(hr.getEntity()); // 转换内容为字节 return bytes; } /** * 获取图片的字节数组 * * @createTime 2011-11-24 * @param url * @return * @throws IOException * @throws ClientProtocolException * @throws ClientProtocolException * @throws IOException */ public static byte[] getImg(String url) throws ClientProtocolException, IOException { byte[] bytes = null; // 创建HttpClient实例 DefaultHttpClient httpclient = getDefaultHttpClient(CHARSET_ENCODING); // 获取url里面的信息 HttpGet hg = new HttpGet(url); HttpResponse hr = httpclient.execute(hg); bytes = EntityUtils.toByteArray(hr.getEntity()); // 转换内容为字节 return bytes; } public static void main(String[] args) throws URISyntaxException, ClientProtocolException, IOException { String url="http://www.baidu.com/"; String str=HttpClientUtil.get(url, null); System.out.println(str); } }
上一篇: 模板之freemarker
下一篇: 模板之freemarker
推荐阅读
-
Linux三阶段之七:HTTP协议概念原理说明
-
Django之web应用、http协议和django简介
-
和我一起学习爬虫之HTTP/HTTPS抓包工具--Fiddler
-
Http协议客户端工具之Httpclient
-
Http协议客户端工具之Httpclient
-
荐 软件测试之TCP、HTTP协议必知必会,面试必备!
-
Dubbo学习笔记(九)------协议之http
-
android存储方式之Http存储方式讲解,TCP/IP协议和Http协议之间的关系
-
基于ONVIF协议的(IPC)客户端程序开发之onvif协议由soap和rtsp协议组成讲解
-
Http请求封装(对HttpClient类的进一步封装,使之调用更方便。另外,此类管理唯一的HttpClient对象,支持线程池调用,效率更高)