java 实现通过 post 方式提交json参数操作
程序员文章站
2022-07-04 19:59:32
由于所爬取的网站需要验证码,通过网页的开发人员工具【f12】及在线http post,get接口测试请求工具(http://coolaf.com/)发现访问时加上请求头header 信息时可以跳过验证...
由于所爬取的网站需要验证码,通过浏览器的开发人员工具【F12】及在线 HTTP POST/GET 接口测试工具(http://coolaf.com/)发现:访问时加上请求头(header)信息即可跳过验证码校验。
而且该网站只接受 POST 请求,提交的参数也只接受 JSON 格式,否则请求失败。
现将通过 post 方式提交json参数的方法记录如下:
import java.io.unsupportedencodingexception; import java.net.uri; import java.net.urldecoder; import java.util.arraylist; import java.util.list; import org.apache.http.httpentity; import org.apache.http.httpresponse; import org.apache.http.client.httpclient; import org.apache.http.client.config.requestconfig; import org.apache.http.client.methods.httppost; import org.apache.http.client.methods.httprequestbase; import org.apache.http.client.utils.uribuilder; import org.apache.http.entity.stringentity; import org.apache.http.impl.client.closeablehttpclient; import org.apache.http.impl.client.httpclientbuilder; import org.apache.http.impl.client.httpclients; import org.apache.http.util.entityutils; import com.alibaba.fastjson.jsonarray; import com.alibaba.fastjson.jsonobject; /** * <p>@postjsonparamstest.java</p> * @version 1.0 * @author zxk * @date 2018-3-3 */ public class postjsonparamstest { // 超时时间 private static final int run_time =10000; // 爬取初始页数 private string page; public static void main(string[] args) throws exception { postjsonparamstest crawl = new postjsonparamstest(); // 请求的url地址 string url ="http://www.gzcredit.gov.cn/service/creditservice.asmx/searchorgwithpage"; // 设置起始访问页码 crawl.setpage("1"); string isstop = ""; // 设置请求 httprequestbase request = null; request = new httppost(url); try { // 设置config requestconfig requestconfig = requestconfig.custom() .setsockettimeout(run_time) .setconnecttimeout(run_time) .setconnectionrequesttimeout(run_time) .build(); request.setconfig(requestconfig); // json 格式的 post 参数 string postparams ="{\"condition\":{\"qymc\":\"%%%%\",\"cydw\":\"\"},\"pageno\":"+crawl.getpage()+",\"pagesize\":100,count:2709846}"; system.out.println(postparams); httpentity httpentity = new stringentity(postparams); ((httppost) request).setentity(httpentity); // 添加请求头,可以绕过验证码 request.addheader("accept","application/json, text/javascript, */*"); request.addheader("accept-encoding","gzip, deflate"); request.addheader("accept-language", 
"zh-cn,zh;q=0.8"); request.addheader("connection", "keep-alive"); request.addheader("host", "www.gzcredit.gov.cn"); request.addheader("content-type", "application/json; charset=utf-8"); uribuilder builder = new uribuilder(url); uri uri = builder.build(); uri = new uri(urldecoder.decode(uri.tostring(), "utf-8")); request.seturi(uri); while(!isstop.equals("停止")||isstop.equals("重跑")){ isstop = crawl.crawllist(request); if(isstop.equals("爬取")){ crawl.setpage(string.valueof(integer.parseint(crawl.getpage())+1)); } // if("2713".equals(crawl.getpage())) break; if("2".equals(crawl.getpage())){ break; } } } catch (numberformatexception e) { e.printstacktrace(); throw new numberformatexception("数字格式错误"); } catch (unsupportedencodingexception e) { e.printstacktrace(); throw new unsupportedencodingexception("不支持的编码集"); } } /** * 爬取搜索列表 * @param page * @return */ private string crawllist(httprequestbase request){ int statuscode = 0; // 下面两种方式都可以用来创建客户端连接,相当于打开了一个浏览器 closeablehttpclient httpclient = httpclients.createdefault(); // httpclient httpclient = httpclientbuilder.create().build(); httpentity httpentity = null; httpresponse response = null; try { try { response = httpclient.execute(request); } catch (exception e){ e.printstacktrace(); entityutils.consumequietly(httpentity); return "重跑"; } //打印状态 statuscode =response.getstatusline().getstatuscode(); if(statuscode!=200){ entityutils.consumequietly(httpentity); return "重跑"; } //实体 httpentity = response.getentity(); string searchliststr = entityutils.tostring(httpentity,"gbk").replaceall("\\\\米", "米"); string alldata = (string) jsonobject.parseobject(searchliststr).get("d"); // 字符串值中间含双引号的替换处理 string s = alldata.replaceall("\\{\"","{'") .replaceall("\":\"", "':'") .replaceall("\",\"", "','") .replaceall("\":", "':") .replaceall(",\"", ",'") .replaceall("\"\\}", "'}") .replaceall("\"", "") .replaceall("'", "\"") .replaceall("<br />", "") .replaceall("\t", "") .replaceall("\\\\", "?"); jsonobject jsondata = 
jsonobject.parseobject(s); jsonarray jsoncontent = jsondata.getjsonarray("orglist"); searchliststr = null; alldata = null; s = null; if (jsoncontent==null || jsoncontent.size()<1) { return "重跑"; } system.out.println(jsoncontent.tojsonstring()); return "爬取"; } catch (exception e) { e.printstacktrace(); return "重跑"; } finally{ entityutils.consumequietly(httpentity); } } private string getpage() { return page; } private void setpage(string page) { this.page = page; } }
补充知识:Java 利用 HttpClient 发送 POST 请求,将请求数据放到 body 里
我就废话不多说了,大家还是直接看代码吧~
/** * post请求 ,请求数据放到body里 * @param url 请求地址 * @param bodydata 参数 * @author wangyj * @date 2019年4月20日 */ public static string dopostbodydata(string url, string bodydata) throws exception{ string result = ""; closeablehttpclient httpclient = null; closeablehttpresponse response = null; try { httppost httppost = gethttppost(url, null); // 请求地址 httppost.setentity(new stringentity(bodydata, encoding)); httpclient = gethttpclient(); // 得到返回的response response = httpclient.execute(httppost); httpentity entity = response.getentity(); result = getresult(entity, encoding); } catch (exception e) { throw e; } finally { // 关闭httpclient if (null != httpclient) { httpclient.close(); } // 关闭response if (null != response) { entityutils.consume(response.getentity()); // 会自动释放连接 response.close(); } } return result; }
以上这篇java 实现通过 post 方式提交json参数操作就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。
上一篇: 不能同时用
下一篇: Python实现敏感词过滤的4种方法
推荐阅读
-
java 实现通过 post 方式提交json参数操作
-
POST不同提交方式对应的Content-Type,及java服务器接收参数方式
-
JAVA HttpURLConnection Post方式提交传递参数 javaHttpURLConnection
-
JAVA HttpURLConnection Post方式提交传递参数 javaHttpURLConnection
-
通过URL参数post传递的实现方式 PHP/Javascript
-
【JAVA】JSP中通过Get和Post方式传递页面参数。
-
java 实现通过 post 方式提交json参数操作
-
POST不同提交方式对应的Content-Type,及java服务器接收参数方式
-
通过URL参数post传递的实现方式 PHP/Javascript