爬虫完美绕过服务器反爬检查
程序员文章站
2022-05-04 11:28:30
...
HostnameVerifier hv = new HostnameVerifier() {
public boolean verify(String urlHostName, SSLSession session)
{
System.out.println("Warning: URL Host: " + urlHostName + " vs. " + session.getPeerHost());
return true;
}
};
trustAllHttpsCertificates();
HttpsURLConnection.setDefaultHostnameVerifier(hv);
// 插入取到的html代码
try {
URL url = new URL(string);
URLConnection connection = url.openConnection();
connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
connection.connect();
/*URLConnection conn = url.openConnection();
conn.setRequestProperty("User-Agent",
"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");*/
isr=new InputStreamReader(connection.getInputStream(),Charset.forName("UTF-8"));
//isr=new InputStreamReader(conn.getInputStream(), "UTF-8");
bufr = new BufferedReader(isr);
} catch (Exception e) {
e.printStackTrace();
}
trustAllHttpsCertificates():
private void trustAllHttpsCertificates() {
javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1];
javax.net.ssl.TrustManager tm = new miTM();
trustAllCerts[0] = tm;
javax.net.ssl.SSLContext sc = null;
try {
sc = javax.net.ssl.SSLContext
.getInstance("SSL");
} catch (NoSuchAlgorithmException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
try {
sc.init(null, trustAllCerts, null);
} catch (KeyManagementException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc
.getSocketFactory());
}
static class miTM implements javax.net.ssl.TrustManager,
javax.net.ssl.X509TrustManager {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public boolean isServerTrusted(
java.security.cert.X509Certificate[] certs) {
return true;
}
public boolean isClientTrusted(
java.security.cert.X509Certificate[] certs) {
return true;
}
public void checkServerTrusted(
java.security.cert.X509Certificate[] certs, String authType)
throws java.security.cert.CertificateException {
return;
}
public void checkClientTrusted(
java.security.cert.X509Certificate[] certs, String authType)
throws java.security.cert.CertificateException {
return;
}
}
上一篇: Python网络爬虫2
下一篇: 2 网络爬虫基础