欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

爬虫完美绕过服务器反爬检查

程序员文章站 2022-05-04 11:28:30
...
// Install the trust-manager based default socket factory first.
trustAllHttpsCertificates();

// Hostname verifier that prints the requested host next to the session's peer
// host and then accepts the connection unconditionally.
// WARNING: because verify() always returns true, hostname mismatches are never rejected.
HostnameVerifier hv = new HostnameVerifier() {
    @Override
    public boolean verify(String requestedHost, SSLSession sslSession) {
        String peerHost = sslSession.getPeerHost();
        System.out.println("Warning: URL Host: " + requestedHost + " vs. " + peerHost);
        return true;
    }
};

// Make it the default verifier for HttpsURLConnection.
HttpsURLConnection.setDefaultHostnameVerifier(hv);
// Insert the fetched HTML code
        try {
            // Open a connection to the address held in `string`.
            URLConnection conn = new URL(string).openConnection();
            // Send a browser-like User-Agent header with the request.
            conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            conn.connect();

            // Decode the response body as UTF-8 and buffer it for reading.
            isr = new InputStreamReader(conn.getInputStream(), Charset.forName("UTF-8"));
            bufr = new BufferedReader(isr);
        } catch (Exception ex) {
            // Any failure is only printed; nothing is rethrown.
            ex.printStackTrace();
        }

trustAllHttpsCertificates():

/**
 * Replaces the default SSL socket factory of {@code HttpsURLConnection} with
 * one whose only trust manager is a {@code miTM} instance.
 *
 * <p>WARNING: {@code miTM} never rejects a certificate chain, so after this
 * call HTTPS connections made through the default factory are not protected
 * against forged server certificates.
 *
 * <p>If the SSL context cannot be obtained or initialised, the failure is
 * printed and the current default socket factory is left untouched.
 */
private void trustAllHttpsCertificates() {
    javax.net.ssl.TrustManager[] trustAllCerts = { new miTM() };

    javax.net.ssl.SSLContext sc;
    try {
        sc = javax.net.ssl.SSLContext.getInstance("SSL");
        // No key managers and the default source of randomness.
        sc.init(null, trustAllCerts, null);
    } catch (NoSuchAlgorithmException e) {
        // Without a context there is nothing to install; continuing would
        // dereference null.
        e.printStackTrace();
        return;
    } catch (KeyManagementException e) {
        // An uninitialised context cannot hand out a socket factory.
        e.printStackTrace();
        return;
    }

    javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
}
     /**
      * Trust manager whose checks never reject a certificate chain.
      *
      * <p>WARNING: both {@code checkServerTrusted} and {@code checkClientTrusted}
      * return without inspecting their arguments, so any peer certificate is
      * accepted.
      */
     static class miTM implements javax.net.ssl.TrustManager,
             javax.net.ssl.X509TrustManager {

         /**
          * Returns the acceptable issuer certificates.
          *
          * @return an empty array; the interface contract requires a non-null
          *         result, so {@code null} is never returned
          */
         @Override
         public java.security.cert.X509Certificate[] getAcceptedIssuers() {
             return new java.security.cert.X509Certificate[0];
         }

         /**
          * Reports whether the given server certificates are trusted.
          *
          * @param certs ignored
          * @return always {@code true}
          */
         public boolean isServerTrusted(
                 java.security.cert.X509Certificate[] certs) {
             return true;
         }

         /**
          * Reports whether the given client certificates are trusted.
          *
          * @param certs ignored
          * @return always {@code true}
          */
         public boolean isClientTrusted(
                 java.security.cert.X509Certificate[] certs) {
             return true;
         }

         /**
          * Accepts the server certificate chain without examining it.
          *
          * @param certs    ignored
          * @param authType ignored
          * @throws java.security.cert.CertificateException never thrown by this implementation
          */
         @Override
         public void checkServerTrusted(
                 java.security.cert.X509Certificate[] certs, String authType)
                 throws java.security.cert.CertificateException {
             // Intentionally empty: every chain is accepted.
         }

         /**
          * Accepts the client certificate chain without examining it.
          *
          * @param certs    ignored
          * @param authType ignored
          * @throws java.security.cert.CertificateException never thrown by this implementation
          */
         @Override
         public void checkClientTrusted(
                 java.security.cert.X509Certificate[] certs, String authType)
                 throws java.security.cert.CertificateException {
             // Intentionally empty: every chain is accepted.
         }
     }