Java实现爬取往期所有双色球开奖结果功能示例

程序员文章站 2023-12-21 12:23:22

本文实例讲述了Java实现爬取往期所有双色球开奖结果功能。分享给大家供大家参考，具体如下：梦想还是要有的，万一实现了呢？我相信经常买双色球的朋友和我都会有一个疑问，就是...

本文实例讲述了Java实现爬取往期所有双色球开奖结果功能。分享给大家供大家参考，具体如下：

梦想还是要有的，万一实现了呢？我相信经常买双色球的朋友和我都会有一个疑问，就是往期双色球的开奖结果是什么？我钟意的这一注双色球在往期是否开过一等奖，如果开过的话，基本上可以放弃这一注了，因为历史上应该没有出现过两期双色球开奖完全一致的吧？那么往期的开奖结果是什么呢？我自己用Java写了一个简易的类，爬取所有双色球开奖结果，本来想开发安卓版本的，由于UI等需要时间准备，有缘再开发吧。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
/**
 * Crawls the historical Double Color Ball (双色球) draw listing on kaijiang.zhcw.com
 * and writes every drawn number to a text file, one draw per line.
 *
 * <p>The listing is paginated. The first page is downloaded to discover the total
 * page count, then every page is fetched in turn with a short pause between requests.
 */
public class AllBalls {
    /** Listing pages are addressed as {@code prefix + pageNumber + suffix}. */
    private static final String BASE_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";
    private static final String BASE_URL_SUFFIX = ".html";

    /** Name of the result file, created in the working directory. */
    private static final String OUTPUT_FILE_NAME = "双色球.txt";

    /** Pause between two page requests, to keep the load on the site low. */
    private static final long REQUEST_INTERVAL_MILLIS = 1200L;

    /** Connect and read timeout for a single page request. */
    private static final int TIMEOUT_MILLIS = 6 * 1000;

    /** The "last page" pager link; group 1 is the page number right before {@code ">末页}. */
    private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("(\\d+)\">末页");

    /** One table cell holding the balls of a single draw; {@code [\s\S]} also spans line breaks. */
    private static final Pattern TERM_PATTERN = Pattern.compile(
            "<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");

    /** A run of digits that forms the whole text of an element, e.g. {@code >07<}. */
    private static final Pattern BALL_PATTERN = Pattern.compile(">(\\d+)<");

    /**
     * Entry point: downloads all listing pages and stores the numbers in {@code 双色球.txt}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("正在获取...");
        // Page 1 doubles as the source of the page count, so it is downloaded only once.
        String firstPage = getHtmlString(pageUrl(1));
        int pageCount = getPageCount(firstPage);
        if (pageCount > 0) {
            StringBuilder results = new StringBuilder();
            for (int page = 1; page <= pageCount; page++) {
                String pageContent = (page == 1) ? firstPage : getHtmlString(pageUrl(page));
                if (pageContent != null && !pageContent.isEmpty()) {
                    getOneTermContent(pageContent, results);
                } else {
                    System.out.println("第" + page + "页丢失");
                }
                // No pause is needed after the final page. An interrupt stops the crawl,
                // but whatever has been collected so far is still written out below.
                if (page < pageCount && !pauseBetweenRequests()) {
                    break;
                }
            }
            writeResults(results.toString());
        } else {
            System.out.println("结果页数为0");
        }
        System.out.println("完成！");
    }

    /** Builds the URL of the given 1-based listing page. */
    private static String pageUrl(int page) {
        return BASE_URL_PREFIX + page + BASE_URL_SUFFIX;
    }

    /**
     * Sleeps for the configured request interval.
     *
     * @return {@code true} if the pause completed, {@code false} if the thread was
     *         interrupted (the interrupt flag is restored for the caller)
     */
    private static boolean pauseBetweenRequests() {
        try {
            Thread.sleep(REQUEST_INTERVAL_MILLIS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Writes the collected numbers to the output file, replacing any previous content.
     *
     * @param results text to store, one draw per line
     */
    private static void writeResults(String results) {
        File file = new File(OUTPUT_FILE_NAME);
        // FileWriter truncates an existing file; try-with-resources closes it on every path.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            writer.write(results);
        } catch (IOException e) {
            System.err.println("Failed to write " + file.getAbsolutePath() + ": " + e);
        }
    }

    /**
     * Extracts the total page count from the pager of a listing page.
     *
     * @param result HTML of a listing page; may be {@code null} when the download failed
     * @return the number in front of the first {@code ">末页} link, or 0 if the input is
     *         {@code null}, has no such link, or the number does not fit in an {@code int}
     */
    private static int getPageCount(String result) {
        if (result == null) {
            return 0;
        }
        Matcher matcher = LAST_PAGE_PATTERN.matcher(result);
        if (!matcher.find()) {
            return 0;
        }
        try {
            return Integer.parseInt(matcher.group(1));
        } catch (NumberFormatException e) {
            // Only reachable when the digit run overflows int; treat it as "no pages".
            return 0;
        }
    }

    /**
     * Downloads a page and returns its body decoded as UTF-8.
     *
     * @param targetUrl absolute HTTP URL of the page
     * @return the page text with every line terminated by {@code \n}, or {@code null}
     *         if the request failed or the response status was not 200
     */
    private static String getHtmlString(String targetUrl) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(targetUrl).openConnection();
            // Plain page fetch: GET with no request body. Method names are case-sensitive.
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
            connection.setRequestProperty("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
            connection.setRequestProperty("Accept-Language", "zh-cn");
            connection.setRequestProperty("UA-CPU", "x86");
            // Only gzip is requested because only gzip is decoded below.
            connection.setRequestProperty("Accept-Encoding", "gzip");
            // This is a crawler, not a browser session: no keep-alive, to spare both ends.
            connection.setRequestProperty("Connection", "close");
            // Each URL is requested once per run, so a cache would bring nothing.
            connection.setUseCaches(false);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return null;
            }
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(openBody(connection), StandardCharsets.UTF_8))) {
                StringBuilder builder = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    builder.append(line).append("\n");
                }
                return builder.toString();
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + targetUrl + ": " + e);
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Opens the response body, unwrapping gzip when the server declares it. */
    private static InputStream openBody(HttpURLConnection connection) throws IOException {
        InputStream raw = connection.getInputStream();
        String encoding = connection.getContentEncoding();
        // Locale.ROOT keeps the comparison independent of the platform locale.
        if (encoding != null && encoding.toLowerCase(Locale.ROOT).contains("gzip")) {
            return new GZIPInputStream(raw);
        }
        return raw;
    }

    /**
     * Finds every draw cell in a listing page and appends its numbers to {@code out}.
     *
     * @param pageContent HTML of one listing page
     * @param out receives one line per draw
     */
    private static void getOneTermContent(String pageContent, StringBuilder out) {
        Matcher matcher = TERM_PATTERN.matcher(pageContent);
        while (matcher.find()) {
            getOneTermNumbers(matcher.group(), out);
        }
    }

    /**
     * Appends the numbers of a single draw as one line: each number is followed by a
     * space and the line ends with {@code \r\n}.
     *
     * @param oneTermContent HTML of one draw cell
     * @param out receives the formatted line
     */
    private static void getOneTermNumbers(String oneTermContent, StringBuilder out) {
        Matcher matcher = BALL_PATTERN.matcher(oneTermContent);
        while (matcher.find()) {
            out.append(matcher.group(1)).append(" ");
        }
        out.append("\r\n");
    }
}

运行结果：

Java实现爬取往期所有双色球开奖结果功能示例

更多关于Java相关内容感兴趣的读者可查看本站专题：《Java网络编程技巧总结》、《Java Socket编程技巧总结》、《Java文件与目录操作技巧汇总》、《Java数据结构与算法教程》、《Java操作DOM节点技巧总结》和《Java缓存操作技巧汇总》

希望本文所述对大家Java程序设计有所帮助。