Java实现爬取往期所有双色球开奖结果功能示例
程序员文章站
2024-02-19 21:50:28
本文实例讲述了Java实现爬取往期所有双色球开奖结果功能。分享给大家供大家参考,具体如下:
梦想还是要有的,万一实现了呢?我相信经常买双色球的朋友和我都会有一个疑问,就是...
本文实例讲述了Java实现爬取往期所有双色球开奖结果功能。分享给大家供大家参考,具体如下:
梦想还是要有的,万一实现了呢?我相信经常买双色球的朋友和我都会有一个疑问,就是往期双色球的开奖结果是什么?我钟意的这一注双色球在往期是否开过一等奖,如果开过的话,基本上可以放弃这一注了,因为历史上应该没有出现过两期双色球开奖完全一致的吧?那么往期的开奖结果是什么呢?我自己用Java写了一个简易的类,爬取所有双色球开奖结果,本来想开发安卓版本的,由于UI等需要时间准备,有缘再开发吧。
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Downloads every page of the double-color-ball lottery result list and
 * writes the numbers found in each result row to a text file, one row per line.
 *
 * <p>All non-ASCII string literals are written as Unicode escapes so that the
 * file compiles identically whatever source encoding the compiler assumes;
 * the runtime values are the original Chinese strings.
 */
public class AllBalls {

    /** Result list pages are addressed as prefix + pageNumber + suffix. */
    private static final String BASE_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";
    private static final String BASE_URL_SUFFIX = ".html";

    /** Output file name ("double color ball" + ".txt"), created in the working directory. */
    private static final String OUTPUT_FILE_NAME = "\u53cc\u8272\u7403.txt";

    /** Pause between two page requests, to keep the load on the server low. */
    private static final long REQUEST_INTERVAL_MILLIS = 1200;

    /** Connect and read timeout for a single request. */
    private static final int TIMEOUT_MILLIS = 6 * 1000;

    /** Console messages (Chinese): "Fetching...", "Page " + n + " is missing", "Page count is 0", "Done!". */
    private static final String MSG_FETCHING = "\u6b63\u5728\u83b7\u53d6...";
    private static final String MSG_PAGE_PREFIX = "\u7b2c";
    private static final String MSG_PAGE_MISSING_SUFFIX = "\u9875\u4e22\u5931";
    private static final String MSG_NO_PAGES = "\u7ed3\u679c\u9875\u6570\u4e3a0";
    private static final String MSG_DONE = "\u5b8c\u6210!";

    /** Digits that directly precede the closing quote of the "last page" link; group 1 is the page count. */
    private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("(\\d+)\">\u672b\u9875");

    /** One result cell: from the padded, centered td up to the first closing em + td. */
    private static final Pattern TERM_PATTERN = Pattern.compile(
            "<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");

    /** A run of digits that forms the whole text between two tags; group 1 is the number. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile(">(\\d+)<");

    /**
     * Entry point: determines the number of list pages from page 1, fetches each
     * page in turn, and writes all extracted numbers to {@link #OUTPUT_FILE_NAME}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(MSG_FETCHING);
        StringBuilder results = new StringBuilder();

        int pageCount = getPageCount(getHtmlString(BASE_URL_PREFIX + 1 + BASE_URL_SUFFIX));
        if (pageCount > 0) {
            for (int i = 1; i <= pageCount; i++) {
                String pageContent = getHtmlString(BASE_URL_PREFIX + i + BASE_URL_SUFFIX);
                if (pageContent != null && !pageContent.isEmpty()) {
                    results.append(getOneTermContent(pageContent));
                } else {
                    System.out.println(MSG_PAGE_PREFIX + i + MSG_PAGE_MISSING_SUFFIX);
                }
                try {
                    Thread.sleep(REQUEST_INTERVAL_MILLIS);
                } catch (InterruptedException e) {
                    // Restore the flag and stop crawling; whatever was collected so far is still written.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
            writeResults(results.toString());
        } else {
            System.out.println(MSG_NO_PAGES);
        }
        System.out.println(MSG_DONE);
    }

    /**
     * Writes the collected text to the output file, replacing any previous content.
     * The text consists of digits, spaces and line breaks only.
     */
    private static void writeResults(String text) {
        File file = new File(OUTPUT_FILE_NAME);
        // FileWriter truncates an existing file, so no explicit delete is needed.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            writer.write(text);
        } catch (IOException e) {
            System.err.println("Failed to write " + file.getAbsolutePath() + ": " + e);
        }
    }

    /**
     * Extracts the total number of list pages from the HTML of a list page.
     *
     * @param result page HTML; may be {@code null} when the download failed
     * @return the number in front of the "last page" link, or 0 when the page is
     *         {@code null}, contains no such link, or the number does not fit an {@code int}
     */
    static int getPageCount(String result) {
        if (result == null) {
            return 0;
        }
        Matcher matcher = LAST_PAGE_PATTERN.matcher(result);
        if (!matcher.find()) {
            return 0;
        }
        try {
            return Integer.parseInt(matcher.group(1));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /**
     * Downloads a page and returns its body decoded as UTF-8.
     *
     * @param targetUrl absolute URL of the page
     * @return the page text with every line terminated by {@code '\n'}, or
     *         {@code null} when the response status is not 200 or an I/O error occurs
     */
    private static String getHtmlString(String targetUrl) {
        String content = null;
        HttpURLConnection connection = null;
        try {
            URL url = new URL(targetUrl);
            connection = (HttpURLConnection) url.openConnection();
            // NOTE(review): POST is kept from the original program; a plain page
            // fetch would normally be a GET - confirm what the server expects.
            connection.setRequestMethod("POST");
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
            connection.setRequestProperty("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, "
                    + "application/x-shockwave-flash, application/vnd.ms-powerpoint, "
                    + "application/vnd.ms-excel, application/msword, */*");
            connection.setRequestProperty("Accept-Language", "zh-cn");
            connection.setRequestProperty("UA-CPU", "x86");
            // Only gzip is advertised because gzip is the only encoding decoded below.
            connection.setRequestProperty("Accept-Encoding", "gzip");
            connection.setRequestProperty("Content-Type", "text/html");
            // This is a scraper, not a browsing session: close the connection after
            // each request instead of keeping it alive.
            connection.setRequestProperty("Connection", "close");
            // Caching is pointless here; the same URL is not requested repeatedly.
            connection.setUseCaches(false);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.setDoOutput(true);
            connection.setDoInput(true);
            connection.setRequestProperty("Charset", "UTF-8");
            connection.connect();

            if (HttpURLConnection.HTTP_OK == connection.getResponseCode()) {
                String encoding = connection.getContentEncoding();
                // Locale.ROOT keeps the comparison independent of the default locale.
                boolean gzipped = encoding != null
                        && encoding.toLowerCase(Locale.ROOT).contains("gzip");
                InputStream rawStream = connection.getInputStream();
                InputStream inputStream = gzipped ? new GZIPInputStream(rawStream) : rawStream;
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
                    StringBuilder builder = new StringBuilder();
                    String line;
                    while ((line = reader.readLine()) != null) {
                        builder.append(line).append("\n");
                    }
                    content = builder.toString();
                }
            }
        } catch (IOException e) {
            // Best effort: a failed page is reported and skipped by the caller.
            System.err.println("Failed to fetch " + targetUrl + ": " + e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return content;
    }

    /**
     * Extracts every result cell from a list page.
     *
     * @param pageContent page HTML; {@code null} is treated as empty
     * @return one line per result cell, each being the cell's numbers (every number
     *         followed by a space) terminated by {@code "\r\n"}; empty when nothing matches
     */
    static String getOneTermContent(String pageContent) {
        StringBuilder lines = new StringBuilder();
        if (pageContent == null) {
            return lines.toString();
        }
        Matcher matcher = TERM_PATTERN.matcher(pageContent);
        while (matcher.find()) {
            lines.append(getOneTermNumbers(matcher.group())).append("\r\n");
        }
        return lines.toString();
    }

    /**
     * Collects the numbers of a single result cell.
     *
     * @param oneTermContent HTML fragment of one result cell
     * @return every digit run that forms the whole text between two tags, in document
     *         order, each followed by a single space
     */
    static String getOneTermNumbers(String oneTermContent) {
        StringBuilder numbers = new StringBuilder();
        Matcher matcher = NUMBER_PATTERN.matcher(oneTermContent);
        while (matcher.find()) {
            numbers.append(matcher.group(1)).append(' ');
        }
        return numbers.toString();
    }
}
运行结果:
对更多Java相关内容感兴趣的读者可查看本站专题:《Java网络编程技巧总结》、《Java Socket编程技巧总结》、《Java文件与目录操作技巧汇总》、《Java数据结构与算法教程》、《Java操作DOM节点技巧总结》和《Java缓存操作技巧汇总》
希望本文所述对大家Java程序设计有所帮助。