Java 抓取2016年统计用区划代码和城乡划分代码
程序员文章站
2022-03-02 21:03:43
...
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Scrapes the province / city / county pages reachable from {@link #SITE_URL}
 * and turns them into {@code insert into sys_region ...} statements, which are
 * handed to {@code Region.writeFile}.
 *
 * <p>Every page is decoded as GBK. All region codes are normalised to six
 * characters before they are stored.
 *
 * @author brianye QQ
 * @date 2017-7-10
 */
public class GetRegion {
    // Left public and non-final so existing code that reads or reassigns it keeps working.
    public static String SITE_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html";

    /** Provinces collected by {@link #getProvince()}, each carrying its cities and counties. */
    private static final List<RegionEntry> regions = new ArrayList<RegionEntry>();

    /** Charset used to decode every fetched page. */
    private static final String PAGE_CHARSET = "GBK";

    /** Length every stored region code is padded or truncated to. */
    private static final int CODE_LENGTH = 6;

    /** Pause after each province has been processed, in milliseconds. */
    private static final long PROVINCE_DELAY_MILLIS = 2000L;

    /**
     * Entry point: scrapes all levels, renders the SQL script and writes it out.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("抓取开始:"+ new Date());
        getProvince();
        Region.writeFile(buildInsertStatements());
        System.out.println("抓取完毕:"+ new Date());
    }

    /**
     * Renders one insert statement per collected province, city and county.
     * The column layout of {@code sys_region} is not visible here; the value
     * order and the trailing '1' (province rows) / '0' (other rows) flag are
     * kept exactly as they were. Scraped values are quote-escaped.
     *
     * @return the complete SQL script, one statement per line (CRLF terminated)
     */
    private static String buildInsertStatements() {
        StringBuilder sql = new StringBuilder();
        for (RegionEntry one : regions) {
            String provinceCode = sqlText(one.getCode());
            sql.append("insert into sys_region values('").append(provinceCode)
               .append("', '").append(provinceCode)
               .append("',null, null,'").append(provinceCode)
               .append("', '000000', '").append(sqlText(one.getName()))
               .append("', '1' );\r\n");
            for (RegionEntry two : one.getSub()) {
                String cityCode = sqlText(two.getCode());
                sql.append("insert into sys_region values('").append(cityCode)
                   .append("', '").append(cityCode)
                   .append("',null, '").append(cityCode)
                   .append("','").append(provinceCode)
                   .append("', '").append(provinceCode)
                   .append("', '").append(sqlText(two.getName()))
                   .append("', '0' );\r\n");
                for (RegionEntry three : two.getSub()) {
                    String areaCode = sqlText(three.getCode());
                    sql.append("insert into sys_region values('").append(areaCode)
                       .append("', '").append(areaCode)
                       .append("', '").append(areaCode)
                       .append("', '").append(cityCode)
                       .append("','").append(provinceCode)
                       .append("', '").append(cityCode)
                       .append("', '").append(sqlText(three.getName()))
                       .append("', '0' );\r\n");
                }
            }
        }
        return sql.toString();
    }

    /**
     * Makes a scraped value safe to place between single quotes in a SQL
     * literal by doubling embedded single quotes. A null value renders as the
     * text "null", which is what appending it directly would have produced.
     */
    private static String sqlText(Object value) {
        return String.valueOf(value).replace("'", "''");
    }

    /**
     * Pads with trailing zeros or truncates so the code is exactly
     * {@link #CODE_LENGTH} characters long.
     */
    private static String normalizeCode(String raw) {
        if (raw.length() >= CODE_LENGTH) {
            return raw.substring(0, CODE_LENGTH);
        }
        StringBuilder padded = new StringBuilder(raw);
        while (padded.length() < CODE_LENGTH) {
            padded.append('0');
        }
        return padded.toString();
    }

    /**
     * Downloads and parses one page, always releasing the connection stream.
     *
     * @param url absolute page address; also used as the base URI for resolving links
     * @return the parsed document
     * @throws IOException if the page cannot be opened or read
     */
    private static Document fetch(String url) throws IOException {
        InputStream in = new URL(url).openStream();
        // try/finally rather than try-with-resources: the file uses no Java 7 syntax elsewhere.
        try {
            return Jsoup.parse(in, PAGE_CHARSET, url);
        } finally {
            in.close();
        }
    }

    /**
     * Reads the province index page, then descends into each province's city
     * page. Sleeps between provinces; if the thread is interrupted during that
     * pause the interrupt flag is restored and scraping stops, keeping what was
     * collected so far.
     */
    private static void getProvince() {
        Document doc;
        try {
            doc = fetch(SITE_URL);
        } catch (IOException e) {
            System.err.println("Failed to load province index: " + SITE_URL);
            e.printStackTrace();
            return;
        }
        Elements links = doc.select("tr.provincetr").select("a");
        for (Element link : links) {
            RegionEntry province = new RegionEntry();
            // The code is the link's file name without extension, e.g. "11.html" -> "11".
            String href = link.attr("href");
            int dot = href.indexOf('.');
            String stem = dot >= 0 ? href.substring(0, dot) : href;
            province.setCode(normalizeCode(stem));
            province.setName(link.text());
            String absHref = link.attr("abs:href");
            if (absHref.length() > 0) {
                getCity(absHref, province);
            }
            regions.add(province);
            try {
                Thread.sleep(PROVINCE_DELAY_MILLIS);
            } catch (InterruptedException interrupted) {
                // Restore the flag so callers can see the interruption, then stop.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Reads a province's city page and appends each city to the province.
     * A city named "市辖区" takes the province's name instead. Rows without
     * links are read from their table cells and get no county lookup.
     *
     * @param url    absolute address of the province's city page
     * @param region province entry that receives the cities
     */
    private static void getCity(String url, RegionEntry region) {
        Document doc;
        try {
            doc = fetch(url);
        } catch (IOException e) {
            System.err.println("Failed to load city page: " + url);
            e.printStackTrace();
            return;
        }
        for (Element row : doc.select("tr.citytr")) {
            Elements anchors = row.select("a");
            boolean linked = anchors.size() >= 2;
            Elements cells = linked ? anchors : row.select("td");
            if (cells.size() < 2) {
                continue; // not a code/name row
            }
            RegionEntry city = new RegionEntry();
            String name = cells.get(1).text();
            if ("市辖区".equals(name)) {
                name = region.getName();
            }
            city.setCode(normalizeCode(cells.get(0).text()));
            city.setName(name);
            if (linked) {
                String absHref = cells.get(0).attr("abs:href");
                if (absHref.length() > 0) {
                    getArea(absHref, city);
                }
            }
            region.getSub().add(city);
        }
    }

    /**
     * Reads a city's county page and appends each county to the city.
     * Rows without links are read from their table cells; their codes are
     * shortened to six characters like every other code.
     *
     * @param url    absolute address of the city's county page
     * @param region city entry that receives the counties
     */
    private static void getArea(String url, RegionEntry region) {
        Document doc;
        try {
            doc = fetch(url);
        } catch (IOException e) {
            System.err.println("Failed to load county page: " + url);
            e.printStackTrace();
            return;
        }
        for (Element row : doc.select("tr.countytr")) {
            Elements anchors = row.select("a");
            Elements cells = anchors.size() >= 2 ? anchors : row.select("td");
            if (cells.size() < 2) {
                continue; // not a code/name row
            }
            RegionEntry area = new RegionEntry();
            area.setCode(normalizeCode(cells.get(0).text()));
            area.setName(cells.get(1).text());
            region.getSub().add(area);
        }
    }
}
上一篇: Java爬统计局12位区划代码
下一篇: 前端缓存那些事
推荐阅读
-
统计用区划代码和城乡划分代码
-
统计用区划和城乡划分代码,在线爬取代码
-
Python爬虫练习五:爬取 2017年统计用区划代码和城乡划分代码(附代码与全部数据)
-
2016年统计用区划代码和城乡划分代码(截止2016年07月31日) 省市县镇+url
-
Python获取[2016年统计用区划代码和城乡划分代码(截止2016年07月31日)]
-
python爬取2017年统计用区划代码和城乡划分代码(截止2017年10月31日)
-
2018年统计用区划代码和城乡划分代码(截止2018年10月31日)(数据及python爬虫代码)
-
python爬虫练习五(补充): 2018年统计用区划代码和城乡划分代码(附代码与全部数据)
-
java 抓取2016年统计用区划代码和城乡划分代码
-
Java多线程爬虫-Java爬虫-2018国家统计局区划和城乡划分代码以及数据库、json文件