用Jsoup爬取中国天气网的天气数据并存入数据库
程序员文章站
2022-03-22 20:51:58
...
用Jsoup爬取中国天气网的天气数据并存入数据库
文章目录
1、Jsoup介绍
1.1 概念:
jsoup 是一款Java 的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据。
1.2功能:
- 从一个URL,文件或字符串中解析HTML;
- 使用DOM或CSS选择器来查找、取出数据;
- 可操作HTML元素、属性、文本;
1.3 流程图:
2、代码实现部分
2.1项目目录结构
2.2 实体类代码 Weather.java
package com.pojo;
/**
* 实体类
*/
import lombok.Data;
import java.util.Date;
@Data
public class Weather {
private String id;
private String city;
private String date;
private String status;
private String maxTem;
private String minTem;
private String wind;
private String windLevel;
private Date update_time;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCity() {
return city;
}
public void setCity(String city) {
this.city = city;
}
public String getDate() {
return date;
}
public void setDate(String date) {
this.date = date;
}
public String getStatus() {
return status;
}
public void setStatus(String status) {
this.status = status;
}
public String getMaxTem() {
return maxTem;
}
public void setMaxTem(String maxTem) {
this.maxTem = maxTem;
}
public String getMinTem() {
return minTem;
}
public void setMinTem(String minTem) {
this.minTem = minTem;
}
public String getWind() {
return wind;
}
public void setWind(String wind) {
this.wind = wind;
}
public String getWindLevel() {
return windLevel;
}
public void setWindLevel(String windLevel) {
this.windLevel = windLevel;
}
public Date getUpdate_time() {
return update_time;
}
public void setUpdate_time(Date update_time) {
this.update_time = update_time;
}
public Weather(String id, String city, String date, String status, String maxTem, String minTem, String wind, String windLevel, Date update_time) {
super();
this.id = id;
this.city = city;
this.date = date;
this.status = status;
this.maxTem = maxTem;
this.minTem = minTem;
this.wind = wind;
this.windLevel = windLevel;
this.update_time = update_time;
}
public Weather() {
super();
}
@Override
public String toString() {
return "Weather{" +
"id='" + id + '\'' +
", city='" + city + '\'' +
", date='" + date + '\'' +
", status='" + status + '\'' +
", maxTem='" + maxTem + '\'' +
", minTem='" + minTem + '\'' +
", wind='" + wind + '\'' +
", windLevel='" + windLevel + '\'' +
", update_time=" + update_time +
'}';
}
}*
2.3 解析网页 HtmlParseUtil.java
package com.util;
import com.pojo.Weather;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.*;
/**
 * Downloads the forecast page at {@link #url}, turns each forecast list item into a
 * {@link Weather} object and inserts the results into the {@code weather} table.
 */
public class HtmlParseUtil {
    /** Address of the forecast page that is parsed. */
    static String url = "http://www.weather.com.cn/weather15d/101271001.shtml";

    /**
     * Running number of the last parsed forecast entry. It is incremented once per
     * entry and used as the key under which the entry is echoed to the console.
     */
    static int i = 0;

    /** Parameterized insert; column order must match the bind order in {@code saveForecasts}. */
    private static final String INSERT_SQL =
            "insert into weather(`date`,`status`,`maxTem`,`minTem`,`updateTime`,`wind`,`windLevel`) values(?,?,?,?,?,?,?)";

    /** Timeout, in milliseconds, passed to Jsoup when fetching the page. */
    private static final int FETCH_TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the forecast page, stores every parsed entry in the database and then
     * prints each entry as {@code key,Weather{...}}.
     *
     * <p>Database failures are reported on the console and do not abort the run, so
     * the parsed entries are still printed when the database is unavailable.
     *
     * @throws Exception if the page cannot be fetched or does not contain the
     *                   expected {@code 15d} element
     */
    public static void parseWeather() throws Exception {
        Map<Integer, Weather> forecasts = fetchForecasts();
        saveForecasts(forecasts.values());
        for (Map.Entry<Integer, Weather> entry : forecasts.entrySet()) {
            System.out.println(entry.getKey() + "," + entry.getValue().toString());
        }
    }

    /** Downloads the page and converts its forecast list items into Weather objects, in page order. */
    private static Map<Integer, Weather> fetchForecasts() throws Exception {
        Document document = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);
        Element forecastRoot = document.getElementById("15d");
        if (forecastRoot == null) {
            // Fail with a descriptive message instead of a bare NullPointerException.
            throw new IllegalStateException("Element with id '15d' not found at " + url);
        }

        // All <li> elements below the forecast container.
        Elements items = forecastRoot.getElementsByTag("li");

        // LinkedHashMap keeps the entries in the order they appear on the page.
        Map<Integer, Weather> forecasts = new LinkedHashMap<>();

        // Index 0 is skipped, as before; presumably it is not a forecast row -- confirm
        // against the page markup. Starting the loop at 1 also copes with an empty list.
        for (int index = 1; index < items.size(); index++) {
            Element item = items.get(index);

            // Temperature text is "max/min". The -1 limit keeps trailing empty parts so
            // that "12/" or "/" still yield two parts instead of failing on tems[1].
            String[] tems = firstText(item, "tem").split("/", -1);

            Weather weather = new Weather();
            weather.setDate(firstText(item, "time"));
            weather.setStatus(firstText(item, "wea"));
            weather.setMaxTem(tems[0]);
            weather.setMinTem(tems.length > 1 ? tems[1] : "");
            weather.setWind(firstText(item, "wind"));
            weather.setWindLevel(firstText(item, "wind1"));

            i++;
            forecasts.put(i, weather);
        }
        return forecasts;
    }

    /** Returns the text of the first descendant carrying the given CSS class, or "" if there is none. */
    private static String firstText(Element parent, String cssClass) {
        return parent.getElementsByClass(cssClass).eq(0).text();
    }

    /**
     * Inserts all forecasts over a single connection and prepared statement. Both are
     * closed by try-with-resources on every path, including failures.
     */
    private static void saveForecasts(Collection<Weather> forecasts) {
        if (forecasts.isEmpty()) {
            return; // nothing to store, so do not open a connection at all
        }
        try (Connection conn = datautils.getConnection();
             PreparedStatement ps = conn.prepareStatement(INSERT_SQL)) {
            // NOTE(review): java.sql.Date carries no time of day; if the updateTime
            // column is a DATETIME/TIMESTAMP, setTimestamp may be the better fit -- confirm.
            java.sql.Date updateDate = new java.sql.Date(new Date().getTime());
            for (Weather forecast : forecasts) {
                try {
                    ps.setString(1, forecast.getDate());
                    ps.setString(2, forecast.getStatus());
                    ps.setString(3, forecast.getMaxTem());
                    ps.setString(4, forecast.getMinTem());
                    ps.setDate(5, updateDate);
                    ps.setString(6, forecast.getWind());
                    ps.setString(7, forecast.getWindLevel());
                    ps.executeUpdate();
                } catch (SQLException e) {
                    // One failed row must not prevent the remaining rows from being stored.
                    System.out.println("数据库访问失败");
                    e.printStackTrace();
                }
            }
        } catch (SQLException e) {
            // Connection could not be opened, statement could not be prepared, or close failed.
            System.out.println("数据库访问失败");
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point; runs {@link #parseWeather()} once.
     *
     * @param args unused
     * @throws Exception propagated from {@link #parseWeather()}
     */
    public static void main(String[] args) throws Exception {
        parseWeather();
    }
}
2.4 连接数据库 datautils.java
package com.util;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * Holds the JDBC settings for the local {@code weather} database and hands out
 * connections to it.
 */
public class datautils {
    /** Fully-qualified class name of the JDBC driver. */
    public static final String Driver = "com.mysql.jdbc.Driver";
    /** JDBC URL of the database. */
    public static final String url = "jdbc:mysql://localhost:3306/weather?useUnicode=true&characterEncoding=utf8&useSSL=true";
    /** Database user name. */
    public static final String user = "root";
    /** Database password. */
    public static final String password = "root";

    // Load the JDBC driver class once, when this class is initialized. A missing
    // driver is only reported here; it does not stop class initialization.
    static {
        try {
            Class.forName(Driver);
        } catch (ClassNotFoundException driverMissing) {
            System.out.println("找不到驱动程序类,加载驱动失败!");
            driverMissing.printStackTrace();
        }
    }

    /**
     * Opens a new connection using {@link #url}, {@link #user} and {@link #password}.
     * The caller is responsible for closing it.
     *
     * @return a newly opened connection
     * @throws SQLException if the connection cannot be established
     */
    public static Connection getConnection() throws SQLException {
        return DriverManager.getConnection(url, user, password);
    }
}
2.5 引入依赖 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.2.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com</groupId>
<artifactId>weather</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>weather</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-r2dbc</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-freemarker</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web-services</artifactId>
</dependency>
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>2.1.3</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>dev.miku</groupId>
<artifactId>r2dbc-mysql</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>io.projectreactor</groupId>
<artifactId>reactor-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.security</groupId>
<artifactId>spring-security-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<!-- Upgraded from 1.7.3 (a 2013 release) to pick up later parser security fixes; still targets Java 8. -->
<version>1.15.3</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
注:后期需要做一些东西,所以引入的依赖比较多
3、效果图
3.1 用Jsoup爬取到的一部分数据
3.2 引入数据库
注:暂时还不齐全
4、过程中遇到的部分问题
–数据库连接不成功
4.1 MySQL问题
在写代码的过程中,由于自己还不会用mysql,就出现了一系列愚蠢的问题,不过在后来解决了。
具体问题:将MySQL换成了PHP,一时不知道自己新建的数据库去了哪里,也打不开。
大致如此:经过排查,是PHP中数据库的密码错误,导致数据库打不开,所以一定要细心,保持Navicat和PHP中数据库的密码一致。
4.2 IDEA与数据库的连接问题
4.2.1 jar包
需要将mysql-connector-java-5.1.44.jar引入项目结构的lib文件夹下,必不可少,也可以是其他版本。
最开始导入的时候不能进行分级,所以还需要再设置一下。
步骤:
然后点ok就行了。
4.2.2 使用IDEA连接数据库
1.使用JDBC API 连接和访问数据库,一般分为以下5个步骤
(1)加载驱动程序
(2)建立连接对象
(3)创建语句对象
(4)获得SQL语句的执行结果
(5)关闭建立的对象,释放资源
2.①这是数据库窗口页面,里面可以进行一切数据库操作
②点击Database,出现下面界面,按照顺序进行,进入设置
③设置好自己的数据库的相关内容
点击Test Connection 后可能会出现下面的问题:
有三个解决方案:
可参考此博客:添加链接描述
最终结果是这样:
有待改进~~~///(v)\~~~
上一篇: 值得了解的6大React组件文档化工具(推荐收藏)
下一篇: js如何调用php方法参数