记录一个mysql大量数据快速插入的方法
经过数月的历练与开发,终于在这个月手上负责的一个项目即将交付了,看着这一两个月让我偶感压力的繁杂的事情一件件处理完毕,原本动荡的心境也渐渐平静了些许。此时再回过头去看那些困扰我N多个日夜的某些问题,其实也不过如此。有些事情自己无法改变,只能乐观地、坦然地面对。面对充满未知的未来,努力让自己保持每一天都快乐才是王道,哈哈!
在本周将一个需要处理上千万数据量的项目部署到UAT环境后,发现数据插入到mysql(mariadb)的时间太慢了,尽管已经根据项目需要将原来的innodb存储引擎修改为了更适合大量数据插入和查询效率更高的myisam引擎。通过对代码再次进行分析后发现是由于代码对于数据插入到mysql的过程中耗费了大量的时间,由于myisam又不支持事务,不能设置批量插入,思量后决定将之前的单条insert语句修改为多条数据的insert语句或许性能就会有所提升,正好趁着今天放假将这块的代码先写个DEMO给测试下。
测试代码
测试代码如下:
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
@RequestMapping(value = "mysql-test")
@RestController
public class MysqlTestController {

    /** Number of rows bundled into one multi-row INSERT statement. */
    private static final int BATCH_SIZE = 200;

    /** Number of bind parameters (columns) per inserted row. */
    private static final int COLUMN_COUNT = 8;

    /** Single-row INSERT; the batched variant appends additional "(?,...)" groups. */
    private static final String BASE_INSERT_SQL =
            "INSERT INTO `userInfo` (`userId`, `userName`, `gender`, `phone`, `address`, `birthDay`, `school`, `hobby`) VALUES (?,?,?,?,?,?,?,?)";

    @Autowired
    private JdbcTemplate jdbcTemplate;

    /**
     * Benchmark endpoint comparing two insert strategies.
     *
     * @param size     number of rows to generate and insert
     * @param sharding true = one INSERT per row; false = multi-row batched INSERT
     * @return a human-readable summary containing the elapsed milliseconds
     */
    @GetMapping(value = "mulTest")
    public String mulTest(Integer size, @RequestParam(defaultValue = "false") boolean sharding) {
        initUserInfoTable();
        BlockingQueue<UserInfo> userInfoQueue = initUserInfoQueue(size);
        return size + " 条记录插入,耗时" + processQueue(userInfoQueue, sharding);
    }

    /**
     * Creates the userinfo table if it does not exist yet.
     * MyISAM is used deliberately here: no transactions, fast bulk inserts.
     */
    private void initUserInfoTable() {
        String userInfoTableDdlStr = "create table if not exists `userinfo` (\n" +
                " `userId` int(11) DEFAULT NULL,\n" +
                " `userName` varchar(255) DEFAULT NULL,\n" +
                " `gender` varchar(255) DEFAULT NULL,\n" +
                " `phone` varchar(255) DEFAULT NULL,\n" +
                " `address` varchar(255) DEFAULT NULL,\n" +
                " `birthDay` bigint(20) DEFAULT NULL,\n" +
                " `school` varchar(255) DEFAULT NULL,\n" +
                " `hobby` varchar(255) DEFAULT NULL,\n" +
                " KEY `index_userid` (`userId`)\n" +
                ") ENGINE=MyISAM DEFAULT CHARSET=utf8";
        jdbcTemplate.execute(userInfoTableDdlStr);
    }

    /**
     * Drains the queue into the database and measures the elapsed wall-clock time.
     *
     * <p>Uses non-blocking {@code poll()} instead of {@code isEmpty()} + {@code take()}:
     * the original pair was a check-then-act race that could block forever, and its
     * {@code InterruptedException} was swallowed without re-interrupting the thread.
     *
     * @param userInfoQueue pre-filled queue of rows to persist
     * @param sharding      true = single-row inserts; false = BATCH_SIZE-row inserts
     * @return elapsed time in milliseconds
     */
    private long processQueue(BlockingQueue<UserInfo> userInfoQueue, boolean sharding) {
        long beginTime = System.currentTimeMillis();
        if (sharding) {
            // One INSERT statement (one round trip) per row; SQL hoisted out of the loop.
            UserInfo userInfo;
            while ((userInfo = userInfoQueue.poll()) != null) {
                jdbcTemplate.update(BASE_INSERT_SQL,
                        userInfo.getUserId(), userInfo.getUserName(), userInfo.getGender(),
                        userInfo.getPhone(), userInfo.getAddress(),
                        userInfo.getBirthDay().getTime() / 1000,
                        userInfo.getSchool(), userInfo.getHobby());
            }
        } else {
            // Multi-row INSERT: one fixed-size statement reused for every full batch.
            String insertSqlStr = genMulFixSqlStr(BATCH_SIZE);
            List<UserInfo> buffer = new ArrayList<>(BATCH_SIZE);
            UserInfo userInfo;
            while ((userInfo = userInfoQueue.poll()) != null) {
                buffer.add(userInfo);
                if (buffer.size() == BATCH_SIZE) {
                    jdbcTemplate.update(insertSqlStr, getMulParams(buffer, buffer.size()));
                    buffer.clear();
                }
            }
            // Flush the final, possibly smaller, batch.
            if (!buffer.isEmpty()) {
                jdbcTemplate.update(genMulFixSqlStr(buffer.size()), getMulParams(buffer, buffer.size()));
                buffer.clear();
            }
        }
        return System.currentTimeMillis() - beginTime;
    }

    /**
     * Flattens the rows into a single bind-parameter array, COLUMN_COUNT slots per row.
     * birthDay is stored as epoch seconds to match the bigint column.
     *
     * @param userInfoList rows to flatten
     * @param mulSize      number of rows from the list to include
     * @return bind parameters in column order, row by row
     */
    private Object[] getMulParams(List<UserInfo> userInfoList, int mulSize) {
        Object[] params = new Object[mulSize * COLUMN_COUNT];
        for (int i = 0; i < mulSize; i++) {
            UserInfo userInfo = userInfoList.get(i);
            int base = COLUMN_COUNT * i;
            params[base] = userInfo.getUserId();
            params[base + 1] = userInfo.getUserName();
            params[base + 2] = userInfo.getGender();
            params[base + 3] = userInfo.getPhone();
            params[base + 4] = userInfo.getAddress();
            params[base + 5] = userInfo.getBirthDay().getTime() / 1000;
            params[base + 6] = userInfo.getSchool();
            params[base + 7] = userInfo.getHobby();
        }
        return params;
    }

    /**
     * Builds a multi-row INSERT containing {@code mulSize} placeholder groups.
     * (StringBuilder, not StringBuffer: no shared-thread access here.)
     */
    private String genMulFixSqlStr(int mulSize) {
        StringBuilder mulInserSql = new StringBuilder(BASE_INSERT_SQL);
        for (int i = 1; i < mulSize; i++) {
            mulInserSql.append(",(?,?,?,?,?,?,?,?)");
        }
        return mulInserSql.toString();
    }

    /**
     * Generates exactly {@code size} synthetic rows.
     * (Fixes the original off-by-one: {@code i <= size} produced size + 1 rows,
     * contradicting the count reported by {@link #mulTest}.)
     */
    private BlockingQueue<UserInfo> initUserInfoQueue(Integer size) {
        BlockingQueue<UserInfo> userInfoQueue = new LinkedBlockingQueue<>();
        for (int i = 0; i < size; i++) {
            UserInfo userInfo = new UserInfo();
            userInfo.setAddress("shanghai" + i);
            userInfo.setBirthDay(new Date());
            userInfo.setGender(i % 2 == 0 ? "male" : "female"); // fixed typo "famale"
            userInfo.setHobby("programing,basketball,reading,driving");
            userInfo.setPhone("15700" + i);
            userInfo.setSchool("njCoolege");
            userInfo.setUserId(i);
            userInfo.setUserName("ping" + i);
            userInfoQueue.add(userInfo);
        }
        return userInfoQueue;
    }

    /**
     * Plain data holder for one generated row.
     * Declared static so it does not carry a hidden reference to the controller.
     */
    private static class UserInfo {
        private Integer userId;
        private String userName;
        private String gender;
        private String phone;
        private String address;
        private Date birthDay; // persisted as epoch seconds, see getMulParams
        private String school;
        private String hobby;

        public Integer getUserId() {
            return userId;
        }

        public void setUserId(Integer userId) {
            this.userId = userId;
        }

        public String getUserName() {
            return userName;
        }

        public void setUserName(String userName) {
            this.userName = userName;
        }

        public String getGender() {
            return gender;
        }

        public void setGender(String gender) {
            this.gender = gender;
        }

        public String getPhone() {
            return phone;
        }

        public void setPhone(String phone) {
            this.phone = phone;
        }

        public String getAddress() {
            return address;
        }

        public void setAddress(String address) {
            this.address = address;
        }

        public Date getBirthDay() {
            return birthDay;
        }

        public void setBirthDay(Date birthDay) {
            this.birthDay = birthDay;
        }

        public String getSchool() {
            return school;
        }

        public void setSchool(String school) {
            this.school = school;
        }

        public String getHobby() {
            return hobby;
        }

        public void setHobby(String hobby) {
            this.hobby = hobby;
        }
    }
}
快速敲完后,立即插入1万条数据,看看效率如何。
分别访问:A.http://localhost:8080/mysql-test/mulTest?size=10000&sharding=true
B.http://localhost:8080/mysql-test/mulTest?size=10000&sharding=false
两个URL后发现之前的单条插入的sql语句插入10000条时间花费了9721多毫秒
而修改后的多条SQL插入10000条数据仅花费了373毫秒
通过简单的对比测试可知,多条数据批量插入的效率确实比单条数据插入的效率高出许多倍,比想象之中的预期还要高上许多。
有了上面的实践数据作为有力的理论后盾,得了,为了项目能更好地上线,再在家里加个班吧,写代码去了。加油!!!
总结
最后还是再总结一下
对于需要向mysql中插入大量数据可通过如下方式进行:
1.将表的存储引擎修改为myisam
2.插入数据时采用批量插入方式进行