使用 BulkProcessor 批量导入mysql数据到Elasticsearch
程序员文章站
2022-06-11 11:44:21
...
1. pom文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.rpp</groupId>
<artifactId>elasticsearch-demo</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Inherit dependency and plugin versions from Spring Boot 2.3.2 -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.2.RELEASE</version>
<relativePath/>
</parent>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version>
</properties>
<!-- Import the Spring Cloud BOM so cloud dependencies get managed versions -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-dependencies</artifactId>
<version>Hoxton.SR6</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Explicit 7.6.2 pin: must match the version of the ES cluster being indexed -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.6.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- JDBC driver used by DBHelper; runtime scope only -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<scope>runtime</scope>
</dependency>
<!-- Spring Data Elasticsearch starter (RestHighLevelClient / ElasticsearchRestTemplate) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<!-- devtools hot-reload, development only -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<optional>true</optional>
<scope>runtime</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
2. 配置类
TransportClient 在 Elasticsearch 7 中已被标记为过时,并且在 Elasticsearch 8 中将被移除,建议改用 RestHighLevelClient 进行操作。
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.RestClients;
/**
 * Registers the Elasticsearch {@link RestHighLevelClient} bean, built from the
 * comma-separated node list in {@code spring.elasticsearch.rest.uris}.
 */
@Configuration
public class EsConfig {

    /** Comma-separated ES nodes, e.g. "192.168.8.31:9200,192.168.8.31:9201". */
    @Value("${spring.elasticsearch.rest.uris}")
    private String hostlist;

    /**
     * Builds the high-level REST client via Spring Data's ClientConfiguration.
     *
     * @return a RestHighLevelClient connected to every configured node
     */
    @Bean
    public RestHighLevelClient client() {
        // Split the configured host list; trim each entry so stray whitespace
        // around the commas ("host:9200, host:9201") does not break the URI.
        String[] hosts = hostlist.split(",");
        for (int i = 0; i < hosts.length; i++) {
            hosts[i] = hosts[i].trim();
        }
        ClientConfiguration clientConfiguration = ClientConfiguration.builder()
                .connectedTo(hosts)
                .build();
        return RestClients.create(clientConfiguration).rest();
    }
}
spring:
elasticsearch:
rest:
uris: 192.168.8.31:9200,192.168.8.31:9201,192.168.8.31:9202
详细配置示例如下:
// Example: ClientConfiguration with all common options.
HttpHeaders httpHeaders = new HttpHeaders();
httpHeaders.add("some-header", "on every request"); // fixed: statement was missing its ';'
ClientConfiguration clientConfiguration = ClientConfiguration.builder()
    .connectedTo("localhost:9200", "localhost:9291")
    .useSsl()
    .withProxy("localhost:8888")
    .withPathPrefix("ela")
    .withConnectTimeout(Duration.ofSeconds(5))
    .withSocketTimeout(Duration.ofSeconds(3))
    // fixed: the original passed an undeclared 'defaultHeaders'; the headers
    // built above are named 'httpHeaders'
    .withDefaultHeaders(httpHeaders)
    .withBasicAuth(username, password)
    // Supplier is re-evaluated per request, so the timestamp header stays fresh
    .withHeaders(() -> {
        HttpHeaders headers = new HttpHeaders();
        headers.add("currentTime", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
        return headers;
    })
    // ... other options
    .build();
同样的ElasticsearchTemplate在 ES7也被ElasticsearchRestTemplate替换
@Configuration
public class RestClientConfig extends AbstractElasticsearchConfiguration {
// Factory method required by the base class; the returned client backs the
// ElasticsearchRestTemplate bean that AbstractElasticsearchConfiguration registers.
@Override
public RestHighLevelClient elasticsearchClient() {
// ClientConfiguration.localhost() targets the default node at localhost:9200
return RestClients.create(ClientConfiguration.localhost()).rest();
}
// no special bean creation needed
}
基类AbstractElasticsearchConfiguration已经提供了ElasticsearchRestTemplate Bean的初始化
3. 导入代码
/**
 * Streams every row of the given MySQL table into Elasticsearch through a
 * BulkProcessor.
 *
 * <p>Rows are read with a forward-only, read-only cursor, buffered in batches
 * of 10,000 and handed to the BulkProcessor; the final partial batch is
 * submitted after the loop. Relies on the class members {@code client},
 * {@code logger} and {@code POSITION_INDEX}.
 *
 * @param tableName table to export; it is concatenated into the SQL, so it
 *                  must come from trusted code — never from user input
 */
private void writeMySQLDataToES(String tableName) {
    BulkProcessor bulkProcessor = getBulkProcessor(client);
    Connection connection = null;
    PreparedStatement ps = null;
    ResultSet rs = null;
    try {
        connection = DBHelper.getConn();
        logger.info("start handle data :" + tableName);
        // NOTE(review): table names cannot be bound as '?' parameters; keep
        // tableName under application control to avoid SQL injection.
        String sql = "select * from " + tableName;
        ps = connection.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        // Hint the driver to fetch in chunks instead of loading the whole table.
        ps.setFetchSize(20);
        rs = ps.executeQuery();
        ResultSetMetaData colData = rs.getMetaData();
        ArrayList<HashMap<String, String>> dataList = new ArrayList<>();
        int count = 0;
        while (rs.next()) {
            count++;
            HashMap<String, String> row = new HashMap<>(128);
            // JDBC column indices are 1-based and getColumnCount() is inclusive:
            // the original loop used '<' and silently dropped the last column.
            for (int i = 1; i <= colData.getColumnCount(); i++) {
                String column = colData.getColumnName(i);
                row.put(column, rs.getString(column));
            }
            dataList.add(row);
            // Hand off every 10,000 rows; the remainder is flushed after the loop.
            if (count % 10000 == 0) {
                logger.info("mysql handle data number:" + count);
                for (HashMap<String, String> doc : dataList) {
                    bulkProcessor.add(new IndexRequest(POSITION_INDEX).source(doc));
                }
                dataList.clear();
            }
        }
        // Submit whatever is left of the final, partial batch.
        for (HashMap<String, String> doc : dataList) {
            bulkProcessor.add(new IndexRequest(POSITION_INDEX).source(doc));
        }
        bulkProcessor.flush();
    } catch (SQLException e) {
        logger.error("failed to export table " + tableName, e);
    } finally {
        // Close in reverse acquisition order. Each close is null-guarded and
        // isolated so an early failure (e.g. the connection never opened)
        // cannot raise an NPE or prevent the remaining resources from closing.
        try {
            if (rs != null) rs.close();
        } catch (Exception e) {
            logger.error("error closing ResultSet", e);
        }
        try {
            if (ps != null) ps.close();
        } catch (Exception e) {
            logger.error("error closing PreparedStatement", e);
        }
        try {
            if (connection != null) connection.close();
        } catch (Exception e) {
            logger.error("error closing Connection", e);
        }
        try {
            // Wait for in-flight bulk requests to finish before returning.
            boolean terminated = bulkProcessor.awaitClose(150L, TimeUnit.SECONDS);
            logger.info("bulkProcessor closed, fully terminated: " + terminated);
        } catch (Exception e) {
            logger.error("error closing BulkProcessor", e);
        }
    }
}
/**
 * Builds a BulkProcessor that pushes batched index requests asynchronously
 * through the given client.
 *
 * <p>Batches are sent when any threshold is hit: 5,000 actions, 100 MB of
 * payload, or a 100-second flush interval; up to 10 bulk requests may be in
 * flight concurrently, with constant 1s/3-retry backoff on rejection.
 *
 * <p>The original wrapped this in a try/catch whose handler dereferenced the
 * still-null {@code bulkProcessor} (guaranteed NPE) and could return null;
 * none of the builder calls throw checked exceptions, so the guard is gone.
 *
 * @param client the high-level REST client used to execute bulk requests
 * @return a configured, ready-to-use BulkProcessor (never null)
 */
private BulkProcessor getBulkProcessor(RestHighLevelClient client) {
    BulkProcessor.Listener listener = new BulkProcessor.Listener() {
        @Override
        public void beforeBulk(long executionId, BulkRequest request) {
            logger.info("Try to insert data number : " + request.numberOfActions());
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
            // numberOfActions() is the request size; per-item errors surface
            // through response.hasFailures(), so log them instead of hiding them.
            if (response.hasFailures()) {
                logger.error("Bulk " + executionId + " had item failures: "
                        + response.buildFailureMessage());
            } else {
                logger.info("************** Success insert data number : "
                        + request.numberOfActions() + " , id: " + executionId);
            }
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            // Whole-request failure (connection refused, timeout, ...).
            logger.error("Bulk is unsuccess : " + failure + ", executionId: " + executionId);
        }
    };

    BiConsumer<BulkRequest, ActionListener<BulkResponse>> bulkConsumer =
            (request, bulkListener) -> client.bulkAsync(request, RequestOptions.DEFAULT, bulkListener);

    return BulkProcessor.builder(bulkConsumer, listener)
            .setBulkActions(5000)
            .setBulkSize(new ByteSizeValue(100L, ByteSizeUnit.MB))
            .setConcurrentRequests(10)
            .setFlushInterval(TimeValue.timeValueSeconds(100L))
            .setBackoffPolicy(BackoffPolicy.constantBackoff(TimeValue.timeValueSeconds(1L), 3))
            .build();
}
获取数据库连接工具类,导入的时候为了效率高直接使用底层的JDBC进行批量操作
import java.sql.Connection;
import java.sql.DriverManager;
/**
 * Minimal JDBC helper that opens MySQL connections for the bulk exporter.
 *
 * <p>Each call to {@link #getConn()} now returns a NEW connection. The
 * original cached the connection in a shared static field, which is not
 * thread-safe and let one caller's close() invalidate another caller's
 * connection mid-use.
 */
public class DBHelper {
    public static final String url = "jdbc:mysql://192.168.8.31:3306/position?useUnicode=true&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&useSSL=false";
    public static final String name = "com.mysql.cj.jdbc.Driver";
    // NOTE(review): credentials hard-coded in source — move to external
    // configuration (environment / application.yml) before production use.
    public static final String user = "root";
    public static final String password = "root";

    /**
     * Opens a fresh connection; the caller is responsible for closing it.
     *
     * @return a new Connection, or null if driver loading / connecting failed
     */
    public static Connection getConn() {
        try {
            // Explicit driver load kept for pre-JDBC-4 compatibility; modern
            // drivers self-register via the service loader.
            Class.forName(name);
            return DriverManager.getConnection(url, user, password);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
推荐阅读
-
java实现批量导入.csv文件到mysql数据库
-
php中使用ExcelFileParser处理excel获得数据(可作批量导入到数据库使用)
-
Java利用MYSQL LOAD DATA LOCAL INFILE实现大批量导入数据到MySQL
-
使用PHPExcel实现数据批量导入到数据库
-
解决大批量数据插入mysql问题:使用mysqlimport工具从文件中导入
-
Elasticsearch-MySQL数据导入到Elasticsearch中
-
logstash从MySQL导入数据到ElasticSearch的配置
-
使用 BulkProcessor 批量导入mysql数据到Elasticsearch
-
elasticsearch 数据导入到 mysql
-
批量导入GIS数据到Elasticsearch中