欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

Flink 入门实战之五Source读取kafka数据

程序员文章站 2022-03-14 18:13:37
...

Source读取kafka, 解析json数据

  • 依赖
<!-- Property configuration: shared versions and the dependency scope used below -->
    <properties>
        <flink.version>1.11.2</flink.version>
        <kafka.version>0.10.2.1</kafka.version>
        <scope>compile</scope>
    </properties>
    <dependencies>
        <!-- Flink dependencies -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>${scope}</scope>
        </dependency>

        <!-- Kafka API: Flink Kafka connector plus an explicitly pinned Kafka client -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>${scope}</scope>
        </dependency>
        <!-- NOTE(review): this explicit kafka-clients version takes precedence over the one
             the Flink connector brings in transitively; confirm the two are compatible. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
        <!-- JSON parsing -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
    </dependencies>
  • 源码
package com.day
import java.util.Properties

import com.alibaba.fastjson.JSON
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

/** Flink streaming job that reads JSON strings from the Kafka topic `test`,
  * extracts `(uid, ts)` from every record and prints the pairs.
  *
  * Expected record shape (other fields are ignored):
  * `{"ts": 1607003021, "common": {"uid": 1}, ...}`
  */
object SourceKafka {
  import scala.util.control.NonFatal

  /** Extracts `(uid, ts)` from one raw Kafka record.
    *
    * @param raw the record value as a JSON string
    * @return `Some((uid, ts))` when the record is valid JSON containing a
    *         numeric `ts` and a `common.uid`; `None` when the record is
    *         empty, malformed, or missing any of those fields
    */
  private def parseRecord(raw: String): Option[(Int, Long)] =
    try {
      for {
        // fastjson returns null (rather than throwing) for null/empty input and absent keys,
        // so every lookup is wrapped in Option.
        obj    <- Option(JSON.parseObject(raw))
        ts     <- Option(obj.getLong("ts"))
        // Read the nested object directly instead of serializing it to a string and re-parsing.
        common <- Option(obj.getJSONObject("common"))
        uid    <- Option(common.getInteger("uid"))
      } yield (uid.intValue, ts.longValue)
    } catch {
      // A single bad record must not fail the whole streaming job; it is dropped instead.
      // Route such records to a side output if they need to be inspected.
      case NonFatal(_) => None
    }

  /** Builds and runs the job.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // 1. Obtain the streaming execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Read from Kafka
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "jeff200:9092")
    val kfkConsumer = new FlinkKafkaConsumer[String](
      "test",
      new SimpleStringSchema(),
      properties
    )
    // Start reading from the earliest available offset
    kfkConsumer.setStartFromEarliest()

    // flatMap (rather than map) lets unparseable records be skipped;
    // the parameter type is spelled out because flatMap is overloaded.
    val dataStream = env
      .addSource(kfkConsumer)
      .flatMap((raw: String) => parseRecord(raw).toList)

    // 3. Print the extracted pairs
    dataStream.print()

    // 4. Execute the job
    env.execute("kafka Job")
  }
}
  • 启动kafka并创建主题和写入json数据
// nohup bin/zookeeper-server-start.sh config/zookeeper.properties &
// nohup bin/kafka-server-start.sh config/server.properties &
// bin/kafka-topics.sh --create --zookeeper jeff200:2181 --replication-factor 1 --partitions 1 --topic test
// bin/kafka-topics.sh --list --zookeeper jeff200:2181
// bin/kafka-console-producer.sh --broker-list jeff200:9092 --topic test
// {"ts": 1607003021, "common": {"uid": 1}, "displays": [{"item": 1, "type": "goods"},{"item": 2, "type": "goods"}]}
  • 程序运行结果
    Flink 入门实战之五Source读取kafka数据

Flink 入门实战之五Source读取kafka数据