欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

Spark读取Hive中的数据加载为DataFrame

程序员文章站 2022-07-14 15:13:05
...

首先要让 Spark SQL 知道 Hive 的位置(通常是把 Hive 的 hive-site.xml 放到 Spark 的 conf 目录下)。其次,要读取 Hive 中的数据,必须在创建 SparkSession 时调用 enableHiveSupport() 开启 Hive 支持。

	// Obtain a SparkSession named "hive" with Hive support switched on.
    val spark = SparkSession
      .builder()
      .enableHiveSupport()
      .appName("hive")
      .getOrCreate()

    // Setup statements, executed strictly in the order listed:
    //   1. switch to the `spark` database;
    //   2. recreate student_infos (tab-delimited) and load it from a local file;
    //   3. recreate student_scores (tab-delimited) and load it from a local file.
    val setupStatements = Seq(
      "use spark",
      "drop table if exists student_infos",
      "create table if not exists student_infos (name string,age int) row format  delimited fields terminated by '\t'",
      "load data local inpath '/root/test/student_infos' into table student_infos",
      "drop table if exists student_scores",
      "create table if not exists student_scores (name string,score int) row format delimited fields terminated by '\t'",
      "load data local inpath '/root/test/student_scores' into table student_scores"
    )
    setupStatements.foreach { statement =>
      spark.sql(statement)
    }

    // Alternative: read a table directly as a DataFrame.
    //    val frame: DataFrame = spark.table("student_infos")
    //    frame.show(100)

    // Join the two tables on name, then print up to 100 rows of the result.
    val df = spark.sql(
      "select si.name,si.age,ss.score from student_infos si,student_scores ss where si.name = ss.name"
    )
    df.show(100)

    // Persist the joined result into the Hive table good_student_infos:
    // drop any previous copy, then write with overwrite mode via saveAsTable.
    spark.sql("drop table if exists good_student_infos")
    val resultWriter = df.write.mode(SaveMode.Overwrite)
    resultWriter.saveAsTable("good_student_infos")