impala中常用SQL操作,建表,增加分区,增加统计信息
程序员文章站
2022-07-11 09:09:22
...
impala中常用SQL操作:
- 建表并创建分区,指定存储类型
create EXTRENAL table IF NOT EXISTS testDB.testTable(
a INT,
b BIGINT,
c STRING,
d FLOAT,
e DOUBLE,
f BOOLEAN
) PARTITIONED BY (year INT,month INT,day INT)
stored as parquet
- 从指定位置导入表:
CREATE table testDB.testTable(
a INT,
b BIGINT,
c STRING,
d FLOAT,
e DOUBLE,
f BOOLEAN
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/test/dataDir';
或者可以先建表,然后倒入:
load data inpath '/user/test/dataDir' into table testDB.testTable;
- select as 建表:
create table IF NOT EXISTS testDB.testTable as
select a as aa ,b as bb from test.testbb
- 查看建表语句
show create table testDB.testTable
- 删除表
drop table if exists testDB.testTable
- 给表增加字段:
alter table testDB.testTable add columns(a1 INT,a2 String)
- 更改字段名称和类型
ALTER TABLE name CHANGE column_name new_name new_type;
- 给表增加分区
alter table testDB.testTable ADD IF NOT EXISTS PARTITION(year=2021,month=202105,day=20210506)
- 删除表分区
alter table testDB.testTable drop IF EXISTS PARTITION(year=2021,month=202105,day=20210506)
- 查看表分区
show partitions testDB.testTable
- 表分区插入数据
insert overwrite testDB.testTable PARTITION(year=2021,month=202105,day=20210506)
select xxxx ... from testDB.testTable_tmp
或者可以不指定一个分区插入,这时候需指定字段
insert overwrite testDB.testTable(a,b,c) PARTITION(year=2021,month=202105,day=20210506,hour)
select a,b,c,hourDay from testDB.testTable_tmp
- 查看字段信息
show column stats testDB.testTable
- 增加表统计信息:
compute stats testDB.testTable
- 增加分区统计信息
compute incremental stats testDB.testTable PARTITION(year=2021,month=202105,day=20210506)
- 刷新表
refresh testDB.testTable
- 刷新表分区
refresh testDB.testTable PARTITION(year=2021,month=202105,day=20210506)
- 刷新表分区
alter table testDB.testTable recover partitions
- 刷新元数据
INVALIDATE METADATA testDB.testTable