Hive SQL
程序员文章站
2022-05-18 17:29:03
...
1.数据类型
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
## 基本数据类型 ##
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
## 数组 ##
array_type
: ARRAY < data_type >
## map ##
map_type
: MAP < primitive_type, data_type >
## 结构体 ##
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
## 联合类型(暂不知其类型) ##
union_type
: UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
2.表的创建语法
CREATE [TEMPORARY] ##TEMPORARY:临时 ## [EXTERNAL] ##EXTERNAL:外部表## TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)
## col_name:表的列名 data_type:类的类型 COMMENT col_comment:注释 ##
[(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
[COMMENT table_comment]
## 分区 ##
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
## CLUSTERED BY :指定用于分桶的列。SORTED BY:对桶中的列进行排序 ##
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)]
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]
[
## 对读入的数据进行分割,对应表中中的各个字段##
[ROW FORMAT row_format]
## 存储文件的格式 ##
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type
: UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
row_format
: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later)
| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
file_format:
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later)
| ORC -- (Note: Available in Hive 0.11.0 and later)
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
| JSONFILE -- (Note: Available in Hive 4.0.0 and later)
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
constraint_specification:
: [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
[, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE
3.创建内部表
create table [if not exists] table_name
内部表在删除时,元数据与数据都会被删除
4.创建外部表
create external table [if not exists] table_name LOCATION hdfs_path
LOCATION:数据存储在hdfs中的存储目录
外部表在被删除时,只会删除元数据,数据并不会被删除
5.建表实例
crete table person(
id int,
name string,
age int,
likes array<string>,
address map<string,string>
)
row format delimited
fields terminated by ',' #字段对应的分割符
collection items terminated by '-' #数组对应的分隔符
map keys terminated by ':' #map key-value之间的分割符
lines terminated by '\n' #每条记录的分隔符,一行为一条记录
6.查看表的结构
desc table_name
desc formatted table_name
7.通过已有表建表
7.1 通过已有表复制一个表,不包含表数据
CREATE TABLE new_table LIKE old_table;
7.2 通过查询已有表的一些数据进行新表的创建,新创建的表中有查询到的数据
CREATE TABLE new_table As SELECT o1.columA, o1.columB,o2.colimC FROM old_table1 o1 left join old_table2 o2 on o1.columA = o2.columC;
下一篇: java中获取日期以及由时间生成唯一主键