Hadoop之Hive
Hive基本介绍
查询管理存储在HDFS中的大数据集合,提供像SQL的查询语句HiveQL。Hive表的元数据存储在Derby数据库中,Mysql和Oracle也可以作为Hive的存储源
Hive操作
use:切换数据库
hive> use default; OK Time taken: 0.454 seconds hive> show tables; OK categories customers departments intermediate_access_logs order_items orders products tokenized_access_logs wlslog Time taken: 0.404 seconds, Fetched: 9 row(s)
CREATE DATABASE:创建数据库
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name //数据库名 [COMMENT database_comment] //数据库说明 [LOCATION hdfs_path] //存储路径 [WITH DBPROPERTIES (property_name=property_value, ...)]; //键值
直接创建数据库
hive> CREATE DATABASE IF NOT EXISTS testdb; OK Time taken: 1.45 seconds hive> DESC database extended testdb; OK testdb hdfs://quickstart.cloudera:8020/user/hive/warehouse/testdb.db cloudera USER Time taken: 0.046 seconds, Fetched: 1 row(s)
CREATE TABLE:创建表
ROW FORMAT:DELIMITED,用于分割文件
FIELDS TERMINATED BY:指定字段
LINES TERMINATE BY:行标识
CREATE TABLE wlslog(time_stamp STRING,category STRING, type STRING,servername STRING,code STRING,msg STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'; hive> show tables; OK wlslog Time taken: 0.019 seconds, Fetched: 1 row(s)
LOAD DATA:导入数据
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION
(partcol1=val1, partcol2=val2 ...)]
导入文件
hive> LOAD DATA LOCAL INPATH 'cdh/wlslog.log' OVERWRITE INTO TABLE wlslog; Loading data to table testdb.wlslog Table testdb.wlslog stats: [numFiles=1, numRows=0, totalSize=717, rawDataSize=0] OK Time taken: 0.757 seconds hive> select * from wlslog; OK Apr-8-2014-7:06:16-PM-PDT Notice WebLogicServer AdminServer BEA-000365 Server state changed to STANDBY Apr-8-2014-7:06:17-PM-PDT Notice WebLogicServer AdminServer BEA-000365 Server state changed to STARTING Apr-8-2014-7:06:18-PM-PDT Notice WebLogicServer AdminServer BEA-000365 Server state changed to ADMIN Apr-8-2014-7:06:19-PM-PDT Notice WebLogicServer AdminServer BEA-000365 Server state changed to RESUMING Apr-8-2014-7:06:20-PM-PDT Notice WebLogicServer AdminServer BEA-000331 Started WebLogic AdminServer Apr-8-2014-7:06:21-PM-PDT Notice WebLogicServer AdminServer BEA-000365 Server state changed to RUNNING Apr-8-2014-7:06:22-PM-PDT Notice WebLogicServer AdminServer BEA-000360 Server started in RUNNING mode Time taken: 0.41 seconds, Fetched: 7 row(s)
复制表结构
CREATE TABLE wlslog_2 LIKE wlslog;插入数据
INSERT OVERWRITE TABLE tablename1 [PARTITION(partcol1=val1,partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1,partcol2=val2 ...)] select_statement1 FROM from_statement;
创建表时插入数据
CREATE TABLE wlslog_copy
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
STORED AS TEXTFILE(SEQUENCEFILE/ORC/PARQUET)
AS
SELECT time_stamp,category,type,servername,code,msg
FROM wlslog
ALTER TABLE:修改表名、注释、属性、存储
TRUNCATE TABLE:删除数据行
TRUNCATE TABLE table_name [PARTITION partition_spec];
DROP TABLE:删除表
DROP TABLE [IF EXISTS] table_name;