kylin
程序员文章站
2024-03-22 16:31:28
...
配置
分发hive并建立软链接
cdh1:
deploy.sh hive-1.1.0-cdh5.10.0 /root/app/ slave
cdh2,cdh3:
ln -s hive-1.1.0-cdh5.10.0 hive
环境变量[所有节点]
vi /etc/profile
export HADOOP_HOME=/root/app/hadoop
export PATH=$HADOOP_HOME/bin:$PATH
export HBASE_HOME=/root/app/hbase
export PATH=$HBASE_HOME/bin:$PATH
export HIVE_HOME=/root/app/hive
export HIVE_CONF_HOME=$HIVE_HOME/conf
export PATH=$HIVE_HOME/bin:$PATH
export KAFKA_HOME=/root/app/kafka
export PATH=$KAFKA_HOME/bin:$PATH
export HCAT_HOME=$HIVE_HOME/hcatalog
export PATH=$HCAT_HOME/bin:$PATH
export KYLIN_HOME=/root/app/kylin
export PATH=$KYLIN_HOME/bin:$PATH
source /etc/profile
kylin.properties[cdh1]
kylin.server.mode=all
kylin.server.cluster-servers=cdh1:7070,cdh2:7070,cdh3:7070
kylin.job.jar=/root/app/kylin/lib/kylin-job-2.3.2.jar
kylin.coprocessor.local.jar=/root/app/kylin/lib/kylin-coprocessor-2.3.2.jar
分发kylin并建立软链接
cdh1:
deploy.sh apache-kylin-2.3.2-bin /root/app/ slave
cdh2,cdh3:
ln -s apache-kylin-2.3.2-bin kylin
kylin.properties[cdh2,cdh3]
vi kylin.properties
kylin.server.mode=query
服务启动
#启动zookeeper集群
runRemoteCmd.sh "/root/app/zookeeper/bin/zkServer.sh start" all [cdh1]
#启动hdfs集群
/root/app/hadoop/sbin/start-dfs.sh [cdh1]
#启动yarn集群
/root/app/hadoop/sbin/start-yarn.sh [cdh1]
/root/app/hadoop/sbin/yarn-daemon.sh start resourcemanager [cdh2]
#开启 jobhistoryserver
/root/app/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver [cdh1]
#启动 HBase 集群
/root/app/hbase/bin/start-hbase.sh [cdh1]
#启动 Hive
--启动之前确保每个服务器能连上mysql
runRemoteCmd.sh "/root/app/hive/bin/hive --service metastore > /root/data/log/hive.log 2>&1 &" all
#启动 Kafka 集群【可选】
runRemoteCmd.sh "/root/app/kafka/bin/kafka-server-start.sh /root/app/kafka/config/server.properties > /root/data/log/kafka.log 2>&1 &" all
runRemoteCmd.sh "jps" all
*******************cdh1***********************
6146 Jps
3460 HRegionServer
1797 NameNode
5831 Kafka
2092 JournalNode
1901 DataNode
2478 ResourceManager
3313 HMaster
2579 NodeManager
1558 QuorumPeerMain
2358 DFSZKFailoverController
3004 JobHistoryServer
5023 RunJar
*******************cdh2***********************
2801 HRegionServer
2225 DFSZKFailoverController
2020 JournalNode
1925 DataNode
2342 NodeManager
1657 QuorumPeerMain
5401 Jps
5098 Kafka
1850 NameNode
2891 HMaster
4253 RunJar
2527 ResourceManager
*******************cdh3***********************
3397 RunJar
4421 Jps
1941 JournalNode
1846 DataNode
2056 NodeManager
4121 Kafka
2377 HRegionServer
1658 QuorumPeerMain
Kylin依赖检查[所有节点]
./check-env.sh
./find-hive-dependency.sh
./find-hbase-dependency.sh
启动 kylin 服务
runRemoteCmd.sh "/root/app/kylin/bin/kylin.sh start > /root/data/log/kylin.log 2>&1 &" all
访问地址 web ui 访问 kylin
默认账号/密码:admin/KYLIN
官网案例
加载脚本
./sample.sh
Restart Kylin Server or click Web UI => System Tab => Reload Metadata to take effect
构建cube
监控
cube构建完成
数据查询
select * from kylin_sales
select count(*) from kylin_sales
select part_dt, sum(price) as total_selled, count(distinct seller_id) as sellers from kylin_sales group by part_dt order by part_dt
Windows防火墙给对应IP开通策略
实际案例
集群规划
| 组件 | cdh1 | cdh2 | cdh3 |
|-----------|----------------|----------------|----------------|
| Zookeeper | QuorumPeerMain | QuorumPeerMain | QuorumPeerMain |
| HDFS | NN,DN | NN,DN | DN |
| YARN | RM,NM | RM,NM | NM |
| Hive | RUNJAR | RUNJAR | RUNJAR |
| HBase | HMaster,RS | HMaster,RS | RS |
| Mysql | Mysql | | |
| Kylin | RUNJAR | RUNJAR | RUNJAR |
服务启动
#启动zookeeper集群
runRemoteCmd.sh "/root/app/zookeeper/bin/zkServer.sh start" all [cdh1]
#启动hdfs集群
/root/app/hadoop/sbin/start-dfs.sh [cdh1]
#启动yarn集群
/root/app/hadoop/sbin/start-yarn.sh [cdh1]
/root/app/hadoop/sbin/yarn-daemon.sh start resourcemanager [cdh2]
#开启 jobhistoryserver
/root/app/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver [cdh1]
#启动 HBase 集群
/root/app/hbase/bin/start-hbase.sh [cdh1]
#启动 Hive
--启动之前确保每个服务器能连上mysql
runRemoteCmd.sh "/root/app/hive/bin/hive --service metastore > /root/data/log/hive.log 2>&1 &" all
#启动 Kafka 集群【可选】
runRemoteCmd.sh "/root/app/kafka/bin/kafka-server-start.sh /root/app/kafka/config/server.properties > /root/data/log/kafka.log 2>&1 &" all
runRemoteCmd.sh "jps" all
*******************cdh1***********************
6146 Jps
3460 HRegionServer
1797 NameNode
5831 Kafka
2092 JournalNode
1901 DataNode
2478 ResourceManager
3313 HMaster
2579 NodeManager
1558 QuorumPeerMain
2358 DFSZKFailoverController
3004 JobHistoryServer
5023 RunJar
*******************cdh2***********************
2801 HRegionServer
2225 DFSZKFailoverController
2020 JournalNode
1925 DataNode
2342 NodeManager
1657 QuorumPeerMain
5401 Jps
5098 Kafka
1850 NameNode
2891 HMaster
4253 RunJar
2527 ResourceManager
*******************cdh3***********************
3397 RunJar
4421 Jps
1941 JournalNode
1846 DataNode
2056 NodeManager
4121 Kafka
2377 HRegionServer
1658 QuorumPeerMain
Kylin依赖检查[所有节点]
./check-env.sh
./find-hive-dependency.sh
./find-hbase-dependency.sh
启动 kylin 服务
runRemoteCmd.sh "/root/app/kylin/bin/kylin.sh start > /root/data/log/kylin.log 2>&1 &" all
数据文件准备
create_table.sql
-- create_table.sql: build the demo tables for the Kylin test case and load CSV data from HDFS.
-- Recreate the employee fact table as comma-delimited plain text.
DROP TABLE IF EXISTS employee;
CREATE TABLE employee(
id int,
name string,
deptId int,
age int,
salary float
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
-- Recreate the department lookup (dimension) table.
DROP TABLE IF EXISTS department;
CREATE TABLE department(
id int,
name string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
-- LOAD DATA INPATH moves the files out of /kylin-test into the Hive warehouse;
-- OVERWRITE replaces any existing table contents.
-- NOTE(review): tables are declared comma-delimited, but the sample data shown
-- later appears space-separated — confirm the actual .csv files use commas.
LOAD DATA INPATH '/kylin-test/employee.csv' OVERWRITE INTO TABLE employee;
LOAD DATA INPATH '/kylin-test/department.csv' OVERWRITE INTO TABLE department;
department.csv
10 department10
11 department11
12 department12
13 department13
14 department14
15 department15
16 department16
17 department17
18 department18
19 department19
20 department20
21 department21
22 department22
23 department23
24 department24
25 department25
26 department26
27 department27
28 department28
29 department29
employee.csv
1 user1 27 28 21435.205
2 user2 29 48 22004.803
3 user3 15 38 16219.985
4 user4 13 27 20613.082
5 user5 28 21 16987.768
6 user6 17 41 11968.506
7 user7 19 47 24554.438
8 user8 26 39 12676.427
9 user9 19 47 6063.39
10 user10 10 29 18110.506
11 user11 12 22 20258.072
12 user12 13 33 12342.235
13 user13 14 21 8234.231
14 user14 25 34 9876.126
15 user15 13 33 20234.891
16 user16 24 24 5512.45
17 user17 28 26 10234.56
18 user18 19 32 16342.57
19 user19 19 39 38283.345
20 user20 12 38 26135.237
创建hdfs目录
./hdfs dfs -mkdir /kylin-test
上传文件至hdfs
./hdfs dfs -put /root/data/file/* /kylin-test
在 hive 中执行 create_table.sql
bin/hive -f create_table.sql
查看员工表数据
bin/hive -e "use default;select * from employee"
查看部门表数据
bin/hive -e "use default;select * from department"
数据同步给kylin
创建project
加载hive数据到kylin
同步hive元数据
新建model
新建cube
构建cube
kylin查询
count
max
group by