hive on spark
程序员文章站
2022-04-29 09:05:48
...
version:
apache-hive-3.1.1-bin
spark-2.4.4-bin-hadoop2-without-hive
hive官方文档: https://cwiki.apache.org/confluence/display/Hive/Hive+on+Spark%3A+Getting+Started
##HIVE##
vi conf/hive-site.xml
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<property>
<name>spark.master</name>
<value>yarn</value>
</property>
<property>
<name>spark.eventLog.enabled</name>
<value>true</value>
</property>
<property>
<name>spark.eventLog.dir</name>
<value>hdfs://master:9000/spark/historyLog</value>
</property>
<property>
<name>spark.executor.memory</name>
<value>512m</value>
</property>
<property>
<name>spark.executor.cores</name>
<value>2</value>
</property>
<property>
<name>spark.yarn.driver.memoryOverhead</name>
<value>400</value>
</property>
<property>
<name>spark.serializer</name>
<value>org.apache.spark.serializer.KryoSerializer</value>
</property>
<!-- 版本兼容问题, 存在两个版本spark, 暂不上传 spark.yarn.jars:
<property>
<name>spark.yarn.jars</name>
<value>hdfs://master:9000/spark/jars/*</value>
</property>
-->
vi hive-env.sh
export SPARK_HOME=/opt/bigdata/spark-2.4.4
##SPARK##
#1. 配置mvn加速镜像
vi /home/spark/bigdata/spark-2.4.4/build/apache-maven-3.5.4/conf/settings.xml
<mirror>
<id>alimaven</id>
<name>aliyun maven</name>
<url>https://maven.aliyun.com/repository/public</url>
<mirrorOf>central</mirrorOf>
</mirror>
#2. Download and unpack the Spark source code
tar -zxf spark-2.4.4.tgz
#3. Enter the unpacked source directory
cd /opt/bigdata/spark-2.4.4
#4. Build a distribution WITHOUT bundled Hive jars (hadoop/parquet/orc are
#   "provided" by the cluster), so Hive 3.1's own jars do not clash with
#   Spark's copies at runtime. Produces spark-2.4.4-bin-hadoop2-without-hive.tgz.
./dev/make-distribution.sh --name "hadoop2-without-hive" --tgz "-Pyarn,hadoop-provided,hadoop-2.7,parquet-provided,orc-provided"
#5.构建完成后删除spark-2.4.4源代码,将spark-2.4.4-bin-hadoop2-without-hive.tgz解压至spark-2.4.4目录
#6. Add dependency jars
# Spark side: logging/serialization jars missing from the "without-hive" build.
# NOTE: the original used a literal SPARK_HOME/HIVE_HOME (missing "$"), which
# would copy into a directory named "SPARK_HOME" instead of the real one.
cp minlog-1.3.0.jar "$SPARK_HOME/jars"
cp commons-logging-1.1.3.jar "$SPARK_HOME/jars"
cp slf4j-log4j12-1.7.10.jar "$SPARK_HOME/jars"
cp slf4j-api-1.7.10.jar "$SPARK_HOME/jars"
# Hive side: the Spark client jars Hive needs in order to submit Spark jobs.
cp "$SPARK_HOME"/jars/scala-library-2.11.12.jar "$HIVE_HOME/lib"
cp "$SPARK_HOME"/jars/spark-core*.jar "$HIVE_HOME/lib"
# Original line lacked the "*.jar" glob, so the cp could never match a file.
cp "$SPARK_HOME"/jars/spark-network-common*.jar "$HIVE_HOME/lib"
#7. 修改环境变量
vi /etc/profile
# spark — SPARK_HOME must point at the custom "without-hive" build; the
# prebuilt spark-2.4.0-bin-hadoop2.7 bundles Hive 1.2 jars and is incompatible
# with Hive 3.1 (this also matches SPARK_HOME in hive-env.sh above).
#export SPARK_HOME=/opt/bigdata/spark-2.4.0-bin-hadoop2.7  # 与hive3.1不兼容
export SPARK_HOME=/opt/bigdata/spark-2.4.4
export PATH=$SPARK_HOME/bin:$PATH
source /etc/profile
vi spark-env.sh
export JAVA_HOME=/opt/jdk1.8.0_201
# NOTE(review): spark-2.4.4 ships Scala 2.11 jars (scala-library-2.11.12.jar is
# copied above); SCALA_HOME pointing at 2.12.8 looks mismatched — confirm which
# Scala version the build actually used.
export SCALA_HOME=/home/spark/bigdata/scala-2.12.8
# NOTE(review): HADOOP_CONF_DIR is under /home/spark/bigdata while the next
# line invokes hadoop from /opt/bigdata — presumably one path is stale; verify
# both refer to the same Hadoop install.
export HADOOP_CONF_DIR=/home/spark/bigdata/hadoop-2.7.7/etc/hadoop
# Puts the cluster's Hadoop jars on Spark's classpath (required because the
# build used -Phadoop-provided).
export SPARK_DIST_CLASSPATH=$(/opt/bigdata/hadoop-2.7.7/bin/hadoop classpath)
问题: Hive 3.1 与自带 Hive jar 的预编译 Spark 发行版不兼容 (两套 Hive 类在运行时冲突)
解决: 按上文 ##SPARK## 一节从源码编译 spark-2.4.4 的 "without-hive" 发行版, 并将 SPARK_HOME 指向该目录