前置配置
nmtui
静态IP配置、hosts配置,以及ssh本机免密登录:
ssh-keygen -t rsa
ssh-copy-id node01
防火墙配置
systemctl status firewalld.service
systemctl stop firewalld #关闭防火墙
systemctl disable firewalld #禁止开机自启
关闭SELINUX:修改/etc/selinux/config,设置SELINUX=disabled
JDK环境:从 https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html 下载JDK安装包,解压至对应安装目录
vim /etc/profile.d/java.sh
JAVA_HOME=/export/server/jdk1.8.0_251
PATH=/export/server/jdk1.8.0_251/bin:$PATH
export JAVA_HOME PATH
配置环境变量 并source /etc/profile
安装步骤
Hadoop 3.2.1下载
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz
解压至/export/server/下
tar -zxvf hadoop-3.2.1.tar.gz -C /export/server/
配置环境变量
vim /etc/profile.d/hadoop.sh
export HADOOP_HOME=/export/server/hadoop-3.2.1
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile
Hadoop配置
core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://node01:9000</value>
<description>指定HDFS Master(namenode)的通信地址(Hadoop 3.x默认RPC端口为8020,此处显式指定为9000)</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/export/server/hadoop-3.2.1/tmp</value>
<description>指定hadoop运行时产生文件的存储路径</description>
</property>
<property>
<name>hadoop.native.lib</name>
<value>false</value>
<description>是否应使用本机hadoop库(如果存在)</description>
</property>
hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value>
<description>设置数据块应该被复制的份数</description>
</property>
<property>
<name>dfs.safemode.threshold.pct</name>
<value>0</value>
<description>小于等于0意味不进入安全模式,大于1意味一直处于安全模式</description>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<description>文件操作时的权限检查标识, 关闭</description>
</property>
<!-- dfs.replication 已在上文配置,此处为重复项,予以删除 -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/export/server/hadoop-3.2.1/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/export/server/hadoop-3.2.1/tmp/dfs/data</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>0.0.0.0:9870</value>
</property>
yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>NodeManager上运行的附属服务。需配置成mapreduce_shuffle,才可运行MapReduce程序</description>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>yarn模式</description>
</property>
<!-- yarn.nodemanager.env-whitelist 属于yarn-site.xml(上文已配置),mapred-site.xml中应配置MapReduce各进程的HADOOP_MAPRED_HOME -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/export/server/hadoop-3.2.1</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/export/server/hadoop-3.2.1</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/export/server/hadoop-3.2.1</value>
</property>
在start-dfs.sh和stop-dfs.sh里添加以下配置
export HDFS_DATANODE_USER=root
export HDFS_DATANODE_SECURE_USER=root # Hadoop 3.x中HADOOP_SECURE_DN_USER已更名为HDFS_DATANODE_SECURE_USER
export HDFS_NAMENODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
在start-yarn.sh和stop-yarn.sh添加以下配置
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
Hadoop格式化
hdfs namenode -format
启动hadoop
start-all.sh
检测
jps可看到以下进程启动
1719 NameNode
2041 SecondaryNameNode
2331 ResourceManager
2460 NodeManager
1837 DataNode
HDFS webUI端口 9870
YARN webUI端口 8088
可以通过自带的wordCount程序进行验证
hadoop jar /export/server/hadoop-3.2.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount /input/test.txt /output/
hadoop fs -ls /output
hdfs dfs -cat /output/part-r-00000
分布式的话修改hdfs-site.xml中的dfs.replication workers中的节点等