Hadoop 单节点搭建
环境: VirtualBox Ubuntu 14.04 LTS
安装JDK
查看当前java版本
java -version
更新最新的软件包信息
sudo apt-get update
安装JDK
sudo apt-get install default-jdk
再查看java版本
java -version
安装SSH
sudo apt-get install ssh
安装rsync
sudo apt-get install rsync
配置ssh
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa # 产生密钥（-P 大写，指定空密码短语）
ll ~/.ssh # 查看是否存在 id_dsa.pub文件
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys # 将公钥添加到许可证文件中
安装Hadoop
首先保证虚拟机可以连接网络
可以将虚拟机改为 网络地址转换NAT
Hadoop官网 https://archive.apache.org/dist/hadoop/common
选择2.6.0的话 复制该文件的链接使用wget 命令下载
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz
解压
sudo tar -zxvf hadoop-2.6.0.tar.gz
移动到指定路径
sudo mv hadoop-2.6.0 /usr/local/hadoop
查看文件是否完整（在下载目录中执行 md5sum -c hadoop-2.6.0.tar.gz.md5 校验，参见下方命令参考第 40 条）
设置环境变量
sudo gedit ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 #java 路径
export HADOOP_HOME=/usr/local/hadoop #hadoop 路径
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native #链接库
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib" #链接库
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
使环境变量生效
重启或输入 source ~/.bashrc
修改hadoop配置文件
修改hadoop-env.sh
sudo gedit hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
修改core-site.xml
sudo gedit core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name> <!-- fs.default.name 已废弃，Hadoop 2.x 推荐使用 fs.defaultFS -->
<value>hdfs://localhost:9000</value>
</property>
</configuration>
修改yarn-site.xml
sudo gedit yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
修改mapred-site.xml
sudo cp mapred-site.xml.template mapred-site.xml # 本地复制用 cp 即可，无需 scp
sudo gedit mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
修改hdfs-site.xml
sudo gedit hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value> <!-- 单节点只有一个 DataNode，副本数应设为 1，否则块会一直处于副本不足状态 -->
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value>
</property>
</configuration>
创建相应的目录
sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode # NameNode存储目录
sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode # DataNode 存储目录
sudo chown hduser:hduser -R /usr/local/hadoop # 修改目录所有者
格式化namenode
hdfs namenode -format # hadoop namenode -format 在 2.x 中已废弃，推荐使用 hdfs 命令
启动hadoop
start-dfs.sh
start-yarn.sh
查看
在浏览器中输入http://localhost:8088查看 Hadoop ResourceManager Web 界面
点击Nodes 查看当前节点
在浏览器中输入http://localhost:50070 查看NameNode HDFS Web 界面
点击Datanodes 显示当前启动的Datanode
全部命令参考
1 cd /usr/local/
2 ll
3 rm -rf hadoop/
4 sudo rm -rf hadoop/
5 ll
6 update-alternatives --display java
7 java -version
8 sudo apt-get update
9 sudo apt-get install default-jdk
10 java -version
11 update-alternatives --display java # 查看java安装路径
12 sudo apt-get install ssh
13 sudo apt-get install rsync
14 ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
15 ll ~/.ssh
16 cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
17 wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz
18 ll
19 sudo tar -zxvf hadoop-2.6.0.tar.gz
20 sudo mv hadoop-2.6.0 /usr/local/hadoop
21 ll /usr/local/hadoop
22 ll
23 cd /usr/local
24 ls
25 cd hadoop/
26 ll
27 cd /
28 cd
29 wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz.md5
30 wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz.mds
31 ll
32 ./hadoop-2.6.0.tar.gz.mds
33 sudo ./hadoop-2.6.0.tar.gz.mds
34 cd /usr/local/hadoop/
35 ll
36 cd ~
37 ll
38 wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz.md5
39 ll
40 md5sum -c hadoop-2.6.0.tar.gz.md5 #检查下载的文件是否完整
41 md5sum -c hadoop-2.6.0.tar.gz.mds
42 md5sum -c hadoop-2.6.0.tar.gz.md5
43 rm -f hadoop-2.6.0.tar.gz
44 rm -f hadoop-2.6.0.tar.gz.1
45 rm -f hadoop-2.6.0.tar.gz.mds
46 ll
47 wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz
48 sudo tar -zxvf hadoop-2.6.0.tar.gz
49 sudo mv hadoop-2.6.0 /usr/local/hadoop
50 ll /usr/local/
51 cd hadoop
52 ll /usr/local/hadoop/
53 md5sum -c hadoop-2.6.0.tar.gz.md5
54 sudo gedit ~/.bashrc
55 source ~/.bashrc
56 cd /usr/local/hadoop/etc/hadoop/
57 ll
58 sudo gedit hadoop-env.sh
59 sudo gedit core-site.xml
60 sudo gedit yarn-site.xml
61 sudo scp mapred-site.xml.template mapred-site.xml
62 sudo gedit mapred-site.xml
63 sudo gedit hdfs-site.xml
64 sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode
65 sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode
66 sudo chown hduser:hduser -R /usr/local/hadoop
67 hadoop namenode -format
68 start-dfs.sh
69 start-yarn.sh
70 jps
71 history