一.hadoop(一个可靠的分布式共享存储和分析系统)
二.实验工具
Oracle VM VirtualBox
CentOS 7
xshell
xftp
三.实验步骤
step1
1.事先准备好一台linux虚拟机(网卡为仅主机模式,最基本的服务器安装)
#systemctl stop firewalld //关闭防火墙
#systemctl disable firewalld //防止开机自启
2.IPADDR 192.168.56.100/24 网关192.168.56.1
#vi /etc/sysconfig/network
NETWORKING=yes
GATEWAY=192.168.56.1
#vi /etc/sysconfig/network-scripts/ifcfg-enp0s3
TYPE=Ethernet
IPADDR=192.168.56.100
NETMASK=255.255.255.0
3.用Xshell以root用户连接虚拟机,使用xftp上传jdk和hadoop
4.安装jdk和hadoop(检验是否成功)
#cd /usr/local
#rpm -ivh jdk-8u91-linux-x64.rpm //用Tab补齐
#cd /usr/
#java
Usage: java [-options] class [args...]
(to execute a class)
or java [-options] -jar jarfile [args...]
(to execute a jar file)
where options include:
-d32 use a 32-bit data model if available
-d64 use a 64-bit data model if available
-server to select the "server" VM
The default VM is server.
-cp <class search path of directories and zip/jar files>
-classpath <class search path of directories and zip/jar files>
A : separated list of directories, JAR archives,
and ZIP archives to search for class files.
-D<name>=<value>
set a system property
-verbose:[class|gc|jni]
enable verbose output
-version print product version and exit
-version:<value>
Warning: this feature is deprecated and will be removed
in a future release.
require the specified version to run
-showversion print product version and continue
-jre-restrict-search | -no-jre-restrict-search
Warning: this feature is deprecated and will be removed
in a future release.
include/exclude user private JREs in the version search
-? -help print this help message
-X print help on non-standard options
-ea[:<packagename>...|:<classname>]
-enableassertions[:<packagename>...|:<classname>]
enable assertions with specified granularity
-da[:<packagename>...|:<classname>]
-disableassertions[:<packagename>...|:<classname>]
disable assertions with specified granularity
-esa | -enablesystemassertions
enable system assertions
-dsa | -disablesystemassertions
disable system assertions
-agentlib:<libname>[=<options>]
load native agent library <libname>, e.g. -agentlib:hprof
see also, -agentlib:jdwp=help and -agentlib:hprof=help
-agentpath:<pathname>[=<options>]
load native agent library by full pathname
-javaagent:<jarpath>[=<options>]
load Java programming language agent, see java.lang.instrument
-splash:<imagepath>
show splash screen with specified image
See http://www.oracle.com/technetwork/java/javase/documentation/index.html for more details.
安装hadoop
#cd local
#ls
#tar -xvf hadoop-2.7.3.tar.gz //注意用Tab补齐
#mv hadoop-2.7.3 hadoop //将解压出的目录重命名为hadoop,与后文使用的路径/usr/local/hadoop保持一致
#cd /usr/local
#ls
bin games hadoop-2.7.3.tar.gz jdk-8u91-linux-x64.rpm lib64 sbin src
etc hadoop include lib libexec share
# cd hadoop
# cd etc
# ls
hadoop
# cd h*
# ls
capacity-scheduler.xml httpfs-env.sh mapred-env.sh
configuration.xsl httpfs-log4j.properties mapred-queues.xml.template
container-executor.cfg httpfs-signature.secret mapred-site.xml.template
core-site.xml httpfs-site.xml slaves
hadoop-env.cmd kms-acls.xml ssl-client.xml.example
hadoop-env.sh kms-env.sh ssl-server.xml.example
hadoop-metrics2.properties kms-log4j.properties yarn-env.cmd
hadoop-metrics.properties kms-site.xml yarn-env.sh
hadoop-policy.xml log4j.properties yarn-site.xml
hdfs-site.xml mapred-env.cmd
#vim hadoop-env.sh //配置hadoop环境
//将export JAVA_HOME=${JAVA_HOME}改写为export JAVA_HOME=/usr/java/default
#vim /etc/profile
//在末尾添加export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
#source /etc/profile //执行改写的文件,使之生效
#hadoop
Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]
CLASSNAME run the class named CLASSNAME
or
where COMMAND is one of:
fs run a generic filesystem user client
version print the version
jar <jar> run a jar file
note: please use "yarn jar" to launch
YARN applications, not this command.
checknative [-a|-h] check native hadoop and compression libraries availability
distcp <srcurl> <desturl> copy file or directories recursively
archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive
classpath prints the class path needed to get the
credential interact with credential providers
Hadoop jar and the required libraries
daemonlog get/set the log level for each daemon
trace view and modify Hadoop tracing settings
Most commands print help when invoked w/o parameters.
//hadoop 安装成功
5.退出master,将master复制三台服务器:slave1,slave2,slave3
#shutdown -h now
将三台服务器的Ip分别改为192.168.56.101,192.168.56.102,192.168.56.103,名字分别为slave1,slave2,slave3
查看是否都能ping通
#systemctl stop firewalld //关闭防火墙
#systemctl disable firewalld //关闭开机自启
6.master管理着另外三台服务器(slave1,2,3)(Xshell菜单:工具--发送键输入到所有会话)
#cd /usr/local/hadoop/etc/hadoop
进行配置文件的修改
#vim core-site.xml //四台都要
<configuration>
<property>
<name>fs.defaultFS</name> //名字
<value>hdfs://master:9000</value> //hdfs协议,9000为监听端口
</property>
</configuration>
#vim /etc/hosts
192.168.56.100 master
192.168.56.101 slave1
192.168.56.102 slave2
192.168.56.103 slave3
//在master中
#hdfs namenode -format //格式化
#hadoop-daemon.sh start namenode
starting namenode, logging to /usr/local/hadoop/logs/hadoop-root-namenode-master.out
# jps
3352 NameNode //成功
3420 Jps
//在slave1,2,3中
#hadoop-daemon.sh start datanode
starting datanode, logging to /usr/local/hadoop/logs/hadoop-root-datanode-slave1.out
#jps
3210 DataNode //成功
3293 Jps
step2
一.HDFS架构
#hdfs dfsadmin -report | more //报告当前集群的情况
#netstat -ntle //查看端口
2.在web中查看(用浏览器访问192.168.56.100:50070)
当前有三个datanode
3.开启关闭集群命令
#hadoop-daemon.sh stop namenode //关闭
#hadoop-daemon.sh start namenode //开启
#jps //用来观察进程情况
4.集中式管理
添加所有机器
#vim slaves
slave1
slave2
slave3
#start-dfs.sh //启动所有机器(需要输入密码)
#jps
5.ssh免密登录
#ssh slave1 //第一次需要输入密码
#exit
#cd //进入当前用户的主目录(root用户为/root),不是根目录/
#ls -la
#cd .ssh
#ssh-keygen -t rsa //默认直接回车确认
#ls
id_rsa id_rsa.pub known_hosts //id_rsa是root的私钥,id_rsa.pub是root的公钥
#ssh-copy-id slave1 //传递公钥
#ssh-copy-id slave2
#ssh-copy-id slave3
#ssh-copy-id master //默认本机也是远程登录,所以也需要
#stop-dfs.sh //关闭集群
#start-dfs.sh //开启集群(配置免密登录后不再需要输入密码)
6.怎么上传一个文件到集群上(HDFS默认的块大小是128M:文件超过128M就会被切分成多个块;不足128M的文件只占一个块,且只占用文件实际大小的存储空间)
#hadoop fs -ls / //列举有哪些文件,应该是空的
#cd
#cd /usr/local
#ls
bin hadoop-2.7.3.tar.gz lib64 src
etc include libexec
games jdk-8u91-linux-x64.rpm sbin
hadoop lib share
#hadoop fs -put ./hadoop-2.7.3.tar.gz /
#hadoop fs -ls / //查看是否成功
7.多机器备份,关闭一台机器,备份会怎样
#cd hadoop
#cd etc
#cd h*
#ls
capacity-scheduler.xml httpfs-env.sh mapred-env.sh
configuration.xsl httpfs-log4j.properties mapred-queues.xml.template
container-executor.cfg httpfs-signature.secret mapred-site.xml.template
core-site.xml httpfs-site.xml slave
hadoop-env.cmd kms-acls.xml slaves
hadoop-env.sh kms-env.sh ssl-client.xml.example
hadoop-metrics2.properties kms-log4j.properties ssl-server.xml.example
hadoop-metrics.properties kms-site.xml yarn-env.cmd
hadoop-policy.xml log4j.properties yarn-env.sh
hdfs-site.xml mapred-env.cmd yarn-site.xml
#vim hdfs-site.xml //修改多少个备份
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
#cd /usr/local
# ls
bin hadoop-2.7.3.tar.gz lib64 src
etc include libexec
games jdk-8u91-linux-x64.rpm sbin
hadoop lib share
# hadoop fs -put jdk-8u91-linux-x64.rpm /
停掉一台机器,看看备份情况
# vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.namenode.heartbeat.recheck-interval</name>
<value>10000</value> //NameNode每隔10000毫秒检查一次是否有DataNode心跳超时(失效)
</property>
#hadoop-daemon.sh stop datanode
#hadoop-daemon.sh start datanode
step3
1.把数据存储的地方从tmp改为var
# vim /usr/local/hadoop/etc/hadoop/core-site.xml
<property>
<name>hadoop.tmp.dir</name>
<value>/var/hadoop</value>
</property>
#hdfs namenode -format
#stop-dfs.sh
#start-dfs.sh