1. Prerequisites
Start the HDFS cluster:
start-all.sh
Check the NameNode web UI at 192.168.4.31:50070
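Optionally, the same health check can be done from the shell (this assumes the Hadoop binaries are on the PATH):
hdfs dfsadmin -report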
Start the Hive metastore service
On node2, run:
service mysqld restart
cd /opt/apache-hive-1.2.1-bin/bin
./hive --service metastore
If the command appears to hang with no further output, the metastore has started successfully (it runs in the foreground).
Press Ctrl+C to stop it, then relaunch it in the background:
./hive --service metastore &
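To confirm the metastore is actually up, check that it is listening on its port (9083 is the Hive default; an assumption here, since the setup above does not show the port):
netstat -tlnp | grep 9083
jps | grep RunJar    # the metastore appears as a RunJar process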
After waiting a moment, run the following on node1 to verify:
$ hive
hive> show tables;
OK
Start the Spark cluster
cd /opt/spark-2.1.0-bin-hadoop2.7/sbin
./start-all.sh
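As a quick sanity check that the standalone master and workers registered (Master and Worker are the daemon names shown by jps; 8080 is the default port of the Spark master web UI):
jps | grep -E 'Master|Worker'
Then browse to 192.168.4.31:8080 and confirm the workers are listed.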
2. Installation and Configuration
On node1:
cd /opt/spark-2.1.0-bin-hadoop2.7/conf
cp /opt/apache-hive-1.2.1-bin/conf/hive-site.xml ./
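The entry Spark actually needs from this file is the metastore address. A minimal sketch, assuming the metastore started in step 1 runs on node2 with the default port 9083:
<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://node2:9083</value>
  </property>
</configuration>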
cd /opt/spark-2.1.0-bin-hadoop2.7/conf
cp log4j.properties.template log4j.properties
vim log4j.properties
Add the following settings to quiet the console logging:
log4j.logger.org.apache.spark.sql.SQLContext=WARN
log4j.logger.org.apache.spark.sql.catalyst.analysis.Analyzer=WARN
log4j.logger.org.apache.spark=WARN
log4j.logger.org.apache.spark.storage.BlockManagerMasterActor=WARN
log4j.logger.org.apache.spark.HeartbeatReceiver=WARN
log4j.logger.org.apache.spark.scheduler.local.LocalActor=WARN
Start the Spark SQL client:
cd /opt/spark-2.1.0-bin-hadoop2.7/bin
./spark-sql --master spark://192.168.4.31:7077 --executor-memory 512m
show tables;
select * from person;
Note: if a LIMIT clause is added, the result comes back almost instantly; Spark does not launch a distributed job, but reads the rows directly from the local files instead.
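For example, the following returns almost immediately without scheduling a job (the row count is only illustrative):
select * from person limit 10;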
Next, configure hive-site.xml in Spark's conf directory for the Thrift server, adding the properties sketched below:
cd /opt/spark-2.1.0-bin-hadoop2.7/conf
vim hive-site.xml
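The original note does not list the exact properties. A plausible sketch using the standard HiveServer2 settings (the values are assumptions: 10000 is the default Thrift port, and node1 is where the server is started below):
<property>
  <name>hive.server2.thrift.port</name>
  <value>10000</value>
</property>
<property>
  <name>hive.server2.thrift.bind.host</name>
  <value>node1</value>
</property>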
Start the Spark Thrift Server, i.e. run Spark SQL as a long-lived service:
cd /opt/spark-2.1.0-bin-hadoop2.7/sbin
./start-thriftserver.sh --master spark://192.168.4.31:7077 --executor-memory 512M
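Before connecting, confirm the server is listening (10000 matches the port assumed above; the exact log file name varies with the user and host that started the service):
netstat -tlnp | grep 10000
tail -f /opt/spark-2.1.0-bin-hadoop2.7/logs/spark-*thriftserver*.out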
cd /opt/spark-2.1.0-bin-hadoop2.7/bin
./beeline
!connect jdbc:hive2://spark1:10000
show tables;
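Beeline may prompt for a username and password; with no authentication configured, any username and an empty password is typically accepted. Queries issued here run as jobs inside the shared Thrift server application, for example:
select * from person;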