# Data Platform Hands-On
# 1. Setting up Maven, Tomcat, and CBoard (steps omitted)
# 2. Start MySQL on node01
mysql -h192.168.183.101 -uhive -phive123
create database bi;
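If the hive account was only granted rights on its own schema, it may not see the new bi database; a possible extra grant (MySQL 5.x syntax, run as root; the privileges and host pattern are assumptions):
GRANT ALL PRIVILEGES ON bi.* TO 'hive'@'%' IDENTIFIED BY 'hive123';
FLUSH PRIVILEGES;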
Configure the data source in CBoard:
Data source type: JDBC
New data source name: bi_mysql
Driver class: com.mysql.jdbc.Driver
Connection string: jdbc:mysql://192.168.183.101:3306/bi
Database username: hive
Database password: hive123
# 3. Create the real-time log topic in Kafka
On node03:
cd /home/hadoop/apps/kafka_2.11-0.10.2.1
bin/kafka-topics.sh --create --zookeeper 192.168.183.101:2181 --replication-factor 1 --partitions 3 --topic userlog2
bin/kafka-topics.sh --list --zookeeper 192.168.183.101:2181
# Use Kafka's built-in console consumer script to check the topic
bin/kafka-console-consumer.sh --zookeeper 192.168.183.101:2181 --from-beginning --topic userlog2
# On node01
mkdir ~/apps/userlog
hadoop fs -mkdir -p /data/userlog
Create a new Flume config file logagent.conf on node01 (mind the IP addresses in it).
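The exact contents of logagent.conf are not reproduced here; a minimal sketch, assuming an exec source that tails the producer's output file and an avro sink pointing at the collector on node02 (the node02 address 192.168.183.102 and port 44444 are assumptions and must match collectagent101.conf):
logagent.sources = r1
logagent.channels = c1
logagent.sinks = k1

# tail the file the producer keeps appending to
logagent.sources.r1.type = exec
logagent.sources.r1.command = tail -F /home/hadoop/apps/userlog/f1
logagent.sources.r1.channels = c1

logagent.channels.c1.type = memory
logagent.channels.c1.capacity = 10000
logagent.channels.c1.transactionCapacity = 1000

# forward to the collector agent on node02
logagent.sinks.k1.type = avro
logagent.sinks.k1.hostname = 192.168.183.102
logagent.sinks.k1.port = 44444
logagent.sinks.k1.channel = c1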
On node02, edit collectagent101.conf.
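Again only a sketch: collectagent101.conf would define an avro source receiving from node01 and fan the events out to both a Kafka sink (topic userlog2) and an HDFS sink under /data/userlog. The broker address, port, and daily path layout below are assumptions, and the Kafka sink property names are those of Flume 1.7+ (Flume 1.6 uses brokerList/topic instead):
collectagent.sources = r1
collectagent.channels = c1 c2
collectagent.sinks = k1 k2

# avro source receiving from node01; the default replicating selector
# copies every event to both channels
collectagent.sources.r1.type = avro
collectagent.sources.r1.bind = 0.0.0.0
collectagent.sources.r1.port = 44444
collectagent.sources.r1.channels = c1 c2

collectagent.channels.c1.type = memory
collectagent.channels.c2.type = memory

# real-time branch: push events into the userlog2 topic
collectagent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
collectagent.sinks.k1.kafka.bootstrap.servers = 192.168.183.103:9092
collectagent.sinks.k1.kafka.topic = userlog2
collectagent.sinks.k1.channel = c1

# offline branch: land raw lines under the warehouse directory, one subdirectory per day
collectagent.sinks.k2.type = hdfs
collectagent.sinks.k2.hdfs.path = /data/userlog/%Y%m%d
collectagent.sinks.k2.hdfs.fileType = DataStream
collectagent.sinks.k2.hdfs.useLocalTimeStamp = true
collectagent.sinks.k2.hdfs.rollInterval = 60
collectagent.sinks.k2.channel = c2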
Start the back-end collector first, then the front-end log-collecting agent.
On node02, start Flume:
bin/flume-ng agent --conf conf --conf-file conf/collectagent101.conf --name collectagent -Dflume.root.logger=INFO,console
Then start Flume on node01:
bin/flume-ng agent --conf conf --conf-file conf/logagent.conf --name logagent -Dflume.root.logger=INFO,console
Run the data-producing jar on node01.
Upload it into the apps directory on node01 with rz first.
Under apps: java -cp logproducer-1.0-SNAPSHOT.jar cn.chinahadoop.producer.LogProducer /home/hadoop/apps/userlog f1
You will see data being produced:
line -> user_20,windows_v1_20171201,010,2,1534645660158
line -> user_95,android_pad_v1.2_20180110,0755,1,1534645661159
line -> user_43,android_v1.2_20180110,0755,1,1534645662160
line -> user_90,apple_phone_v1.2_20180101,020,2,1534645663161
line -> user_49,android_v1.2_20180110,021,1,1534645664162
line -> user_66,apple_phone_v1.2_20180101,020,1,1534645665163
line -> user_57,apple_pad_v1.2_20180101,021,2,1534645666163
line -> user_96,apple_phone_v1.2_20180101,010,1,1534645667164
line -> user_95,android_pad_v1.2_20180110,0755,1,1534645668165
line -> user_5,android_pad_v1.2_20180110,021,2,1534645669165
line -> user_51,apple_pad_v1.2_20180101,0755,1,1534645670166
line -> user_44,windows_v1_20171201,010,1,1534645671167
line -> user_81,apple_pad_v1.2_20180101,021,2,1534645672168
line -> user_82,apple_mac_v1.2_20180101,020,1,1534645673169
line -> user_95,android_pad_v1.2_20180110,0755,1,1534645674169
line -> user_60,apple_phone_v1.2_20180101,010,2,1534645675170
line -> user_97,android_v1.2_20180110,021,1,1534645676170
line -> user_84,apple_phone_v1.2_20180101,010,1,1534645677171
line -> user_21,apple_pad_v1.2_20180101,021,2,1534645678171
line -> user_58,apple_mac_v1.2_20180101,020,1,1534645679172
line -> user_55,android_v1.2_20180110,0755,1,1534645680174
line -> user_45,apple_pad_v1.2_20180101,021,2,1534645681175
line -> user_70,apple_mac_v1.2_20180101,020,1,1534645682176
line -> user_93,apple_pad_v1.2_20180101,021,1,1534645683177
line -> user_68,windows_v1_20171201,010,2,1534645684178
line -> user_34,apple_mac_v1.2_20180101,020,1,1534645685179
line -> user_87,apple_pad_v1.2_20180101,0755,1,1534645686180
line -> user_43,android_v1.2_20180110,0755,2,1534645687182
line -> user_54,apple_phone_v1.2_20180101,020,1,1534645688183
line -> user_61,android_v1.2_20180110,021,1,1534645689184
line -> user_97,android_v1.2_20180110,021,2,1534645690185
line -> user_18,apple_phone_v1.2_20180101,020,1,1534645691185
line -> user_77,android_pad_v1.2_20180110,021,1,1534645692186
line -> user_67,android_v1.2_20180110,0755,2,1534645693187
line -> user_82,apple_mac_v1.2_20180101,020,1,1534645694188
line -> user_2,windows_v1_20171201,020,1,1534645695189
line -> user_83,android_pad_v1.2_20180110,0755,2,1534645696191
line -> user_91,android_v1.2_20180110,0755,1,1534645697191
line -> user_9,apple_pad_v1.2_20180101,021,1,1534645698193
View the files that have been generated:
cd ~/apps/userlog/
And on node02 you can see that the incoming data has been picked up.
Build the data warehouse on node01 (in Hive):
create database ods;
use ods;
create external table ods.userlog_external_pt(
user_id string,
client_version string,
area_id string,
user_behavior int,
time string
)
partitioned by(dt string)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile
location '/data/userlog';
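The external table only exposes data once its daily partitions are registered; a sketch for one day, assuming the collector writes per-day subdirectories such as /data/userlog/20180819 (adjust the date and path to the actual layout):
alter table ods.userlog_external_pt add if not exists partition (dt='20180819')
location '/data/userlog/20180819';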
create database pdw;
use pdw;
## 1. Create the user log detail table
// user id, client version, client type, area id, area name, timestamp, impressions (pv), clicks
create table pdw.userlog_detail_pt(
user_id string,
client_version string,
client_type string,
area_id string,
area_name string,
time string,
pv_cnt int,
click_cnt int
)
partitioned by(dt string)
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as RCFILE;
## 2. Create the new-user detail table (used to compute new and cumulative user counts)
// new user id, timestamp
create table pdw.new_user_detail_pt(
user_id string,
time string
)
partitioned by(dt string)
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as RCFILE;
// Load the dimension data (first create the directory with mkdir -p /home/hadoop/apps/hive_test/data/, then upload area_data.txt and client_version_type_data.txt with rz):
load data local inpath '/home/hadoop/apps/hive_test/data/area_data.txt' into table area_info;
load data local inpath '/home/hadoop/apps/hive_test/data/client_version_type_data.txt' into table rel.client_version_type_info;
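The two load statements above assume the dimension tables already exist; a minimal DDL sketch, where the rel database, the column names (taken from the detail-table comments), and the comma delimiter are assumptions:
create database if not exists rel;
-- area dimension: area id -> area name
create table if not exists rel.area_info(
area_id string,
area_name string
)
row format delimited fields terminated by ','
stored as textfile;
-- client dimension: client version -> client type
create table if not exists rel.client_version_type_info(
client_version string,
client_type string
)
row format delimited fields terminated by ','
stored as textfile;
Note that the first load statement omits the database prefix, so run it from whichever database actually holds area_info.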
Create the report table in MySQL:
CREATE TABLE `realtime_pv_click_report_daily` (
`pv_cnt` int(11) NOT NULL,
`click_cnt` int(11) NOT NULL,
`date_time` varchar(512) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
Run the user-detail script:
sh ods_user_log_2_rcfile_daily.sh 0
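The script body is not listed here; roughly, it fills one partition of pdw.userlog_detail_pt from the ODS table while joining the two dimension tables. A HiveQL sketch, where ${dt} stands for the date the shell wrapper derives from the days-ago argument, and where mapping user_behavior 1/2 to impression/click is an assumption:
insert overwrite table pdw.userlog_detail_pt partition (dt='${dt}')
select
  u.user_id,
  u.client_version,
  c.client_type,
  u.area_id,
  a.area_name,
  u.time,
  case when u.user_behavior = 1 then 1 else 0 end as pv_cnt,
  case when u.user_behavior = 2 then 1 else 0 end as click_cnt
from ods.userlog_external_pt u
left join rel.client_version_type_info c on u.client_version = c.client_version
left join rel.area_info a on u.area_id = a.area_id
where u.dt = '${dt}';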
In Hive:
use pdw;
Run the new-user script.
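Likewise only a sketch of what the new-user script would run: pick today's users who have never appeared in an earlier partition of the new-user table (${dt} again filled in by the shell wrapper):
insert overwrite table pdw.new_user_detail_pt partition (dt='${dt}')
select t.user_id, min(t.time) as time
from pdw.userlog_detail_pt t
left join (
  select distinct user_id from pdw.new_user_detail_pt where dt < '${dt}'
) prev on t.user_id = prev.user_id
where t.dt = '${dt}' and prev.user_id is null
group by t.user_id;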