2.2 DWS公共汇总粒度事实层
Hive数据库建库建表:
创建Hive库并进入:
create database if not exists dws_nshop;
use dws_nshop;
2.2.1 用户主题
2.2.2.1 用户启动【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_launch (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
launch_count int COMMENT '启动次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_launch/';
通过对dwd层的用户启动日志表做一个count聚合即可,sql如下:
insert overwrite table dws_nshop.dws_nshop_ulog_launch partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(device_num) over(partition by device_num) as launch_count
from
dwd_nshop.dwd_nshop_actlog_launch
where
bdp_day="20200618"
用户启动7days【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_launch_7d (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
launch_count int COMMENT '启动次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_launch_7d/';
入库:和上面逻辑一样,只是加一周时间条件限制:
insert overwrite table dws_nshop.dws_nshop_ulog_launch_7d partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(device_num) over(partition by device_num) as launch_count
from
dwd_nshop.dwd_nshop_actlog_launch
where
bdp_day between '20200611' and '20200618'
2.2.2.2 用户浏览表
建表:
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_view (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
view_count INT COMMENT '浏览次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_view/';
数据从DWD层的用户产品浏览表聚合得来:
insert overwrite table dws_nshop.dws_nshop_ulog_view partition(bdp_day='20200618')
select
user_id ,
device_num ,
device_type,
os ,
os_version ,
manufacturer,
carrier ,
network_type,
area_code,
count(device_num) over(partition by device_num)as view_count
from dwd_nshop.dwd_nshop_actlog_pdtview
where
bdp_day='20200618'
用户查询【DWS】
建表:
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_search (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
search_count int COMMENT '查询次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_search/'
对查询主题表进行count聚合即可,如下:
insert overwrite table dws_nshop.dws_nshop_ulog_search partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(device_num) over(partition by device_num) as search_count
from
dwd_nshop.dwd_nshop_actlog_pdtsearch
where
bdp_day="20200618"
2.2.2.3 用户关注【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_comment (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
comment_count int COMMENT '评论次数',
comment_target_count int COMMENT '类别评论次数',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_comment/';
聚合用户对产品的关注,使用count聚合出关注次数
insert overwrite table dws_nshop.dws_nshop_ulog_comment partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(target_id) over(partition by comment_count) as comment_count,
count(distinct target_id) over(partition by target_id) as comment_target_count,
bdp_day
from
dwd_nshop.dwd_actlog_product_comment
where
bdp_day="20200618"
用户交易宽表【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_orders (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
orders_count INT COMMENT '订单数量',
orders_pay DECIMAL (10, 1) COMMENT '订单金额',
orders_shipping DECIMAL (10, 1) COMMENT '订单运费金额',
orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_orders/'
用户交易宽表的数据主要是从DWD层交易订单明细流水表中抽取出来,再结合用户表查出用户相关信息:
with tborder as(
select
o.order_id,
o.district_money,
o.shipping_money,
o.payment_money,
c.customer_id,
c.customer_natives
from dwd_nshop.dwd_nshop_orders_details o
join ods_nshop.ods_02_customer c
on o.customer_id=c.customer_id
where
bdp_day='20200618'
)
insert overwrite table dws_nshop.dws_nshop_user_orders partition(bdp_day='20200618')
select
customer_id,
customer_natives,
count(order_id) over(partition by customer_id) as orders_count,
sum(payment_money) over(partition by customer_id)as orders_pay,
sum(shipping_money) over(partition by customer_id)as orders_shipping,
sum(district_money) over(partition by customer_id)as orders_district,
current_timestamp() as ct
from tborder
用户交易宽表7day【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_orders_7d (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
orders_count INT COMMENT '订单数量',
orders_pay DECIMAL (10, 1) COMMENT '订单金额',
orders_shipping DECIMAL (10, 1) COMMENT '订单运费金额',
orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_orders_7d/'
用户投诉订单宽表【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_complainant (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
compl_orders_count INT COMMENT '订单数量',
compl_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
compl_supplier_count INT COMMENT '商家数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_complainant/'
用户投诉订单宽表7day【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_complainant_7d (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
compl_orders_count INT COMMENT '订单数量',
compl_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
compl_supplier_count INT COMMENT '商家数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_complainant_7d/'
用户营销活动宽表【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_release (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
source_count INT COMMENT '投放来源数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_release/'
用户营销活动宽表7day【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_release_7d (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
source_count INT COMMENT '投放来源数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_release_7d/'
商家用户交互记录宽表【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_user (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
view_count INT COMMENT '浏览次数',
comment_users INT COMMENT '关注人数',
comment_area_code INT COMMENT '关注地区数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_user/'
商家用户交互记录的数据主要统计:
1.商家维度下用户的浏览次数
所以要结合DWD层的用户产品浏览表dwd_nshop_actlog_pdtview结合页面布局维度表、产品维度表和店铺维度表查出相关信息
2.商家维度下关注人数、关注地区数
所以要结合DWD层的用户产品关注表dwd_actlog_product_comment 结合页面布局维度表,产品维度表和店铺维度表查出相关信息
整合sql如下:
-- 统计商家维度下用户的浏览次数
with pgview as(
select
su.supplier_code,
su.supplier_type,
count(*) as view_count
from dwd_nshop.dwd_nshop_actlog_pdtview pv
join
ods_nshop.dim_pub_page pp
on
pp.page_type='4'
and pp.page_code=pv.target_id
join
ods_nshop.dim_pub_product pr
on
pr.product_code=pp.page_code
join
ods_nshop.dim_pub_supplier su
on
su.supplier_code=pr.supplier_code
where
pv.bdp_day='20200618'
group by
su.supplier_code,
su.supplier_type
),
-- 统计商家维度下关注人数、关注地区数
prcomment as(
select
su.supplier_code,
su.supplier_type,
count(distinct pc.user_id) as comment_users,
count(distinct pc.area_code)as comment_area_code
from
dwd_nshop.dwd_actlog_product_comment pc
join
ods_nshop.dim_pub_page pp
on
pp.page_type='4'
and pp.page_code=pc.target_id
join
ods_nshop.dim_pub_product pr
on
pr.product_code=pp.page_code
join
ods_nshop.dim_pub_supplier su
on
su.supplier_code=pr.supplier_code
where
pc.bdp_day='20200618'
group by
su.supplier_code,
su.supplier_type
)
-- 整合指标到DWS表
insert overwrite table dws_nshop.dws_nshop_supplier_user partition(bdp_day='20200618')
select
pgview.supplier_code,
pgview.supplier_type,
pgview.view_count,
prcomment.comment_users,
prcomment.comment_area_code,
current_timestamp() as ct
from pgview
join
prcomment
on
pgview.supplier_code=prcomment.supplier_code
and
pgview.supplier_type=prcomment.supplier_type
商家用户交互记录宽表7day【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_user_7d (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
view_count INT COMMENT '浏览次数',
comment_users INT COMMENT '关注人数',
comment_area_code INT COMMENT '关注地区数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_user_7d/'
商家日流水宽表【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_sales (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
sales_users INT COMMENT '购物人数',
sales_users_area INT COMMENT '购物地区数量',
sales_orders INT COMMENT '购物订单数',
salaes_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
salaes_orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_sales/'
商家日流水宽表的数据主要是从订单明细流水表里抽取,再结合用户信息表和产品信息表、店铺表查出相关信息:
insert overwrite table dws_nshop.dws_nshop_supplier_sales partition(bdp_day='20200321')
select
od.supplier_code,
su.supplier_type,
count(distinct od.customer_id) sales_users,
count(distinct oc.customer_natives) sales_users_area,
count(distinct od.order_id) sales_orders,
sum(pr.product_price * od.product_cnt) salaes_orders_pay,
sum(od.district_money) salaes_orders_district,
current_timestamp() as ct
from dwd_nshop.dwd_nshop_orders_details od
join
ods_nshop.ods_02_customer oc
on
od.customer_id=oc.custxomer_id
join
ods_nshop.dim_pub_product pr
on
od.supplier_code=pr.supplier_code
join
ods_nshop.dim_pub_supplier su
on
su.supplier_code=od.supplier_code
where
od.bdp_day='20200321'
group by
od.supplier_code,
su.supplier_type
商家日流水宽表7day【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_sales_7d (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
sales_users INT COMMENT '购物人数',
sales_users_area INT COMMENT '购物地区数量',
sales_orders INT COMMENT '购物订单数',
salaes_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
salaes_orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_sales_7d/'
2.3 营销活动主题
广告投放用户宽表【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_release_user (
release_sources string COMMENT '投放渠道',
release_category string COMMENT '投放浏览产品分类',
release_users INT COMMENT '投放浏览用户数',
release_product_page INT COMMENT '投放浏览产品页面数',
ct BIGINT COMMENT '创建时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/release/dws_nshop_release_user/'
对用户数和产品数进行聚合即可:
insert overwrite table dws_nshop.dws_nshop_release_user partition(bdp_day='20200321')
select
release_sources,
release_category,
count(distinct customer_id) release_users,
count(1) release_product_page,
current_timestamp() ct
from
dwd_nshop.dwd_nshop_releasedatas
where
bdp_day='20200321'
group by
release_sources,
release_category