版本:V1.0
第0题
表结构:uid,subject_id,score
求:找出所有科目成绩都大于某一学科平均成绩的用户
select
uid
from
( select t3.uid from test00_score t3 left join
( select t.uid, t.subjet_id, t.score, t1.avgScore from test00_score t left join
( select subjet_id, avg(score) as avgScore from test00_score group by subjet_id)t1
on t.subjet_id = t1.subjet_id where t.score < t1.avgScore)t2 on t2.uid = t3.uid where t2.uid is null) t4 group by uid;
第1题
我们有如下的用户访问数据
select
userid,
months,
sum(visitcount) as sumcount,
sum(sum(visitcount)) over(partition by userid order by months) as sum_count
from
(
select
lower(userid) as userid ,
date_format(from_unixtime(unix_timestamp(visitdate,'yyyy/MM/dd'),'yyyy-MM-dd'),'yyyy-MM') as months,
visitcount
from
test01_visit
)t group by months ,userid order by userid asc
userId visitDate visitCount
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
U02 2017/1/23 6
U01 2017/2/22 4
要求使用SQL统计出每个用户的累积访问次数,如下表所示:
用户id 月份 小计 累积
u01 2017-01 11 11
u01 2017-02 12 23
u02 2017-01 12 12
u03 2017-01 8 8
u04 2017-01 3 3
第2题
有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:
1)每个店铺的UV(访客数)
select shop, count(distinct user_id) as uv from test02_visit group by shop;
2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
select * from (
select t1.shop, t1.user_id, t1.count_visit,
row_number() over(partition by t1.shop order by t1.count_visit desc) rk
from (
select shop, user_id, count(*) as count_visit
from test02_visit
group by shop, user_id
)
t1
) t2 where t2.rk <= 3;
第3题
已知一个表STG.ORDER,有如下字段:Date,Order_id,User_id,amount。请给出sql进行统计:数据样例:2017-01-01,10029028,1000003251,33.57。
1)给出 2017年每个月的订单数、用户数、总成交金额。
select
date_format(date
,'yyyy-MM') as month
,
count(order_id) as countOrder,
count(distinct user_id) as countUser,
cast(sum(amount) as decimal) as countAmount
from
test03_order
group by date_format(date
,'yyyy-MM')
2)给出2017年11月的新客数(指在11月才有第一笔订单)
第4题
有一个5000万的用户文件(user_id,name,age),一个2亿记录的用户看电影的记录文件(user_id,url),统计各年龄段观看电影的次数
select age,count() sumcount from
(
select
l.user_id,url,name,age
from
test04_log l
join
test04_user u
on u.user_id=l.user_id
)t group by age order by sumcount desc
select age,count() sumcount from
(
select
l.user_id,url,name,
case
when age>0 and age<10 then '0-10'
when age>=10 and age < 20 then '10-20'
when age >= 20 and age <30 then '20-30'
when age >=30 and age <40 then '30-40'
when age >=40 and age <50 then '40-50'
when age >=50 and age <60 then '50-60'
when age >=60 and age <70 then '60-70'
when age >=70 and age <80 then '70-80'
else '80-100'
end as age
from
test04_log l
join
test04_user u
on u.user_id=l.user_id
)t group by age order by sumcount desc
第5题
有日志如下,请写出代码求得所有用户和活跃用户的总数及平均年龄。(活跃用户指连续两天都有访问记录的用户)
select count(),age from (
select
user_id,count(),age
from
(
select
user_id,
avg(age) age,
dt,
rank() over(partition by user_id order by dt) rank
from test05_active
group by user_id,dt
)t group by date_sub(dt,rank),user_id,age having count(*)>=2
)t1 group by user_id,age
;日期 用户 年龄
11,test_1,23
11,test_2,19
11,test_3,39
11,test_1,23
11,test_3,39
11,test_1,23
12,test_2,19
13,test_1,23
第6题
请用sql写出所有用户中在今年10月份第一次购买商品的金额,表ordertable字段(购买用户:userid,金额:money,购买时间:paymenttime(格式:2017-10-01),订单id:orderid)
select
userid,
money
from
(
select
userid,
money
,
paymenttime
from
test06_ordertable
where date_format(paymenttime,'yyyy-MM')='2017-10'
order by paymenttime
)t
第7题
有一个线上服务器访问日志格式如下(用sql答题)
时间 接口 ip地址
2016-11-09 11:22:05 /api/user/login 110.23.5.33
2016-11-09 11:23:10 /api/user/detail 57.3.2.16
.....
2016-11-09 23:59:40 /api/user/login 200.6.5.166
求11月9号下午14点(14-15点),访问api/user/login接口的top10的ip地址
第8题
有一个账号表如下,请写出SQL语句,查询各自区组的money排名前十的账号(分组取前10)
CREATE TABIE account
(
dist_id
int(11)
DEFAULT NULL COMMENT '区组id',
account
varchar(100)DEFAULT NULL COMMENT '账号' ,
gold
int(11)DEFAULT NULL COMMENT '金币'
PRIMARY KEY (dist_id
,account_id
),
)ENGINE=InnoDB DEFAULT CHARSET-utf8
select
dist_id,
account,
rk
from
(
select
dist_id,
account,
rank() over(partition by dist_id order by gold) rk
from
test08_account
)t
where rk <10
第9题
1)有三张表分别为会员表(member)销售表(sale)退货表(regoods)
(1)会员表有字段memberid(会员id,主键)credits(积分);
(2)销售表有字段memberid(会员id,外键)购买金额(MNAccount);
(3)退货表中有字段memberid(会员id,外键)退货金额(RMNAccount);
2)业务说明:
(1)销售表中的销售记录可以是会员购买,也可是非会员购买。(即销售表中的memberid可以为空)
(2)销售表中的一个会员可以有多条购买记录
(3)退货表中的退货记录可以是会员,也可是非会员
4、一个会员可以有一条或多条退货记录
查询需求:分组查出销售表中所有会员购买金额,同时分组查出退货表中所有会员的退货金额,把会员id相同的购买金额-退款金额得到的结果更新到表会员表中对应会员的积分字段(credits)
with tmp_test09_sale as(
select
memberid,
sum(mnaccount) MNAccount
from
test09_sale
group by
memberid
),
tmp_test09_regoods as(
select
memberid,
sum(rmnaccount) RMNAccount
from
test09_regoods
group by
memberid
)
insert into table test09_credits
select
memberid,
sum(MNAccount) MNAccount,
sum(RMNAccount) RMNAccount
from(
select
memberid,
0 credits,
MNAccount,
0 RMNAccount
from
tmp_test09_sale
union all
select
memberid,
0 credits,
0 MNAccount,
RMNAccount
from
tmp_test09_regoods
)t
group by t.memberid
第10题
现在有三个表student(学生表)、course(课程表)、score(成绩单),结构如下:
create table student
(
id bigint comment ‘学号’,
name string comment ‘姓名’,
age bigint comment ‘年龄’
);
create table course
(
cid string comment ‘课程号,001/002格式’,
cname string comment ‘课程名’
);
Create table score
(
Id bigint comment ‘学号’,
cid string comment ‘课程号’,
score bigint comment ‘成绩’
) partitioned by(event_day string)
其中score中的id、cid,分别是student、course中对应的列请根据上面的表结构,回答下面的问题
1)请将本地文件(/home/users/test/20190301.csv)文件,加载到分区表score的20190301分区中,并覆盖之前的数据
2)查出平均成绩大于60分的学生的姓名、年龄、平均成绩
select
name ,
age,
cid,
avg_score
from
test10_student t
join
(
select
id,
cid,
avg(score) avg_score
from
test10_score
group by id,cid having avg(score) >60
) t1
on t.id=t1.id
2)查出没有‘001’课程成绩的学生的姓名、年龄
select
name,
age
from
(
select
name,
age,
cid
from
test10_student t1
left join
test10_score t2
on t1.id=t2.id
)t3 where cid!='001'
3)查出有‘001’\’002’这两门课程下,成绩排名前3的学生的姓名、年龄
select
name,
age,
sum(score) as score
from
(
select
name,
age,
cid,
score
from
test10_student t1
left join
test10_score t2
on t1.id=t2.id where cid='001' or cid='002'
)t3 group by name,age order by score desc limit 3
5)创建新的表score_20190317,并存入score表中20190317分区的数据
6)描述一下union和union all的区别,以及在mysql和HQL中用法的不同之处?
7)简单描述一下lateral view语法在HQL中的应用场景,并写一个HQL实例