1.示例1数据集
json_data |
---|
{"detail":{"sports":[{"sportEquip":[{"equipId":"1","euqipName":"羽毛球拍"}],"sportList":[{"id":"1","name":"羽毛球"}],"sportType":{"id":"1","name":"有氧"}},{"sportEquip":[{"equipId":"2","euqipName":"网球拍"}],"sportList":[{"id":"2","name":"网球"}],"sportType":{"id":"1","name":"有氧"}}]},"label":{"kind":"运动"}} |
1.1 提取label.kind
select json_extract_scalar(json_data, '$.label.kind') as labelKind from dataset1;
labelKind |
---|
运动 |
1.2 提取detail的sportList name&id
(1)step1: 将sports json array展开
SELECT sports FROM dataset1 CROSS JOIN UNNEST(CAST(json_extract(json_data, '$.detail.sports') as array(json))) as t(sports);
sports |
---|
{"sportEquip":[{"equipId":"1","euqipName":"羽毛球拍"}],"sportList":[{"id":"1","name":"羽毛球"}],"sportType":{"id":"1","name":"有氧"}} |
{"sportEquip":[{"equipId":"2","euqipName":"网球拍"}],"sportList":[{"id":"2","name":"网球"}],"sportType":{"id":"1","name":"有氧"}} |
UNNEST:将Array/Map的值展开变成每行数据
(2) step 2: Convert array of objects into array of map
SELECT
CAST(json_extract(sports, '$.sportList') AS ARRAY<MAP<VARCHAR, VARCHAR>>) AS sportList
FROM
(SELECT sports FROM dataset1 CROSS JOIN UNNEST(CAST(json_extract(json_data, '$.detail.sports') as array(json))) as t(sports));
sportList |
---|
[{name=羽毛球, id=1}] |
[{name=网球, id=2}] |
(3)step 3:final result
SELECT
sportList,
sport['name'] AS sport_name,
sport['id'] AS sport_id
FROM
(
SELECT
CAST(json_extract(sports, '$.sportList') AS ARRAY<MAP<VARCHAR, VARCHAR>>) AS sportList
FROM
(SELECT sports FROM dataset1 CROSS JOIN UNNEST(CAST(json_extract(json_data, '$.detail.sports') as array(json))) as t(sports))
)
CROSS JOIN UNNEST(sportList) AS sports(sport);
sportList | sport_name | sport_id |
---|---|---|
[{name=羽毛球, id=1}] | 羽毛球 | 1 |
[{name=网球, id=2}] | 网球 | 2 |
2.示例2数据集
json_data |
---|
["2015_1_age:26","2015_2_age:28","2016_3_name:coco"] |
假设我有这么一串东西,2015_1_age:16,2015代表年份,1代表id,age代表年龄,name 代表名字。想把这串大杂烩改成key->value
的形式:
[{"id":1,"result":"26","type":"age","year":2015}, {"id":2,"result":"28","type":"age","year":2015}, {"id":3,"result":"coco","type":"name","year":2016}]
可以使用transform处理:
select transform(cast(json_data as array(varchar)), (v) ->
JSON_PARSE('{"year": ' || split_part(v, '_', 1) || ',"id": ' || split_part(v, '_', 2) || ',"result": "' || split_part(v, ':', 2) || '" ,"type": "' || regexp_extract(split_part(v, '_', 3),'[a-z]+') || '"}')
) AS result from dataset2
3.示例3数据集
json_data |
---|
{"2015":{"extra":{"age":26,"height":168},"id":3,"name":"coco","type":"name"},"2016":{"extra":{"age":29,"height":160},"id":1,"name":"xixi","type":"name"}} |
想要提取里面json value的内容(不需要2015、2016这些key),并且组合成一个数组
[{"age":26,"name":"coco"}, {"age":29,"name":"xixi"}]
sql实现:
,dateset3_progress as (
SELECT json_parse(concat('[', array_join(array_agg(json_format(value)), ','), ']')) AS result_array
FROM dataset3
CROSS JOIN UNNEST(map_values(cast(json_data AS map(varchar, json)))) AS t(value)
)
SELECT transform(cast(result_array AS ARRAY(JSON)),
(v) -> JSON_PARSE(
'{"name": "' || json_extract_scalar(v, '$.name') || '","age": ' || json_extract_scalar(v, '$.extra.age') || '}'
)
) as result
FROM dateset3_progress;
map_values:返回map 中所有values
array_join(x,delimiter, null_replacement):使用指定分隔符连接数组元素
array_agg(x) :根据输入参数返回一个数组
参考
https://prestodb.io/docs/current/functions/geospatial.html#flatten_geometry_collections
https://hafizbadrie.medium.com/prestodb-convert-json-array-of-objects-into-rows-d9c916724dfc