欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

hive解析json数据方法get_json_object、json_tuple、正则化方法

程序员文章站 2022-03-06 22:20:48
...

get_json_object

数据样例:

{"id": 1701439105,"ids": [2154137571,3889177061,1496915057,……,1663973284],"total_number": 493}

建表导入数据

CREATE TABLE IF NOT EXISTS tmp_json_test (
           json string
) 
STORED AS textfile ;

load data local inpath '/opt/datas/weibotest.json' overwrite into table tmp_json_test;

解析

select get_json_object(t.json,'$.id'), get_json_object(t.json,'$.total_number') from tmp_json_test t ; 

json_tuple

select t2.* from tmp_json_test t1 lateral view json_tuple(t1.json, 'id', 'total_number') t2 as c1, c2;

正则化方法

遇到json数组时候上面的方法就不好用了,需要进行正则化处理

[{"driverId":"kyQxC","fit":0.27573048408198864,"geographyOrders":[{"lngLat":{"latitude":39.81508,"longitude":116.384641},"locationMarks":2,"orderId":"888BC"},{"lngLat":{"latitude":39.81508,"longitude":116.316226},"locationMarks":0,"orderId":"TsD0h"},{"lngLat":{"latitude":39.914909,"longitude":116.361645},"locationMarks":2,"orderId":"Q4jQY"},{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.948105,"longitude":116.42546},"locationMarks":1,"orderId":"TsD0h"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958022251},{"driverId":"q7tHp","fit":-0.37450937636284354,"geographyOrders":[{"lngLat":{"latitude":39.913802,"longitude":116.42431},"locationMarks":0,"orderId":"d4Pi4"},{"lngLat":{"latitude":39.899412,"longitude":116.414537},"locationMarks":2,"orderId":"a2aDW"},{"lngLat":{"latitude":39.878595,"longitude":116.414537},"locationMarks":2,"orderId":"ghVvp"},{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.973706,"longitude":116.384641},"locationMarks":1,"orderId":"d4Pi4"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958025504},{"driverId":"vmOEo","fit":-0.45518970780636714,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958026602},{"driverId":"q2SFo","fit":-0.6158962058438726,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958022782},{"driverId":"VnuQy","fit":-0.7347361535106847,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958026868},{"driverId":"iVpqU","fit":-0.8215220235499345,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.955627,"longitude":116.42776},"locationMarks":2,"orderId":"cWn2n"},{"lngLat":{"latitude":39.968705,"longitude":116.42431},"locationMarks":2,"orderId":"o8GQy"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958019545},{"driverId":"Rp60p","fit":-0.8974051460968164,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958026025},{"driverId":"UzCbr","fit":-1.229829917139119,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958026325},{"driverId":"rfsp7","fit":-1.2818360226777146,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958025767},{"driverId":"Lf5ED","fit":-2.0576755342346273,"geographyOrders":[{"lngLat":{"latitude":39.92996,"longitude":116.363944},"locationMarks":0,"orderId":"8B9Fu"},{"lngLat":{"latitude":39.914203,"longitude":116.442708},"locationMarks":1,"orderId":"8B9Fu"}],"keySelector":"961","orderId":"8B9Fu","passageId":"yNqj3","pushFlag":1,"sendTime":1617958022522}]
SELECT
  ai.driver_id
  , ai.app_id
  , ai.service_id
  , ai.fit
  , ai.geography_orders
  , ai.key_selector
  , ai.order_id
  , ai.passage_id
  , ai.push_start_loc
  , ai.push_flag
  , from_unixtime(cast(ai.send_time/1000 as bigint)) as send_time
  , from_unixtime(unix_timestamp()) as bdl_datecreated
FROM
    (
        SELECT
            dispatch AS list
        FROM ${dw_sgkj}.odl_wildgoose_dispatch_dinc_liquan
        where dt="${end_time}"
    ) t lateral view explode(split(regexp_replace(regexp_extract(list,'^\\[(.+)\\]$',1),'\\}\\,\\{\\"driverId', '\\}\\|\\|\\{\\"driverId'),'\\|\\|')) list as a 
	lateral view json_tuple(a,'driverId','appId','serviceId','fit','geographyOrders','keySelector','orderId','passageId','pushStartLoc','pushFlag','sendTime') ai 
	as driver_id,app_id,service_id,fit,geography_orders,key_selector,order_id,passage_id,push_start_loc,push_flag,send_time
;
相关标签: hive hive