Hive Lab10 : json data processing using hive
Json Data Processing Using Hive.
[training@localhost ~]$ cat json1
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
[training@localhost ~]$
hive> create database jsonlab;
OK
Time taken: 1.694 seconds
hive> use jsonlab;
OK
Time taken: 0.018 seconds
hive> create table raw(line string);
OK
Time taken: 0.219 seconds
hive> load data local inpath 'json1' into table raw;
hive> select * from raw;
OK
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
Time taken: 0.146 seconds
hive>
hive> select get_json_object(line,'$.name') from raw;
OK
Sankar
Sankari
hive> select
> get_json_object(line,'$.name') ,
> get_json_object(line,'$.age') ,
> get_json_object(line,'$.sex') ,
> get_json_object(line,'$.city')
> from raw;
OK
Sankar 25 m NULL
Sankari 24 f hyd
hive> create table info(name string, age int,
> sex string, city string);
OK
Time taken: 0.04 seconds
hive> insert overwrite table info
> select x.* from raw
> lateral view json_tuple
(line,'name','age','sex','city') x
> as n,a,s,c;
hive> select * from info;
OK
Sankar 25 m NULL
Sankari 24 f hyd
Time taken: 0.063 seconds
hive>
[training@localhost ~]$ cat json2
{"name":"ravi","age":25,"wife":
{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":
{"name":"kavitha","age":21},"city":"del"}
[training@localhost ~]$
hive> create table jraw(line string);
OK
Time taken: 0.048 seconds
hive> load data local inpath 'json2' into table jraw;
Copying data from file:/home/training/json2
Copying file: file:/home/training/json2
Loading data to table jsonlab.jraw
OK
Time taken: 0.127 seconds
hive> select * from jraw;
OK
{"name":"ravi","age":25,"wife":
{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":
{"name":"kavitha","age":21},"city":"del"}
Time taken: 0.06 seconds
hive>
hive> create table raw2(name string, age int, wife string, city
string);
OK
Time taken: 0.033 seconds
hive> insert overwrite table raw2
> select x.* from jraw
> lateral view
> json_tuple(line,'name','age','wife','city') x as
n,a,w,c;
hive> select * from raw2;
OK
ravi 25 {"age":24,"name":"banu"} hyd
kavi 25 {"age":21,"name":"kavitha"} del
Time taken: 0.061 seconds
hive> create table jinfo(hname string, wname string,
> hage int, wage int, city string);
hive> insert overwrite table jinfo
> select name, get_json_object(wife,'$.name'),
> age , get_json_object(wife,'$.age'),
> city from raw2;
hive> select * from jinfo;
OK
ravi banu 25 24 hyd
kavi kavitha 25 21 del
Time taken: 0.079 seconds
hive>
[training@localhost ~]$ cat json1
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
[training@localhost ~]$
hive> create database jsonlab;
OK
Time taken: 1.694 seconds
hive> use jsonlab;
OK
Time taken: 0.018 seconds
hive> create table raw(line string);
OK
Time taken: 0.219 seconds
hive> load data local inpath 'json1' into table raw;
hive> select * from raw;
OK
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
Time taken: 0.146 seconds
hive>
hive> select get_json_object(line,'$.name') from raw;
OK
Sankar
Sankari
hive> select
> get_json_object(line,'$.name') ,
> get_json_object(line,'$.age') ,
> get_json_object(line,'$.sex') ,
> get_json_object(line,'$.city')
> from raw;
OK
Sankar 25 m NULL
Sankari 24 f hyd
hive> create table info(name string, age int,
> sex string, city string);
OK
Time taken: 0.04 seconds
hive> insert overwrite table info
> select x.* from raw
> lateral view json_tuple
(line,'name','age','sex','city') x
> as n,a,s,c;
hive> select * from info;
OK
Sankar 25 m NULL
Sankari 24 f hyd
Time taken: 0.063 seconds
hive>
[training@localhost ~]$ cat json2
{"name":"ravi","age":25,"wife":
{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":
{"name":"kavitha","age":21},"city":"del"}
[training@localhost ~]$
hive> create table jraw(line string);
OK
Time taken: 0.048 seconds
hive> load data local inpath 'json2' into table jraw;
Copying data from file:/home/training/json2
Copying file: file:/home/training/json2
Loading data to table jsonlab.jraw
OK
Time taken: 0.127 seconds
hive> select * from jraw;
OK
{"name":"ravi","age":25,"wife":
{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":
{"name":"kavitha","age":21},"city":"del"}
Time taken: 0.06 seconds
hive>
hive> create table raw2(name string, age int, wife string, city
string);
OK
Time taken: 0.033 seconds
hive> insert overwrite table raw2
> select x.* from jraw
> lateral view
> json_tuple(line,'name','age','wife','city') x as
n,a,w,c;
hive> select * from raw2;
OK
ravi 25 {"age":24,"name":"banu"} hyd
kavi 25 {"age":21,"name":"kavitha"} del
Time taken: 0.061 seconds
hive> create table jinfo(hname string, wname string,
> hage int, wage int, city string);
hive> insert overwrite table jinfo
> select name, get_json_object(wife,'$.name'),
> age , get_json_object(wife,'$.age'),
> city from raw2;
hive> select * from jinfo;
OK
ravi banu 25 24 hyd
kavi kavitha 25 21 del
Time taken: 0.079 seconds
hive>
No comments:
Post a Comment