Tuesday, July 5, 2016

Hive lab10

Hive Lab10 : json data processing using hive

Json Data Processing Using Hive.

[training@localhost ~]$ cat json1
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
[training@localhost ~]$ 

hive> create database jsonlab;
OK
Time taken: 1.694 seconds
hive> use jsonlab;
OK
Time taken: 0.018 seconds
hive> create table raw(line string);
OK
Time taken: 0.219 seconds
hive> load data local inpath 'json1' into table raw;
hive> select * from raw;
OK
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
Time taken: 0.146 seconds
hive> 
hive> select get_json_object(line,'$.name') from raw;
OK
Sankar
Sankari
hive> select                
    >   get_json_object(line,'$.name') ,
    >   get_json_object(line,'$.age') , 
    >   get_json_object(line,'$.sex') ,
    >   get_json_object(line,'$.city')  
    > from raw;
OK
Sankar  25      m       NULL
Sankari 24      f       hyd

hive> create table info(name string, age int,
    >   sex string, city string);
OK
Time taken: 0.04 seconds
hive> insert overwrite table info
    >   select x.* from raw
    >    lateral view json_tuple

(line,'name','age','sex','city') x
    >   as n,a,s,c;

hive> select * from info;                                       

  
OK
Sankar  25      m       NULL
Sankari 24      f       hyd
Time taken: 0.063 seconds
hive> 

[training@localhost ~]$ cat json2
{"name":"ravi","age":25,"wife":

{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":

{"name":"kavitha","age":21},"city":"del"}
[training@localhost ~]$ 

hive> create table jraw(line string);
OK
Time taken: 0.048 seconds
hive> load data local inpath 'json2' into table jraw;
Copying data from file:/home/training/json2
Copying file: file:/home/training/json2
Loading data to table jsonlab.jraw
OK
Time taken: 0.127 seconds
hive> select * from jraw;
OK
{"name":"ravi","age":25,"wife":

{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":

{"name":"kavitha","age":21},"city":"del"}
Time taken: 0.06 seconds
hive> 
hive> create table raw2(name string, age int, wife string, city 

string);
OK
Time taken: 0.033 seconds
hive> insert overwrite table raw2
    >   select x.* from jraw
    >  lateral view 
    >  json_tuple(line,'name','age','wife','city') x as 

n,a,w,c;

hive> select * from raw2;
OK
ravi    25      {"age":24,"name":"banu"}        hyd
kavi    25      {"age":21,"name":"kavitha"}     del
Time taken: 0.061 seconds
hive> create table jinfo(hname string, wname string, 
    >    hage int, wage int, city string);

hive> insert overwrite table jinfo
    >   select name, get_json_object(wife,'$.name'),
    >   age , get_json_object(wife,'$.age'),
    >  city from raw2;

hive> select * from jinfo;
OK
ravi    banu    25      24      hyd
kavi    kavitha 25      21      del
Time taken: 0.079 seconds
hive>

No comments:

Post a Comment