Sunday, July 10, 2016

Hive Lab 14: Joins and Cartesian Product

Hive Lab 14: Joins and Cartesian Product


 Joins 
______________

   used to collect data from two or more tables.

    Joins are of two types:

 i) Inner Joins
 ii) Outer Joins

Inner Joins:
   matching rows with given join condition.

   ex:
  tab1 ---> name,city
___________
  A,hyd
  B,del
  c,hyd
  d,del
  e,pune
  f,mumbai   

tab2 ---> city, dept
_________
  hyd,production
  del,hr
  pune,accounts
  chennai,admin
_______________

 inner join --> based on city.


 A,hyd,hyd,production
 B,del,del,hr
 c,hyd,hyd,production
 d,del,del,hr
 e,pune,pune,accounts

outer join -->
   matching records and non-matching records.


 outer joins are 3 types.
  i) left outer join.
 ii) right outer join
 iii) full outer join.

left outer join:

   matchings + non matchings of left side.
   ( complete presence from left side table).
   
 tabx:--> id, dno
____________
 101,11
 102,14
___________

 taby ---> dno,loc
 _____________
 11,hyd
 13,del
 _____________


 tabx  left outer join taby --> based on dno.


   101,11,11,hyd
   102,14,null,null

 right outer join -->
   matchings and non matchings of right side table.

   (complete presence from right side table ).

 tabx right outer join taby -->based on dno.

   101,11,11,hyd
   null,null,13,del

full outer join --->
  matchings, non matchings of left side,
 and  non matchings of right side table.

 tabx full outer join taby ---> based on dno.

   101,11,11,hyd
   102,14,null,null
   null,null,13,del

___________________________________


[training@localhost ~]$ cat emp
101,vino,26000,m,11
102,Sri,25000,f,11
103,mohan,13000,m,13
104,lokitha,8000,f,12
105,naga,6000,m,13
101,janaki,10000,f,12
201,aaa,30000,m,12
202,bbbb,50000,f,13
203,ccc,10000,f,13
204,ddddd,50000,m,13
304,xxx,70000,m,14
305,yyy,80000,f,15
[training@localhost ~]$ cat dept
11,marketing,hyd
12,hr,del
13,finance,hyd
20,prod,hyd
21,admin,chennai
[training@localhost ~]$ 


hive> create database joinsdb;
OK
Time taken: 1.761 seconds
hive> use joinsdb;
OK
Time taken: 0.027 seconds
hive> create table emp(id int, name string, 
    >   sal int, sex string, dno int)
    > row format delimited 
    >   fields terminated by ',';
OK
Time taken: 0.297 seconds
hive> load data local inpath 'emp' into table emp;
Copying data from file:/home/training/emp
Copying file: file:/home/training/emp
Loading data to table joinsdb.emp
OK
Time taken: 0.207 seconds
hive> select * from emp;
OK
101     vino    26000   m       11
102     Sri     25000   f       11
103     mohan   13000   m       13
104     lokitha 8000    f       12
105     naga    6000    m       13
101     janaki  10000   f       12
201     aaa     30000   m       12
202     bbbb    50000   f       13
203     ccc     10000   f       13
204     ddddd   50000   m       13
304     xxx     70000   m       14
305     yyy     80000   f       15
Time taken: 0.194 seconds
hive> 

hive> load data local inpath 'dept'
    >  into table dept;
Copying data from file:/home/training/dept
Copying file: file:/home/training/dept
Loading data to table joinsdb.dept
OK
Time taken: 0.12 seconds
hive> select * from dept;
OK
11      marketing       hyd
12      hr      del
13      finance hyd
20      prod    hyd
21      admin   chennai
Time taken: 0.068 seconds
hive> 

  Inner Join:

hive> select id,name,sal,sex,l.dno,r.dno,
    >    dname,loc
    >  from  emp l join dept r
    >  on (l.dno=r.dno);

101     vino    26000   m       11      11      marketing hyd
102     Sri     25000   f       11      11      marketing hyd
104     lokitha 8000    f       12      12      hrdel
101     janaki  10000   f       12      12      hrdel
201     aaa     30000   m       12      12      hrdel
103     mohan   13000   m       13      13      finance   hyd
105     naga    6000    m       13      13      finance   hyd
202     bbbb    50000   f       13      13      finance   hyd
203     ccc     10000   f       13      13      finance   hyd
204     ddddd   50000   m       13      13      finance   hyd
Time taken: 14.11 seconds
hive> 


Left outer Join :


hive> select id,name,sal,sex,l.dno,r.dno,
    >    dname,loc                       
    >  from  emp l left outer join dept r
    >  on (l.dno=r.dno);                 



101     vino    26000   m       11      11      marketing hyd
102     Sri     25000   f       11      11      marketing hyd
104     lokitha 8000    f       12      12      hrdel
101     janaki  10000   f       12      12      hrdel
201     aaa     30000   m       12      12      hrdel
103     mohan   13000   m       13      13      finance   hyd
105     naga    6000    m       13      13      finance   hyd
202     bbbb    50000   f       13      13      finance   hyd
203     ccc     10000   f       13      13      finance   hyd
204     ddddd   50000   m       13      13      finance   hyd
304     xxx     70000   m       14      NULL    NULL      NULL
305     yyy     80000   f       15      NULL    NULL      NULL
Time taken: 12.786 seconds
hive> 


Right outer Join:

hive> select id,name,sal,sex,l.dno,r.dno,
    >    dname,loc                       
    >  from  emp l right outer join dept r
    >  on (l.dno=r.dno);  

101     vino    26000   m       11      11      marketing hyd
102     Sri     25000   f       11      11      marketing hyd
104     lokitha 8000    f       12      12      hrdel
101     janaki  10000   f       12      12      hrdel
201     aaa     30000   m       12      12      hrdel
103     mohan   13000   m       13      13      finance   hyd
105     naga    6000    m       13      13      finance   hyd
202     bbbb    50000   f       13      13      finance   hyd
203     ccc     10000   f       13      13      finance   hyd
204     ddddd   50000   m       13      13      finance   hyd
NULL    NULL    NULL    NULL    NULL    20      prod      hyd
NULL    NULL    NULL    NULL    NULL    21      admin     chennai
Time taken: 12.429 seconds
hive> 

Full outer Join:

hive> select id,name,sal,sex,l.dno,r.dno, 
    >    dname,loc                        
    >  from  emp l full outer join dept r 
    >  on (l.dno=r.dno);   

   

Denormalizing:

hive> create table info(id int, name string, 
    >   sal int, sex string , dname string, 
    >   loc string);
OK
Time taken: 0.04 seconds
hive> insert overwrite table info
    >   select id, name, sal, sex, dname, loc
    >  from emp l full outer join dept r
    >  on (l.dno=r.dno);

           

hive> select * from info;
OK
101     vino    26000   m       marketing       hyd
102     Sri     25000   f       marketing       hyd
104     lokitha 8000    f       hr      del
101     janaki  10000   f       hr      del
201     aaa     30000   m       hr      del
103     mohan   13000   m       finance hyd
105     naga    6000    m       finance hyd
202     bbbb    50000   f       finance hyd
203     ccc     10000   f       finance hyd
204     ddddd   50000   m       finance hyd
304     xxx     70000   m       NULL    NULL
305     yyy     80000   f       NULL    NULL
NULL    NULL    NULL    NULL    prod    hyd
NULL    NULL    NULL    NULL    admin   chennai
Time taken: 0.071 seconds
hive> 

_________________

Task:

hive> create table projects(dno1 int, dno2 int,
    >   sal int);
OK
Time taken: 0.072 seconds
hive> insert overwrite table projects
    >   select l.dno, r.dno, sal from 
    >   emp l full outer join dept r 
    >   on (l.dno=r.dno);

hive> create table proj(stat string, sal int);
hive> insert overwrite table proj
    >  select 
    > if(dno1 is not null and dno2 is not null,
    > 'Working',
    >  if(dno2 is null,'BenchTeam','BenchProj')),
    >  sal from projects;

hive> select * from proj;
OK
Working 26000
Working 25000
Working 8000
Working 10000
Working 30000
Working 13000
Working 6000
Working 50000
Working 10000
Working 50000
BenchTeam       70000
BenchTeam       80000
BenchProj       NULL
BenchProj       NULL
Time taken: 0.087 seconds
hive> 

hive> insert overwrite table proj
    >  select stat, if(sal is null,0,sal)
    >  from proj;


hive> create table summary(stat string, 
    >    totsal int, cnt int);
OK
Time taken: 0.042 seconds
hive> insert overwrite table summary
    >   select stat, sum(sal), count(*)
    >  from proj
    >    group by stat;

hive> select * from summary;
OK
BenchProj       0       2
BenchTeam       150000  2
Working 228000  10


___________________________

[training@localhost ~]$ cat trans
01/01/2010,30000
01/07/2010,40000
01/08/2010,30000
01/26/2010,40000
02/01/2010,30000
02/07/2010,40000
02/09/2010,30000
02/28/2010,40000
03/01/2010,80000
03/07/2010,40000
04/01/2010,30000
04/17/2010,70000
04/18/2010,80000
04/26/2010,90000
05/01/2010,10000
05/07/2010,20000
05/08/2010,30000
05/26/2010,60000
06/01/2010,30000
06/17/2010,50000
07/01/2010,30000
07/07/2010,40000
08/08/2010,10000
08/26/2010,20000
09/01/2010,90000
09/07/2010,40000
09/18/2010,30000
09/30/2010,40000
10/01/2010,30000
10/07/2010,40000
10/08/2010,30000
11/26/2010,40000
11/01/2010,30000
12/07/2010,40000
12/08/2010,30000
12/26/2010,40000
[training@localhost ~]$ 

hive> create table rawsales(dt string, 
    >  amt int)
    > row format delimited
    >  fields terminated by ',';
OK
Time taken: 0.057 seconds
hive> load data local inpath 'trans'
    >  into table rawsales;
Copying data from file:/home/training/trans
Copying file: file:/home/training/trans
Loading data to table joinsdb.rawsales
OK
Time taken: 0.514 seconds
hive> 


-- Stage table: holds each transaction date as an array of its MM/DD/YYYY parts,
-- so the parts can later be rearranged into Hive's yyyy-MM-dd format.
create table raw2(dt array<string> , amt int);

-- split() breaks the 'MM/DD/YYYY' string on '/' into ["MM","DD","YYYY"];
-- reads from the rawsales table loaded above from the 'trans' file.
insert overwrite table raw2
  select split(dt,'/') , amt from rawsales;

(Note: a 'sales' table with columns (dt string, amt int) must be created before the next insert; its create statement is missing from this transcript.)
hive> select * from raw2 limit 3;
OK
["01","01","2010"]      30000
["01","07","2010"]      40000
["01","08","2010"]      30000
Time taken: 0.061 seconds
hive> insert overwrite table sales
    >   select 
    > concat(dt[2],'-',dt[0],'-',dt[1]), amt
    >  from raw2;

hive> create table sales2(mon int, amt int);
OK
Time taken: 0.044 seconds
hive> insert overwrite table sales2
    >   select month(dt), amt from sales;


hive> create table rep1(mon int, tot int);
OK
Time taken: 0.034 seconds
hive> insert overwrite table rep1
    >   select mon, sum(amt) from sales2
    >    group by mon;



hive> create table carts(m1 int, m2 int, 
    >   tot1 int, tot2 int);             
OK
Time taken: 0.044 seconds
hive> insert overwrite table carts
    >   select l.mon , r.mon , l.tot, r.tot
    >  from rep1 l join rep1 r;


hive> insert overwrite table carts
    >  select * from carts
    >    where (m1-m2)=1;

hive> select * from carts;
OK
12      11      110000  70000
7       6       70000   80000
2       1       140000  140000
8       7       30000   70000
4       3       270000  120000
9       8       200000  30000
10      9       100000  200000
5       4       120000  270000
11      10      70000   100000
3       2       120000  140000
6       5       80000   120000


hive> alter table carts add columns(per int);
OK
Time taken: 0.067 seconds
hive> insert overwrite table carts
    >   select m1,m2,tot1,tot2,
    >     ((tot1-tot2)*100)/tot2 from carts;

hive> select * from carts;
OK
12      11      110000  70000   57
7       6       70000   80000   -12
2       1       140000  140000  0
8       7       30000   70000   -57
4       3       270000  120000  125
9       8       200000  30000   566
10      9       100000  200000  -50
5       4       120000  270000  -55
11      10      70000   100000  -30
3       2       120000  140000  -14
6       5       80000   120000  -33
Time taken: 0.061 seconds
hive> 

Thursday, July 7, 2016

Hive lab13

Hive Lab 13 : Eliminating Duplicates and Unions (merging)


[training@localhost ~]$ cat dupes
101,aaa,10000
101,bbb,20000
101,aaa,10000
101,aaa,10000
101,aaa,10000
102,bbb,40000
103,cccc,50000
102,bbb,40000
102,bbb,40000
[training@localhost ~]$ 

hive> create database hdp;
OK
Time taken: 1.702 seconds
hive> use hdp;
OK
Time taken: 0.018 seconds
hive> create table info(id int, name string, 
    >   sal int)
    >  row format delimited fields terminated 
    >  by ',';
OK
Time taken: 0.439 seconds
hive> 

hive> load data local inpath 'dupes'
    >  into table info;
Copying data from file:/home/training/dupes
Copying file: file:/home/training/dupes
Loading data to table hdp.info
OK
Time taken: 0.209 seconds
hive> select * from info;
OK
101     aaa     10000
101     bbb     20000
101     aaa     10000
101     aaa     10000
101     aaa     10000
102     bbb     40000
103     cccc    50000
102     bbb     40000
102     bbb     40000
Time taken: 0.201 seconds
hive> 
hive> select distinct(id),name,sal
    >   from info;

Tuesday, July 5, 2016

Hive Lab 12 : Loading Array, Struct, Map collection Items

Hive Lab 12 : Loading Array, Struct, Map collection Items

[training@localhost ~]$ cat profile1
101,Venu,btech#mtech,25,hyd
102,Veni,bsc#msc#mtech,26,pune
[training@localhost ~]$ 



hive> create database pract;
OK
Time taken: 1.798 seconds
hive> use pract;
OK
Time taken: 0.027 seconds
hive> create table profile1(
    >   id int, name string, 
    >     qual  array<string>, age int,
    >     city string) 
    > row format delimited        
    >     fields terminated by ','
    >     collection items terminated by '#';

hive> load data local inpath 'profile1'
    >   into table profile1;
Copying data from file:/home/training/profile1
Copying file: file:/home/training/profile1
Loading data to table pract.profile1
OK
Time taken: 0.208 seconds
hive> select * from profile1;
OK
101     Venu    ["btech","mtech"]       25      hyd
102     Veni    ["bsc","msc","mtech"]   26      pune
Time taken: 0.199 seconds
hive> 

__________________
[training@localhost ~]$ cat profile2
101,Venu,Vani#25#btech,hyd
102,Varun,Varuna#24#mba,pune
[training@localhost ~]$ 

hive> create table profile2(
    >  id int, name string, 
    >   wife struct<name:string,age:int,qual:string>, city string)
    >   row format delimited 
    >     fields terminated by ','
    >     collection items terminated by '#';
OK
Time taken: 0.044 seconds
hive> load data local inpath 'profile2'
    >   into table profile2;
Copying data from file:/home/training/profile2
Copying file: file:/home/training/profile2
Loading data to table pract.profile2
OK
Time taken: 0.1 seconds
hive> 
hive> select * from profile2;
OK
101     Venu    {"name":"Vani","age":25,"qual":"btech"}   hyd
102     Varun   {"name":"Varuna","age":24,"qual":"mba"}   pune
Time taken: 0.066 seconds
hive> select name, wife.name from profile2;

Venu    Vani
Varun   Varuna

hive> create table info(id int, name string, 
    >   wname string, wage int, wqual string, 
    >   city string)
    > ;
OK
Time taken: 0.039 seconds
hive> insert overwrite table info
    >   select id, name, wife.name, wife.age,
    >   wife.qual , city  from profile2;

hive> select * from info;
OK
101     Venu    Vani    25      btech   hyd
102     Varun   Varuna  24      mba     pune
Time taken: 0.066 seconds
hive> 

____________________________________
[training@localhost ~]$ cat profile3
101,p1#1000$p3#2000$p7#4000,hyd
102,p1#1200$p2#5000,del
[training@localhost ~]$ 

hive> create table  trans(cid int, 
    >   plist map<string,int>,
    >  city string)
    > row format delimited 
    >    fields terminated by ','
    >    collection items terminated by '$'
    >   map keys terminated by '#';
OK
Time taken: 0.048 seconds
hive> load data local inpath 'profile3'
    >   into table trans;
Copying data from file:/home/training/profile3
Copying file: file:/home/training/profile3
Loading data to table pract.trans
OK
Time taken: 0.103 seconds

hive> select cid, plist['p1'] , city from trans;

hive>
101     1000    hyd
102     1200    del


hive> create table sales(cid int, 
    >    prid  array<string>,
    >   pr array<int>);
OK
Time taken: 0.05 seconds
hive> insert overwrite table sales
    >   select cid, map_keys(plist),
    >    map_values(plist)  from trans;


hive> select * from sales;
OK
101     ["p1","p3","p7"]        [1000,2000,4000]
102     ["p1","p2"]     [1200,5000]
Time taken: 0.056 seconds
hive> 
___________________________________







Hive lab11

Hive Lab 11 : Json Array Processing

[training@localhost ~]$ cat json3
{"name":"Ravi","qual":["btech","mtech"]}
{"name":"avani","qual":["btech","mtech","mba"]}
{"name":"avinash","qual":["btech","mba"]}
[training@localhost ~]$ 
hive> create database jsons;
OK
Time taken: 1.747 seconds
hive> use jsons;
OK
Time taken: 0.032 seconds
hive> create table raw(line string);
OK
Time taken: 0.382 seconds
hive>  

hive>  load data local inpath 'json3'
    >  into table raw;

hive> select * from raw;  
OK
{"name":"Ravi","qual":["btech","mtech"]}
{"name":"avani","qual":["btech","mtech","mba"]}
{"name":"avinash","qual":["btech","mba"]}
Time taken: 0.233 seconds
hive> 

hive> create table raw2(name string, 
    >   qual  string);
OK
Time taken: 0.047 seconds
hive> insert overwrite table raw2
    >   select x.* from raw
    > lateral view
    >  json_tuple(line,'name','qual') x
    > as n,q;

hive> select * from raw2;
OK
Ravi    ["btech","mtech"]
avani   ["btech","mtech","mba"]
avinash ["btech","mba"]
Time taken: 0.064 seconds
hive> create table raw3(name string, 
    >   qual  array<string>);
OK
Time taken: 0.037 seconds
hive> insert overwrite table raw3
    >   select name, split(qual,',') from raw2;

hive> select * from raw3;
OK
Ravi    ["[\"btech\"","\"mtech\"]"]
avani   ["[\"btech\"","\"mtech\"","\"mba\"]"]
avinash ["[\"btech\"","\"mba\"]"]
Time taken: 0.063 seconds
hive> create table raw4(name string, qual string);
OK

hive> insert overwrite table raw4
    >   select name, myq from raw3
    > lateral view explode(qual) q as myq;

hive> create table info(name string, 
    >    qual string);
OK
Time taken: 0.039 seconds
hive> insert overwrite table info
    >   select name, split(qual,'"')[1]
    >  from raw4;

hive> select * from info;
OK
Ravi    btech
Ravi    mtech
avani   btech
avani   mtech
avani   mba
avinash btech
avinash mba
Time taken: 0.065 seconds
hive> 

Hbase Lab 3 : Hive and Hbase Integration

Hbase Lab 3 : Hive and Hbase Integration

Hive and Hbase Integration:
____________________________

 Advantages:
  i)  We can process HBase data using HQL queries.
  ii) HBase shell commands are suited for random operations over columns,
      but not for sequential filters, transformations, aggregations, etc.

   If an HBase table is integrated with Hive or Impala, we can perform all batch-related operations on it.

   Generally, a Hive table's backend storage is HDFS; if a Hive table is integrated with HBase,
 the Hive table's backend is the HBase table.

 When you submit an HQL statement, Hive fetches the data from the HBase table, and the query is executed in the MapReduce execution model in a parallel processing style.

Example of Integration:
______________________

i)  
hive> create table hbaseimg1(k int, v int) 
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,x:a")
    > ;
OK
Time taken: 3.446 seconds
hive> select * from hbaseimg1;
OK
Time taken: 0.307 seconds
hive> 

hbase(main):006:0> list 'hbaseimg1'
TABLE                                             
hbaseimg1                                         
1 row(s) in 0.0710 seconds

hbase(main):007:0> 


As per the above example, both the Hive and HBase table names are the same.

 To integrate a Hive table with an existing or differently named HBase table:

hive> create table hbaseimg2(k int, a int, b int)
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,x:a,x:b")
    > tblproperties("hbase.table.name"="hbasetabx");
OK
Time taken: 1.255 seconds
hive> 

 now hive table :  hbaseimg2
  its backend hbase table : hbasetabx


hbase(main):009:0> 
hbase(main):009:0> put 'hbasetabx',101,'x:a',10
0 row(s) in 0.0650 seconds

hbase(main):010:0> put 'hbasetabx',101,'x:b',20
0 row(s) in 0.0080 seconds

hbase(main):011:0> put 'hbasetabx',102,'x:a',100
0 row(s) in 0.0040 seconds

hbase(main):012:0> put 'hbasetabx',102,'x:b',200
0 row(s) in 0.0140 seconds

hbase(main):013:0> put 'hbasetabx',103,'x:a',1000
0 row(s) in 0.0090 seconds

hbase(main):014:0> put 'hbasetabx',103,'x:b',2000
0 row(s) in 0.0090 seconds

hbase(main):015:0> 

hbase(main):015:0> scan 'hbasetabx'
ROW           COLUMN+CELL                         
 101          column=x:a, timestamp=1467252561122,
               value=10                           
 101          column=x:b, timestamp=1467252568609,
               value=20                           
 102          column=x:a, timestamp=1467252578422,
               value=100                          
 102          column=x:b, timestamp=1467252583648,
               value=200                          
 103          column=x:a, timestamp=1467252595722,
               value=1000                         
 103          column=x:b, timestamp=1467252603469,
               value=2000                         
3 row(s) in 0.0660 seconds

hbase(main):016:0> 

hive> select * from hbaseimg1;
OK
Time taken: 0.152 seconds
hive> select * from hbaseimg2;
OK
101     10      20
102     100     200
103     1000    2000
Time taken: 0.16 seconds

_______________________________________
 mysql + sqoop + hbase + hive
____________________________

mysql> select * from emp;
+-----+-------+-------+------+------+
| id  | name  | sal   | sex  | dno  |
+-----+-------+-------+------+------+
| 101 | aaa   | 10000 | m    |   11 | 
| 102 | baaa  | 20000 | m    |   12 | 
| 103 | baaav | 30000 | f    |   12 | 
| 104 | xaaav | 50000 | f    |   11 | 
+-----+-------+-------+------+------+
4 rows in set (0.00 sec)

mysql> 
hive> create table demo1(id int, name string,
    >  sal int, sex string, dno int)
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,e:name,e:sal,e:sex,e:dno")
    > tblproperties("hbase.table.name"="hbdemo1");
OK
Time taken: 1.235 seconds
hive> select * from demo1;
OK
Time taken: 0.154 seconds
hive> 


[training@localhost ~]$ sqoop import \
>  --connect jdbc:mysql://localhost/sqdb \
>  --username root \
>  --table emp \
>  --hbase-table hbdemo1 \
>  --column-family e





hbase(main):018:0> list 'hbdemo1'
TABLE                                             
hbdemo1                                           
1 row(s) in 0.0600 seconds

hbase(main):019:0> scan 'hbdemo1'
ROW           COLUMN+CELL                         
 101          column=e:dno, timestamp=146725340767
              5, value=11                         
 101          column=e:name, timestamp=14672534076
              75, value=aaa                       
 101          column=e:sal, timestamp=146725340767
              5, value=10000                      
 101          column=e:sex, timestamp=146725340767
              5, value=m                          
 102          column=e:dno, timestamp=146725340790
              5, value=12                         
 102          column=e:name, timestamp=14672534079
              05, value=baaa                      
 102          column=e:sal, timestamp=146725340790
              5, value=20000                      
 102          column=e:sex, timestamp=146725340790
              5, value=m                          
 103          column=e:dno, timestamp=146725340905
              9, value=12                         
 103          column=e:name, timestamp=14672534090
              59, value=baaav                     
 103          column=e:sal, timestamp=146725340905
              9, value=30000                      
 103          column=e:sex, timestamp=146725340905
              9, value=f                          
 104          column=e:dno, timestamp=146725340905
              9, value=11                         
 104          column=e:name, timestamp=14672534090
              59, value=xaaav                     
 104          column=e:sal, timestamp=146725340905
              9, value=50000                      
 104          column=e:sex, timestamp=146725340905
              9, value=f                          
4 row(s) in 0.0540 seconds

hbase(main):020:0> 


hive> select * from demo1;
OK
101     aaa     10000   m       11
102     baaa    20000   m       12
103     baaav   30000   f       12
104     xaaav   50000   f       11
Time taken: 0.153 seconds
hive> 

_____________________________________

mysql> select * from ed;
+-----+-------+-------+------+-------+------+
| id  | name  | sal   | sex  | dname | loc  |
+-----+-------+-------+------+-------+------+
| 101 | aaa   | 10000 | m    | mrkt  | hyd  | 
| 102 | baaa  | 20000 | m    | hr    | del  | 
| 103 | baaav | 30000 | f    | hr    | del  | 
| 104 | xaaav | 50000 | f    | mrkt  | hyd  | 
+-----+-------+-------+------+-------+------+
4 rows in set (0.00 sec)

mysql> 

hive> create table demo2(id int, name string,
    >   sal int, sex string, 
    >  dname string, loc string)
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,e:name,e:sal,e:sex,d:dname,d:loc")
    > tblproperties("hbase.table.name"="hbdemo2");
OK
Time taken: 1.244 seconds
hive> 

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table ed --columns name,sal,sex,id  --hbase-table hbdemo2  --column-family e

hive> select * from demo2;
OK
101     aaa     10000   m       NULL    NULL
102     baaa    20000   m       NULL    NULL
103     baaav   30000   f       NULL    NULL
104     xaaav   50000   f       NULL    NULL
Time taken: 0.143 seconds
hive>

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table ed --columns dname,loc,id  --hbase-table hbdemo2  --column-family d


hive> select * from demo2;
OK
101     aaa     10000   m       mrkt    hyd
102     baaa    20000   m       hr      del
103     baaav   30000   f       hr      del
104     xaaav   50000   f       mrkt    hyd
Time taken: 0.13 seconds
hive> 
































Hbase lab2

HBase Lab2 : Importing data from RDBMS to Hbase

sqoop and hbase
__________________

Importing data from Rdbms to Hbase table.

mysql> create database  sqdb;
Query OK, 1 row affected (0.00 sec)

mysql> use sqdb;
Database changed

mysql> 


mysql> insert into emp values(101,'aaa',10000,'m',11);
Query OK, 1 row affected (0.00 sec)

mysql> insert into emp values(102,'baaa',20000,'m',12);
Query OK, 1 row affected (0.00 sec)

mysql> insert into emp values(103,'baaav',30000,'f',12);
Query OK, 1 row affected (0.00 sec)

mysql> insert into emp values(104,'xaaav',50000,'f',11);
Query OK, 1 row affected (0.00 sec)

mysql> 

mysql> select * from emp;
+-----+-------+-------+------+------+
| id  | name  | sal   | sex  | dno  |
+-----+-------+-------+------+------+
| 101 | aaa   | 10000 | m    |   11 | 
| 102 | baaa  | 20000 | m    |   12 | 
| 103 | baaav | 30000 | f    |   12 | 
| 104 | xaaav | 50000 | f    |   11 | 
+-----+-------+-------+------+------+
4 rows in set (0.00 sec)

mysql> 

hbase(main):047:0> create 'heroes1','e'
0 row(s) in 1.1840 seconds

hbase(main):048:0> 

[training@localhost ~]$ sqoop import \
>  --connect jdbc:mysql://localhost/sqdb \
>  --username root \
>  --table emp  \
>  --hbase-table heroes1 \
>  --column-family e

hbase(main):048:0> scan 'heroes1'
ROW           COLUMN+CELL                         
 101          column=e:dno, timestamp=146716789297
              1, value=11                         
 101          column=e:name, timestamp=14671678929
              71, value=aaa                       
 101          column=e:sal, timestamp=146716789297
              1, value=10000                      
 101          column=e:sex, timestamp=146716789297
              1, value=m                          
 102          column=e:dno, timestamp=146716789311
              0, value=12                         
 102          column=e:name, timestamp=14671678931
              10, value=baaa                      
 102          column=e:sal, timestamp=146716789311
              0, value=20000                      
 102          column=e:sex, timestamp=146716789311
              0, value=m                          
 103          column=e:dno, timestamp=146716789419
              1, value=12                         
 103          column=e:name, timestamp=14671678941
              91, value=baaav                     
 103          column=e:sal, timestamp=146716789419
              1, value=30000                      
 103          column=e:sex, timestamp=146716789419
              1, value=f                          
 104          column=e:dno, timestamp=146716789419
              1, value=11                         
 104          column=e:name, timestamp=14671678941
              91, value=xaaav                     
 104          column=e:sal, timestamp=146716789419
              1, value=50000                      
 104          column=e:sex, timestamp=146716789419
              1, value=f                          
4 row(s) in 0.0360 seconds

hbase(main):049:0> 
-----

If the target HBase table does not exist, add the --hbase-create-table option:

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table emp   --hbase-table heroes2  --column-family e   --hbase-create-table

----------------------------

If the table does not have a primary key, specify the row key column with --hbase-row-key and use a single mapper (-m 1):



mysql> create table emp2(id int, name char(10),
    ->   sal int, sex char(1), dno int);
Query OK, 0 rows affected (0.01 sec)

mysql> insert into emp2 select * from emp;
Query OK, 4 rows affected (0.00 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> select * from emp2;
+------+-------+-------+------+------+
| id   | name  | sal   | sex  | dno  |
+------+-------+-------+------+------+
|  101 | aaa   | 10000 | m    |   11 | 
|  102 | baaa  | 20000 | m    |   12 | 
|  103 | baaav | 30000 | f    |   12 | 
|  104 | xaaav | 50000 | f    |   11 | 
+------+-------+-------+------+------+
4 rows in set (0.00 sec)

mysql> insert into emp2 values(101,'xxxx',100000,'m',13);
Query OK, 1 row affected (0.00 sec)

mysql> select * from emp2;
+------+-------+--------+------+------+
| id   | name  | sal    | sex  | dno  |
+------+-------+--------+------+------+
|  101 | aaa   |  10000 | m    |   11 | 
|  102 | baaa  |  20000 | m    |   12 | 
|  103 | baaav |  30000 | f    |   12 | 
|  104 | xaaav |  50000 | f    |   11 | 
|  101 | xxxx  | 100000 | m    |   13 | 
+------+-------+--------+------+------+
5 rows in set (0.00 sec)

mysql> 

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table emp2 -m 1  --hbase-table heroines --hbase-row-key id   --column-family e   --hbase-create-table

_____________________________

mysql> select * from emp;
+-----+-------+-------+------+------+
| id  | name  | sal   | sex  | dno  |
+-----+-------+-------+------+------+
| 101 | aaa   | 10000 | m    |   11 | 
| 102 | baaa  | 20000 | m    |   12 | 
| 103 | baaav | 30000 | f    |   12 | 
| 104 | xaaav | 50000 | f    |   11 | 
+-----+-------+-------+------+------+
4 rows in set (0.00 sec)

mysql> create table dept(dno int, name char(10),
    ->   loc char(10));
Query OK, 0 rows affected (0.01 sec)

mysql> insert into dept values(11,'mrkt','hyd')
    -> ;
Query OK, 1 row affected (0.00 sec)

mysql> insert into dept values(12,'hr','del')
    -> ;
Query OK, 1 row affected (0.00 sec)

mysql> select * from dept;
+------+------+------+
| dno  | name | loc  |
+------+------+------+
|   11 | mrkt | hyd  | 
|   12 | hr   | del  | 
+------+------+------+
2 rows in set (0.00 sec)

mysql> create table ed(id int primary key, 
    ->   name char(10),
    ->  sal int,
    ->   sex char(1),
    ->   dname char(10),
    ->  loc char(10));
Query OK, 0 rows affected (0.00 sec)

mysql> 

mysql> insert into ed 
    ->  select id, l.name, sal, sex, r.name, loc
    ->   from emp l join dept r
    ->  where (l.dno=r.dno);
Query OK, 4 rows affected (0.00 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> select * from ed;
+-----+-------+-------+------+-------+------+
| id  | name  | sal   | sex  | dname | loc  |
+-----+-------+-------+------+-------+------+
| 101 | aaa   | 10000 | m    | mrkt  | hyd  | 
| 102 | baaa  | 20000 | m    | hr    | del  | 
| 103 | baaav | 30000 | f    | hr    | del  | 
| 104 | xaaav | 50000 | f    | mrkt  | hyd  | 
+-----+-------+-------+------+-------+------+
4 rows in set (0.00 sec)

mysql> 


mysql> select * from ed;
+-----+-------+-------+------+-------+------+
| id  | name  | sal   | sex  | dname | loc  |
+-----+-------+-------+------+-------+------+
| 101 | aaa   | 10000 | m    | mrkt  | hyd  | 
| 102 | baaa  | 20000 | m    | hr    | del  | 
| 103 | baaav | 30000 | f    | hr    | del  | 
| 104 | xaaav | 50000 | f    | mrkt  | hyd  | 
+-----+-------+-------+------+-------+------+
4 rows in set (0.00 sec)

mysql> 

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table ed  --columns name,sal,sex,id --hbase-table heroines2   --column-family e   --hbase-create-table



[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table ed  --columns dname,loc,id  --hbase-table heroines2   --column-family d  --hbase-create-table
______________________________