Hive Lab 13: Eliminating Duplicates and Unions (Merging)
[training@localhost ~]$ cat dupes
101,aaa,10000
101,bbb,20000
101,aaa,10000
101,aaa,10000
101,aaa,10000
102,bbb,40000
103,cccc,50000
102,bbb,40000
102,bbb,40000
[training@localhost ~]$
hive> create database hdp;
OK
Time taken: 1.702 seconds
hive> use hdp;
OK
Time taken: 0.018 seconds
hive> create table info(id int, name string,
> sal int)
> row format delimited fields terminated
> by ',';
OK
Time taken: 0.439 seconds
hive>
hive> load data local inpath 'dupes'
> into table info;
Copying data from file:/home/training/dupes
Copying file: file:/home/training/dupes
Loading data to table hdp.info
OK
Time taken: 0.209 seconds
hive> select * from info;
OK
101 aaa 10000
101 bbb 20000
101 aaa 10000
101 aaa 10000
101 aaa 10000
102 bbb 40000
103 cccc 50000
102 bbb 40000
102 bbb 40000
Time taken: 0.201 seconds
hive>
hive> select distinct id, name, sal
> from info;
No comments:
Post a Comment