Hadoop: Mr 3

MR Lab 3
Grouping by multiple columns.

ex:

select dno, sex, sum(sal) from emp
group by dno, sex;

DnoSexSalMap.java
--------------------
package mr.analytics;

import java.io.IOException;

import
org.apache.hadoop.io.IntWritable;
import
org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import
org.apache.hadoop.mapreduce.Mapper;
public class DnoSexSalMap extends

Mapper
<LongWritable,Text,Text,IntWritable>
{
     // file : emp
     // schema : id,name,sal,sex,dno
    // delimiter : "," (comma)
// sample row : 101,amar,20000,m,11
//   sex as key, sal as value.
    public void map(LongWritable
k,Text v,
            Context con)
     throws IOException,
InterruptedException
     {
        String line =
v.toString();
      String[] w = line.split(",");
      String sex = w[3];
      String dno = w[4];
      String myKey = dno+"\t"+sex;
     int sal =Integer.parseInt(w[2]);
    con.write(new Text(myKey),new
IntWritable(sal));
     }
}

----------------
Driver8.java
----------------

package mr.analytics;

import

org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import

org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import

org.apache.hadoop.mapreduce.lib.input.F

ileInputFormat;
import

org.apache.hadoop.mapreduce.lib.output.

FileOutputFormat;

/**
 * Driver for the "group by dno, sex" job: wires {@code DnoSexSalMap} as the
 * mapper and {@code RedForSum} as the reducer, with {@code Text} keys and
 * {@code IntWritable} values as the job output types.
 *
 * <p>Usage: {@code Driver8 <input path> <output path>}
 */
public class Driver8
{
    /**
     * Configures the job, submits it and waits for completion.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception
    {
        // Fail with a readable message rather than an
        // ArrayIndexOutOfBoundsException when a path is missing.
        if (args.length < 2)
        {
            System.err.println("Usage: Driver8 <input path> <output path>");
            System.exit(2);
        }

        Configuration c = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String)
        // constructor.
        Job j = Job.getInstance(c, "d8");
        j.setJarByClass(Driver8.class);
        j.setMapperClass(DnoSexSalMap.class);
        j.setReducerClass(RedForSum.class);
        j.setOutputKeyClass(Text.class);
        j.setOutputValueClass(IntWritable.class);

        Path p1 = new Path(args[0]); //input
        Path p2 = new Path(args[1]); //output

        FileInputFormat.addInputPath(j, p1);
        FileOutputFormat.setOutputPath(j, p2);

        // Exit status 0 when the job succeeds, 1 otherwise.
        System.exit(j.waitForCompletion(true) ? 0 : 1);
    }
}

--------------------------

submit:

[training@localhost ~]$ hadoop fs -cat mrlab/r8/part-r-00000
11      f       25000
11      m       26000
12      f       18000
13      m       19000

______________________________

Hadoop

Wednesday, August 31, 2016

Mr 3

No comments:

Post a Comment