MapReduce Programs For Self Exercise
package com.sample;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.Tool;
package com.sample;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    // Emit (word, one) for every space-separated token on the line.
    // NOTE(review): split(" ") produces empty tokens for consecutive
    // spaces — presumably the input is single-space delimited; confirm
    // against the dataset (wc1.txt / wc2.txt).
    String[] tokens = value.toString().split(" ");
    for (String token : tokens) {
        context.write(new Text(token), one);
    }
}
}
Reducer Class of WordCount: (WordCountReducer.java)
package com.sample;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.io.Text;
1. Create a wordcount project in Eclipse and create driver, mapper and reducer classes in it.
2. Go to the project build path and add all the jar files, or the following jars:
hadoop-common.jar (/usr/lib/hadoop/hadoop-common.jar)
hadoop-core.jar (/usr/lib/hadoop-0.20mapreduce/hadoop-core.jar)
3. Once the errors are resolved, right-click on your project and export the
jar file (wordcount.jar).
4. Move the input dataset of wordcount (wc1.txt and wc2.txt) from the local filesystem to
the Hadoop filesystem:
hadoop fs -mkdir /user/cloudera/wordcount_input
hadoop fs -put wc1.txt /user/cloudera/wordcount_input
hadoop fs -put wc2.txt /user/cloudera/wordcount_input
package weather;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
// --- Weather driver fragment (method and class headers lost in extraction) ---
// Configures the MapReduce job: mapper/reducer classes, key/value types,
// three named outputs, and the input/output paths taken from args.
job.setMapperClass(WeatherMapper.class);
job.setReducerClass(WeatherReducer.class);
// Map output pair: (Text city/date key, FloatWritable temperature).
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FloatWritable.class);
// Final reducer output pair: (Text, Text).
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Named outputs allow the reducer to route records into separate files via
// MultipleOutputs. caOutputName / nyOutputName / njOutputName are presumably
// String fields declared elsewhere in the driver — not visible here; confirm.
MultipleOutputs.addNamedOutput(job, caOutputName, TextOutputFormat.class,
Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, nyOutputName, TextOutputFormat.class,
Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, njOutputName, TextOutputFormat.class,
Text.class, Text.class);
// args[0] = HDFS input path, args[1] = HDFS output path (must not already exist).
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Block until the job completes; exit 0 on success, 1 on failure.
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Mapper Class of Weather Report: (WeatherMapper.java)
package weather;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Parses one tab-delimited day report of the form:
 *   cityDate \t time1 \t temp1 \t time2 \t temp2 \t ...
 * and emits two records for the day keyed by cityDate: first the maximum
 * temperature, then the minimum temperature.
 *
 * Fixes over the original:
 *  - maxTemp was seeded with Float.MIN_VALUE, which is the smallest
 *    POSITIVE float (~1.4e-45), so a sub-zero reading could never become
 *    the maximum. It is now seeded with -Float.MAX_VALUE.
 *  - The min and max updates were coupled with else-if, so a single
 *    reading (e.g. the first temperature of the day) could never update
 *    both extremes. The checks are now independent.
 */
public void map(Object key, Text dayReport, Context context) throws IOException,
InterruptedException
{
    StringTokenizer st2 = new StringTokenizer(dayReport.toString(), "\t");
    int counter = 0;
    String cityDateString = "";
    String maxTempTime = "";
    String minTempTime = "";
    String curTime = "";
    float curTemp = 0;
    float minTemp = Float.MAX_VALUE;   // any real reading is smaller
    float maxTemp = -Float.MAX_VALUE;  // was Float.MIN_VALUE (bug): any real reading is larger
    while (st2.hasMoreElements())
    {
        if (counter == 0)
        {
            // First token is the city/date identifier.
            cityDateString = st2.nextToken();
        }
        else if (counter % 2 == 1)
        {
            // Odd positions carry the observation time.
            curTime = st2.nextToken();
        }
        else
        {
            // Even positions carry the temperature for the preceding time.
            curTemp = Float.parseFloat(st2.nextToken());
            if (curTemp < minTemp)
            {
                minTemp = curTemp;
                minTempTime = curTime;
            }
            // Independent check (was else-if): one reading may be both min and max.
            if (curTemp > maxTemp)
            {
                maxTemp = curTemp;
                maxTempTime = curTime;
            }
        }
        counter++;
    }
    // Emit max first, then min, keyed by city/date.
    // NOTE(review): fValue and cityDate are presumably FloatWritable/Text
    // fields of the mapper class — not visible in this chunk; confirm.
    fValue.set(maxTemp);
    cityDate.set(cityDateString);
    context.write(cityDate, fValue);
    fValue.set(minTemp);
    cityDate.set(cityDateString);
    context.write(cityDate, fValue);
}
}
Reducer Class of Weather Report: (WeatherReducer.java)
package weather;
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
@Override
// Lifecycle hook: runs exactly once after all reduce() calls on this task.
public void cleanup(Context context) throws IOException, InterruptedException
{
// Closing MultipleOutputs flushes and closes every named-output writer;
// skipping this can leave truncated output files. 'mos' is presumably a
// MultipleOutputs field initialized in setup() — not visible here; confirm.
mos.close();
}
}
1. Create a weather project in Eclipse and create driver, mapper and reducer classes in it.
2. Go to the project build path and add all the jar files, or the following jars:
hadoop-common.jar (/usr/lib/hadoop/hadoop-common.jar)
hadoop-core.jar (/usr/lib/hadoop-0.20mapreduce/hadoop-core.jar)
3. Once the errors are resolved, right-click on your project and export the jar file.
4. Move the input dataset of the weather report (wreport.txt) from the local filesystem to
the Hadoop filesystem.
package matrix;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
package matrix;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
}
}
Reducer Class of Matrix Multiplication: (MatrixReducer.java)
package matrix;
import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
1. Create a Matrix project in Eclipse and create driver, mapper and reducer classes in it.
2. Go to the project build path and add all the jar files, or the following jars:
hadoop-common.jar (/usr/lib/hadoop/hadoop-common.jar)
hadoop-core.jar (/usr/lib/hadoop-0.20mapreduce/hadoop-core.jar)
3. Once the errors are resolved, right-click on your project and export the jar file.
4. Move the input dataset of Matrix Multiplication (data1.txt or data2.txt) from the local
filesystem to the Hadoop filesystem.
hadoop fs -cat /user/cloudera/matrix_output/part-r-00000