Conversion of Hive Queries To MapReduce and Vice Versa
Conversion of Hive Queries To MapReduce and Vice Versa
Conversion of Hive Queries To MapReduce and Vice Versa
Q.1 Schema format: TableName|ColumnName:ColumnType|ColumnName:ColumnType|... Supported data types: INTEGER, DECIMAL, DATE and TEXT. Schema used in this question: T1|F1:INTEGER|F2:INTEGER and T2|F3:INTEGER|F4:INTEGER
Answer 1: There are two result files, each representing one MapReduce job: TestQuery1.java and TestQuery2.java. TestQuery1.java source code:
/**
 * MapReduce job that counts input rows per distinct pair of leading columns.
 *
 * <p>Each input line is read as '|'-separated text. The mapper counts rows per
 * (column 0, column 1) pair in memory and emits one partial count per pair when it
 * finishes; the reducer adds up the partial counts of a pair and writes the total,
 * formatted as a double followed by '|' (for example {@code 3.0|}).
 *
 * <p>Arguments: {@code args[0]} is the input path, {@code args[1]} the output path.
 */
public class TestQuery1 extends Configured implements Tool {

    /** Counts rows per group key in memory and emits the partial counts in {@link #cleanup}. */
    public static class Map extends Mapper<Object, Text, Text, Text> {

        /** Rows seen so far by this mapper, keyed by {@code "col0|col1|"}. */
        Hashtable<String, Integer> adv_count_output = new Hashtable<String, Integer>();

        /**
         * Splits the leading fields off a '|'-separated line.
         *
         * @param line      the raw input line
         * @param maxFields number of leading fields to extract
         * @return an array of length {@code maxFields}; entries the line does not provide stay null
         */
        private static String[] splitFields(String line, int maxFields) {
            String[] fields = new String[maxFields];
            int found = 0;
            int start = 0;
            int i;
            for (i = 0; i < line.length() && found < maxFields; i++) {
                if (line.charAt(i) == '|') {
                    fields[found++] = line.substring(start, i);
                    start = i + 1;
                }
            }
            if (found < maxFields) {
                // No trailing separator: the rest of the line is the last field.
                fields[found] = line.substring(start, i);
            }
            return fields;
        }

        /** Increments the in-memory counter of the row's group; nothing is emitted here. */
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = splitFields(value.toString(), 2);
            String hash_key = fields[0] + "|" + fields[1] + "|";
            Integer seen = adv_count_output.get(hash_key);
            adv_count_output.put(hash_key, seen == null ? 1 : seen + 1);
        }

        /** Emits one {@code "<count>&|"} record per group key seen by this mapper. */
        public void cleanup(Context context) throws IOException, InterruptedException {
            for (String groupKey : adv_count_output.keySet()) {
                double count = adv_count_output.get(groupKey);
                context.write(new Text(groupKey), new Text(count + "&" + "|"));
            }
        }
    }

    /** Adds up the partial counts of one group and writes the total. */
    public static class Reduce extends Reducer<Text, Text, NullWritable, Text> {

        public void reduce(Text key, Iterable<Text> v, Context context) throws IOException, InterruptedException {
            // long, not int: an int accumulator fed by "+= double" saturates at Integer.MAX_VALUE.
            long total = 0;
            for (Text partial : v) {
                String[] columns = partial.toString().split("\\|");
                String[] countPart = columns[0].split("&");
                total += (long) Double.parseDouble(countPart[0]);
            }
            context.write(NullWritable.get(), new Text((double) total + "|"));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path
     * @return 0 if the job succeeded, 1 otherwise
     */
    public int run(String[] args) throws Exception {
        // Use the configuration ToolRunner injected so generic options (-D, -conf, ...) take effect.
        Job job = new Job(getConf(), "TestQuery1");
        job.setJarByClass(TestQuery1.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new TestQuery1(), args);
        System.exit(res);
    }
}
public class TestQuery2 extends Configured implements Tool{ public static class Map extends Text,IntWritable,Text>{ Mapper<Object,
private int left = 0; public void setup(Context context) throws IOException, InterruptedException { int last_index = -1, start_index = -1; String path = ((FileSplit)context.getInputSplit()).getPath().toString(); last_index = path.lastIndexOf('/'); last_index = last_index - 1; start_index = path.lastIndexOf('/',last_index); String f_name = path.substring(start_index+1,last_index+1); if(f_name.compareTo("T1") == 0 ) left = 1; }
public void map(Object key, Text value,Context context) throws IOException,InterruptedException{ String line = value.toString(); int prev=0,i=0,n=0; if(this.left == 1){ String[] line_buf = new String[2]; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 2) break; } if(n<2) line_buf[n] = line.substring(prev,i); context.write(new IntWritable(Integer.parseInt(line_buf[0])), new Text("L"+"|" +Integer.parseInt(line_buf[1])+ "|" +Integer.parseInt(line_buf[0])+ "|" )); }else{ String[] line_buf = new String[2]; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 2) break; } if(n<2) line_buf[n] = line.substring(prev,i); context.write(new IntWritable(Integer.parseInt(line_buf[0])), new Text("R"+"|" +Integer.parseInt(line_buf[1])+ "|" +Integer.parseInt(line_buf[0])+ "|" )); } } } public static class Reduce extends Reducer<IntWritable,Text,NullWritable,Text>{ public void reduce(IntWritable key, Iterable<Text> v, Context context) throws IOException,InterruptedException{ Iterator values = v.iterator(); ArrayList al_left = new ArrayList(); ArrayList al_right = new ArrayList(); while(values.hasNext()){
String tmp = values.next().toString(); if(tmp.charAt(0) == 'L'){ al_left.add(tmp.substring(2)); }else{ al_right.add(tmp.substring(2)); } } NullWritable key_op = NullWritable.get(); for(int i=0;i<al_left.size();i++){ String[] left_buf = ((String)al_left.get(i)).split("\\|"); for(int j=0;j<al_right.size();j++){ String[] right_buf = ((String)al_right.get(j)).split("\\|"); if(Integer.parseInt(left_buf[1]) == Integer.parseInt(right_buf[1])){ context.write(key_op, new Text(Integer.parseInt(left_buf[0])+ "|" +Integer.parseInt(right_buf[0])+ "|" )); } } } } } public int run(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = new Job(conf,"TestQuery2"); job.setJarByClass(TestQuery2.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileInputFormat.addInputPath(job,new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[2])); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TestQuery2(), args); System.exit(res); } }
select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from ... (the remainder of the query is missing from this document)
Answer 2:
TestQuery1.java
/**
 * MapReduce job that orders '|'-separated rows by their first two columns.
 *
 * <p>The mapper uses columns 0 and 1 as the shuffle key and columns 2 to 11 as the
 * value. The job runs a single reducer with a key-field comparator configured for
 * fields 1 and 2, and the reducer writes the values (without the key columns) in the
 * order they arrive.
 *
 * <p>Arguments: {@code args[0]} is the input path, {@code args[1]} the output path.
 */
public class TestQuery1 extends Configured implements Tool {

    /** Splits each row into a two-column sort key and a ten-column payload. */
    public static class Map extends Mapper<Object, Text, Text, Text> {

        /**
         * Splits the leading fields off a '|'-separated line.
         *
         * @param line      the raw input line
         * @param maxFields number of leading fields to extract
         * @return an array of length {@code maxFields}; entries the line does not provide stay null
         */
        private static String[] splitFields(String line, int maxFields) {
            String[] fields = new String[maxFields];
            int found = 0;
            int start = 0;
            int i;
            for (i = 0; i < line.length() && found < maxFields; i++) {
                if (line.charAt(i) == '|') {
                    fields[found++] = line.substring(start, i);
                    start = i + 1;
                }
            }
            if (found < maxFields) {
                // No trailing separator: the rest of the line is the last field.
                fields[found] = line.substring(start, i);
            }
            return fields;
        }

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = splitFields(value.toString(), 12);

            String sortKey = fields[0] + "|" + fields[1] + "|";

            StringBuilder payload = new StringBuilder();
            for (int column = 2; column < 12; column++) {
                payload.append(fields[column]).append("|");
            }
            context.write(new Text(sortKey), new Text(payload.toString()));
        }
    }

    /** Writes every value unchanged, dropping the sort key. */
    public static class Reduce extends Reducer<Text, Text, NullWritable, Text> {

        public void reduce(Text key, Iterable<Text> v, Context context) throws IOException, InterruptedException {
            NullWritable key_op = NullWritable.get();
            for (Text row : v) {
                context.write(key_op, new Text(row.toString()));
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path
     * @return 0 if the job succeeded, 1 otherwise
     */
    public int run(String[] args) throws Exception {
        // Start from the configuration ToolRunner injected so generic options (-D, -conf, ...)
        // are kept, then add the key-field settings this job relies on.
        Configuration conf = getConf();
        conf.set("mapreduce.partition.keycomparator.options", "-k1,1 -k2,2 ");
        conf.set("mapreduce.map.output.key.field.separator", "|");

        Job job = new Job(conf, "TestQuery1");
        job.setJarByClass(TestQuery1.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setSortComparatorClass(KeyFieldBasedComparator.class);
        job.setPartitionerClass(KeyFieldBasedPartitioner.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // A single reducer so all rows end up in one output file.
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new TestQuery1(), args);
        System.exit(res);
    }
}
TestQuery2.java
public class TestQuery2 extends Configured implements Tool{ public static class Map extends Mapper<Object, Text,Text,Text>{
Hashtable<String,Double>[] adv_gb_output=new Hashtable[8]; Hashtable<String,Integer> adv_count_output=new Hashtable<String,Integer>(); public void setup(Context context) throws IOException, InterruptedException { for(int i =0;i<8;i++){ adv_gb_output[i] = new Hashtable<String,Double>(); } } public void cleanup(Context context) throws IOException, InterruptedException { for(String tmp_key:adv_count_output.keySet()){ Double count = (double) adv_count_output.get(tmp_key); Double tmp_0 = adv_gb_output[0].get(tmp_key); Double tmp_1 = adv_gb_output[1].get(tmp_key); Double tmp_2 = adv_gb_output[2].get(tmp_key); Double tmp_3 = adv_gb_output[3].get(tmp_key); Double avg_4 = adv_gb_output[4].get(tmp_key); Double avg_5 = adv_gb_output[5].get(tmp_key); Double avg_6 = adv_gb_output[6].get(tmp_key); adv_gb_output[7].put(tmp_key.toString(),count); context.write(new Text(tmp_key.toString()),new Text(tmp_0 + "&"+"|"+tmp_1 + "&"+"|"+tmp_2 + "&"+"|"+tmp_3 + "&"+"|"+avg_4 + "&"+count+"|"+avg_5 + "&"+count+"|"+avg_6 + "&"+count+"|"+count + "&"+"|")); } } public void map(Object key, Text value, Context context) throws IOException,InterruptedException{ String line = value.toString(); String[] line_buf = new String[11]; int prev=0,i=0,n=0; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 11) break; } if(n<11) line_buf[n] = line.substring(prev,i); String hash_key = line_buf[8]+ "|" +line_buf[9]+ "|" ;
if(line_buf[10].compareTo("1998-09-04") <= 0){ if(adv_count_output.containsKey(hash_key)){ Integer count = adv_count_output.get(hash_key)+1; adv_count_output.put(hash_key,count); }else{ adv_count_output.put(hash_key,1); } if(adv_gb_output[0].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[4]); sum_tmp += adv_gb_output[0].get(hash_key); adv_gb_output[0].put(hash_key, sum_tmp); }else{ adv_gb_output[0].put(hash_key,(double)Double.parseDouble(line_buf[4 ])); } if(adv_gb_output[1].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[5]); sum_tmp += adv_gb_output[1].get(hash_key); adv_gb_output[1].put(hash_key, sum_tmp); }else{ adv_gb_output[1].put(hash_key,(double)Double.parseDouble(line_buf[5 ])); } if(adv_gb_output[2].containsKey(hash_key)){ Double sum_tmp = (double)((Double.parseDouble(line_buf[5]) * (1 Double.parseDouble(line_buf[6])))); sum_tmp += adv_gb_output[2].get(hash_key); adv_gb_output[2].put(hash_key, sum_tmp); }else{ adv_gb_output[2].put(hash_key,(double)((Double.parseDouble(line_buf [5]) * (1 - Double.parseDouble(line_buf[6]))))); } if(adv_gb_output[3].containsKey(hash_key)){ Double sum_tmp = (double)(((Double.parseDouble(line_buf[5]) * (1 Double.parseDouble(line_buf[6]))) * (1 + Double.parseDouble(line_buf[7])))); sum_tmp += adv_gb_output[3].get(hash_key); adv_gb_output[3].put(hash_key, sum_tmp); }else{ adv_gb_output[3].put(hash_key,(double)(((Double.parseDouble(line_bu f[5]) * (1 - Double.parseDouble(line_buf[6]))) * (1 + Double.parseDouble(line_buf[7]))))); } if(adv_gb_output[4].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[4]); sum_tmp += adv_gb_output[4].get(hash_key); adv_gb_output[4].put(hash_key, sum_tmp); }else{
adv_gb_output[4].put(hash_key,(double)Double.parseDouble(line_buf[4 ])); } if(adv_gb_output[5].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[5]); sum_tmp += adv_gb_output[5].get(hash_key); adv_gb_output[5].put(hash_key, sum_tmp); }else{ adv_gb_output[5].put(hash_key,(double)Double.parseDouble(line_buf[5 ])); } if(adv_gb_output[6].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[6]); sum_tmp += adv_gb_output[6].get(hash_key); adv_gb_output[6].put(hash_key, sum_tmp); }else{ adv_gb_output[6].put(hash_key,(double)Double.parseDouble(line_buf[6 ])); } } } } public static class Reduce extends Reducer<Text,Text,NullWritable,Text>{ public void reduce(Text key, Iterable<Text> v, Context context) throws IOException,InterruptedException{ Iterator values = v.iterator(); Double[] result = new Double[8]; ArrayList[] d_count_buf = new ArrayList[8]; String tmp = ""; for(int i=0;i<8;i++){ result[i] = 0.0; d_count_buf[i] = new ArrayList(); } int[] al_line = new int[8]; for(int i=0;i<8;i++){ al_line[7] = 0; } int tmp_count = 0; while(values.hasNext()){ String[] tmp_buf = values.next().toString().split("\\|"); tmp = key.toString(); String[] agg_tmp; agg_tmp = tmp_buf[0].split("&"); result[0] += Double.parseDouble(agg_tmp[0]); agg_tmp = tmp_buf[1].split("&"); result[1] += Double.parseDouble(agg_tmp[0]);
agg_tmp = tmp_buf[2].split("&"); result[2] += Double.parseDouble(agg_tmp[0]); agg_tmp = tmp_buf[3].split("&"); result[3] += Double.parseDouble(agg_tmp[0]); agg_tmp = tmp_buf[4].split("&"); result[4] += Double.parseDouble(agg_tmp[0]); al_line[4]+= Double.parseDouble(agg_tmp[1]); agg_tmp = tmp_buf[5].split("&"); result[5] += Double.parseDouble(agg_tmp[0]); al_line[5]+= Double.parseDouble(agg_tmp[1]); agg_tmp = tmp_buf[6].split("&"); result[6] += Double.parseDouble(agg_tmp[0]); al_line[6]+= Double.parseDouble(agg_tmp[1]); agg_tmp = tmp_buf[7].split("&"); al_line[7]+= Double.parseDouble(agg_tmp[0]); tmp_count++; } String[] line_buf = tmp.split("\\|"); result[4] = result[4] /al_line[4]; result[5] = result[5] /al_line[5]; result[6] = result[6] /al_line[6]; result[7] = (double)al_line[7]; NullWritable key_op = NullWritable.get(); context.write(key_op,new Text(line_buf[0] + "|"+line_buf[1] + "|"+line_buf[0] + "|"+line_buf[1] + "|"+(result[0]) + "|"+(result[1]) + "|"+(result[2]) + "|"+(result[3]) + "|"+(result[4]) + "|"+(result[5]) + "|"+(result[6]) + "|"+(result[7]) + "|")); } } public int run(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = new Job(conf,"TestQuery2"); job.setJarByClass(TestQuery2.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TestQuery2(), args); System.exit(res); } }
Question 3:
Query: select d_year from ddate where d_year >= 1992 and d_year <= 1997;
Answer:
TestQuery1.java
/**
 * MapReduce job that selects the first column of each '|'-separated row when its
 * integer value lies between 1992 and 1997 inclusive.
 *
 * <p>No reducer class is configured; the mapper emits the matching values under a
 * {@code NullWritable} key.
 *
 * <p>Arguments: {@code args[0]} is the input path, {@code args[1]} the output path.
 */
public class TestQuery1 extends Configured implements Tool {

    /** Emits the first column of a row when it falls inside the requested range. */
    public static class Map extends Mapper<Object, Text, NullWritable, IntWritable> {

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();

            // First field: everything up to the first '|', or the whole line if there is none.
            int separator = line.indexOf('|');
            String firstField = separator < 0 ? line : line.substring(0, separator);

            // Parse once and reuse the value for both bounds and for the output.
            int year = Integer.parseInt(firstField);
            if (year >= 1992 && year <= 1997) {
                context.write(NullWritable.get(), new IntWritable(year));
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path
     * @return 0 if the job succeeded, 1 otherwise
     */
    public int run(String[] args) throws Exception {
        // Use the configuration ToolRunner injected so generic options (-D, -conf, ...) take effect.
        Job job = new Job(getConf(), "TestQuery1");
        job.setJarByClass(TestQuery1.class);
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new TestQuery1(), args);
        System.exit(res);
    }
}