Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

Conversion of Hive Queries To MapReduce and Vice Versa

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 13

Conversion of Hive Queries to MapReduce and vice versa:

Q.1 Schema format: TableName|ColumnName:ColumnType|ColumnName:ColumnType|... Supported data type: INTEGER, DECIMAL, DATE and TEXT. T1|F1:INTEGER|F2:INTEGER T2|F3:INTEGER|F4:INTEGER

QUERY: select count(*) from

t1, t2 where t1.f1 = t2.f3 group by t1.f2, t2.f4;

Answer.1 : There are 2 result files. Each file represents a MapReduce job. TestQuery1.java TestQuery2.java TestQuery1.java Source Code:

public class TestQuery1 extends Configured implements Tool{ public static class Map extends Mapper<Object, Text,Text,Text>{ Hashtable<String,Double>[] adv_gb_output=new Hashtable[1]; Hashtable<String,Integer> adv_count_output=new Hashtable<String,Integer>(); public void setup(Context context) throws IOException, InterruptedException { for(int i =0;i<1;i++){ adv_gb_output[i] = new Hashtable<String,Double>(); } } public void cleanup(Context context) throws IOException, InterruptedException { for(String tmp_key:adv_count_output.keySet()){ Double count = (double) adv_count_output.get(tmp_key); adv_gb_output[0].put(tmp_key.toString(),count); context.write(new Text(tmp_key.toString()),new Text(count + "&"+"|")); } } public void map(Object key, Text value, Context context) throws IOException,InterruptedException{

String line = value.toString(); String[] line_buf = new String[2]; int prev=0,i=0,n=0; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 2) break; } if(n<2) line_buf[n] = line.substring(prev,i); String hash_key = line_buf[0]+"|"+line_buf[1]+"|"; if(adv_count_output.containsKey(hash_key)){ Integer count = adv_count_output.get(hash_key)+1; adv_count_output.put(hash_key,count); }else{ adv_count_output.put(hash_key,1); } } } public static class Reduce extends Reducer<Text,Text,NullWritable,Text>{ public void reduce(Text key, Iterable<Text> v, Context context) throws IOException,InterruptedException{ Iterator values = v.iterator(); Double[] result = new Double[1]; ArrayList[] d_count_buf = new ArrayList[1]; String tmp = ""; for(int i=0;i<1;i++){ result[i] = 0.0; d_count_buf[i] = new ArrayList(); } int[] al_line = new int[1]; for(int i=0;i<1;i++){ al_line[0] = 0; } int tmp_count = 0; while(values.hasNext()){ String[] tmp_buf = values.next().toString().split("\\|"); tmp = key.toString(); String[] agg_tmp; agg_tmp = tmp_buf[0].split("&"); al_line[0]+= Double.parseDouble(agg_tmp[0]); tmp_count++; } String[] line_buf = tmp.split("\\|");

result[0] = (double)al_line[0]; NullWritable key_op = NullWritable.get(); context.write(key_op,new Text((result[0]) + "|")); } } public int run(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = new Job(conf,"TestQuery1"); job.setJarByClass(TestQuery1.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TestQuery1(), args); System.exit(res); } }

TestQuery2.java Source Code:

public class TestQuery2 extends Configured implements Tool{ public static class Map extends Text,IntWritable,Text>{ Mapper<Object,

private int left = 0; public void setup(Context context) throws IOException, InterruptedException { int last_index = -1, start_index = -1; String path = ((FileSplit)context.getInputSplit()).getPath().toString(); last_index = path.lastIndexOf('/'); last_index = last_index - 1; start_index = path.lastIndexOf('/',last_index); String f_name = path.substring(start_index+1,last_index+1); if(f_name.compareTo("T1") == 0 ) left = 1; }

public void map(Object key, Text value,Context context) throws IOException,InterruptedException{ String line = value.toString(); int prev=0,i=0,n=0; if(this.left == 1){ String[] line_buf = new String[2]; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 2) break; } if(n<2) line_buf[n] = line.substring(prev,i); context.write(new IntWritable(Integer.parseInt(line_buf[0])), new Text("L"+"|" +Integer.parseInt(line_buf[1])+ "|" +Integer.parseInt(line_buf[0])+ "|" )); }else{ String[] line_buf = new String[2]; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 2) break; } if(n<2) line_buf[n] = line.substring(prev,i); context.write(new IntWritable(Integer.parseInt(line_buf[0])), new Text("R"+"|" +Integer.parseInt(line_buf[1])+ "|" +Integer.parseInt(line_buf[0])+ "|" )); } } } public static class Reduce extends Reducer<IntWritable,Text,NullWritable,Text>{ public void reduce(IntWritable key, Iterable<Text> v, Context context) throws IOException,InterruptedException{ Iterator values = v.iterator(); ArrayList al_left = new ArrayList(); ArrayList al_right = new ArrayList(); while(values.hasNext()){

String tmp = values.next().toString(); if(tmp.charAt(0) == 'L'){ al_left.add(tmp.substring(2)); }else{ al_right.add(tmp.substring(2)); } } NullWritable key_op = NullWritable.get(); for(int i=0;i<al_left.size();i++){ String[] left_buf = ((String)al_left.get(i)).split("\\|"); for(int j=0;j<al_right.size();j++){ String[] right_buf = ((String)al_right.get(j)).split("\\|"); if(Integer.parseInt(left_buf[1]) == Integer.parseInt(right_buf[1])){ context.write(key_op, new Text(Integer.parseInt(left_buf[0])+ "|" +Integer.parseInt(right_buf[0])+ "|" )); } } } } } public int run(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = new Job(conf,"TestQuery2"); job.setJarByClass(TestQuery2.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); FileInputFormat.addInputPath(job,new Path(args[0])); FileInputFormat.addInputPath(job,new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[2])); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TestQuery2(), args); System.exit(res); } }

Question2: Another Query:

select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg_disc, count(*) as count_order from avg(l_discount) as

lineitem where l_shipdate <= '1998-09-04 group by

l_returnflag, l_linestatus order by l_returnflag, l_linestatus;

Answer 2:

TestQuery1.java
/**
 * MapReduce job implementing {@code order by l_returnflag, l_linestatus}: rows are keyed by
 * their first two '|'-separated fields, sorted on those fields, and written by a single
 * reducer with the two key fields removed.
 *
 * <p>Usage: {@code TestQuery1 <input path> <output path>}. Each input line is expected to
 * carry 12 fields: the two sort columns followed by ten output columns.
 * NOTE(review): presumably the input is the output of the aggregation job (TestQuery2),
 * which writes the two group columns twice for this purpose - confirm.
 */
public class TestQuery1 extends Configured implements Tool {

    /** Number of leading '|'-terminated fields read from each input line. */
    private static final int FIELD_COUNT = 12;

    /** Splits a row into a sort key (fields 0-1) and a payload (fields 2-11). */
    public static class Map extends Mapper<Object, Text, Text, Text> {

        /**
         * Emits {@code ("f0|f1|", "f2|f3|...|f11|")}. Fields missing from a short line are
         * rendered as the text {@code null}.
         *
         * @param key   input key supplied by the input format (unused)
         * @param value one '|'-delimited input line
         */
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = splitFields(value.toString(), FIELD_COUNT);
            String sortKey = fields[0] + "|" + fields[1] + "|";
            StringBuilder payload = new StringBuilder();
            for (int i = 2; i < FIELD_COUNT; i++) {
                payload.append(fields[i]).append("|");
            }
            context.write(new Text(sortKey), new Text(payload.toString()));
        }

        /**
         * Returns the first {@code limit} '|'-terminated fields of {@code line}. When the line
         * holds fewer delimiters than {@code limit}, the remaining text after the last
         * delimiter becomes the next field and all later slots stay {@code null}.
         */
        private static String[] splitFields(String line, int limit) {
            String[] fields = new String[limit];
            int start = 0;
            int count = 0;
            while (count < limit) {
                int bar = line.indexOf('|', start);
                if (bar < 0) {
                    fields[count] = line.substring(start);
                    break;
                }
                fields[count++] = line.substring(start, bar);
                start = bar + 1;
            }
            return fields;
        }
    }

    /** Writes every payload unchanged, in the order the values arrive. */
    public static class Reduce extends Reducer<Text, Text, NullWritable, Text> {

        /**
         * @param key sort key (not written to the output)
         * @param v   payloads that share the sort key
         */
        public void reduce(Text key, Iterable<Text> v, Context context)
                throws IOException, InterruptedException {
            NullWritable key_op = NullWritable.get();
            for (Text payload : v) {
                context.write(key_op, new Text(payload.toString()));
            }
        }
    }

    /**
     * Configures and runs the job with a key-field based sort on key fields 1 and 2
     * (separator '|') and exactly one reduce task.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path
     * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
     */
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: TestQuery1 <input path> <output path>");
            return 2;
        }
        // Use the configuration ToolRunner populated from the generic options (-D, -conf, ...);
        // fall back to a fresh one only when run() is invoked without setConf().
        Configuration conf = getConf() != null ? getConf() : new Configuration();
        conf.set("mapreduce.partition.keycomparator.options", "-k1,1 -k2,2 ");
        conf.set("mapreduce.map.output.key.field.separator", "|");

        Job job = Job.getInstance(conf, "TestQuery1");
        job.setJarByClass(TestQuery1.class);
        job.setSortComparatorClass(KeyFieldBasedComparator.class);
        job.setPartitionerClass(KeyFieldBasedPartitioner.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // One reduce task, so all rows end up in a single sorted output.
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Command-line entry point; exits with the status returned by {@link #run}. */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new TestQuery1(), args);
        System.exit(res);
    }
}

TestQuery2.java

public class TestQuery2 extends Configured implements Tool{ public static class Map extends Mapper<Object, Text,Text,Text>{

Hashtable<String,Double>[] adv_gb_output=new Hashtable[8]; Hashtable<String,Integer> adv_count_output=new Hashtable<String,Integer>(); public void setup(Context context) throws IOException, InterruptedException { for(int i =0;i<8;i++){ adv_gb_output[i] = new Hashtable<String,Double>(); } } public void cleanup(Context context) throws IOException, InterruptedException { for(String tmp_key:adv_count_output.keySet()){ Double count = (double) adv_count_output.get(tmp_key); Double tmp_0 = adv_gb_output[0].get(tmp_key); Double tmp_1 = adv_gb_output[1].get(tmp_key); Double tmp_2 = adv_gb_output[2].get(tmp_key); Double tmp_3 = adv_gb_output[3].get(tmp_key); Double avg_4 = adv_gb_output[4].get(tmp_key); Double avg_5 = adv_gb_output[5].get(tmp_key); Double avg_6 = adv_gb_output[6].get(tmp_key); adv_gb_output[7].put(tmp_key.toString(),count); context.write(new Text(tmp_key.toString()),new Text(tmp_0 + "&"+"|"+tmp_1 + "&"+"|"+tmp_2 + "&"+"|"+tmp_3 + "&"+"|"+avg_4 + "&"+count+"|"+avg_5 + "&"+count+"|"+avg_6 + "&"+count+"|"+count + "&"+"|")); } } public void map(Object key, Text value, Context context) throws IOException,InterruptedException{ String line = value.toString(); String[] line_buf = new String[11]; int prev=0,i=0,n=0; for(i=0,n=0,prev=0;i<line.length();i++){ if (line.charAt(i) == '|'){ line_buf[n] = line.substring(prev,i); n = n+1; prev = i+1; } if(n == 11) break; } if(n<11) line_buf[n] = line.substring(prev,i); String hash_key = line_buf[8]+ "|" +line_buf[9]+ "|" ;

if(line_buf[10].compareTo("1998-09-04") <= 0){ if(adv_count_output.containsKey(hash_key)){ Integer count = adv_count_output.get(hash_key)+1; adv_count_output.put(hash_key,count); }else{ adv_count_output.put(hash_key,1); } if(adv_gb_output[0].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[4]); sum_tmp += adv_gb_output[0].get(hash_key); adv_gb_output[0].put(hash_key, sum_tmp); }else{ adv_gb_output[0].put(hash_key,(double)Double.parseDouble(line_buf[4 ])); } if(adv_gb_output[1].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[5]); sum_tmp += adv_gb_output[1].get(hash_key); adv_gb_output[1].put(hash_key, sum_tmp); }else{ adv_gb_output[1].put(hash_key,(double)Double.parseDouble(line_buf[5 ])); } if(adv_gb_output[2].containsKey(hash_key)){ Double sum_tmp = (double)((Double.parseDouble(line_buf[5]) * (1 Double.parseDouble(line_buf[6])))); sum_tmp += adv_gb_output[2].get(hash_key); adv_gb_output[2].put(hash_key, sum_tmp); }else{ adv_gb_output[2].put(hash_key,(double)((Double.parseDouble(line_buf [5]) * (1 - Double.parseDouble(line_buf[6]))))); } if(adv_gb_output[3].containsKey(hash_key)){ Double sum_tmp = (double)(((Double.parseDouble(line_buf[5]) * (1 Double.parseDouble(line_buf[6]))) * (1 + Double.parseDouble(line_buf[7])))); sum_tmp += adv_gb_output[3].get(hash_key); adv_gb_output[3].put(hash_key, sum_tmp); }else{ adv_gb_output[3].put(hash_key,(double)(((Double.parseDouble(line_bu f[5]) * (1 - Double.parseDouble(line_buf[6]))) * (1 + Double.parseDouble(line_buf[7]))))); } if(adv_gb_output[4].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[4]); sum_tmp += adv_gb_output[4].get(hash_key); adv_gb_output[4].put(hash_key, sum_tmp); }else{

adv_gb_output[4].put(hash_key,(double)Double.parseDouble(line_buf[4 ])); } if(adv_gb_output[5].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[5]); sum_tmp += adv_gb_output[5].get(hash_key); adv_gb_output[5].put(hash_key, sum_tmp); }else{ adv_gb_output[5].put(hash_key,(double)Double.parseDouble(line_buf[5 ])); } if(adv_gb_output[6].containsKey(hash_key)){ Double sum_tmp = (double)Double.parseDouble(line_buf[6]); sum_tmp += adv_gb_output[6].get(hash_key); adv_gb_output[6].put(hash_key, sum_tmp); }else{ adv_gb_output[6].put(hash_key,(double)Double.parseDouble(line_buf[6 ])); } } } } public static class Reduce extends Reducer<Text,Text,NullWritable,Text>{ public void reduce(Text key, Iterable<Text> v, Context context) throws IOException,InterruptedException{ Iterator values = v.iterator(); Double[] result = new Double[8]; ArrayList[] d_count_buf = new ArrayList[8]; String tmp = ""; for(int i=0;i<8;i++){ result[i] = 0.0; d_count_buf[i] = new ArrayList(); } int[] al_line = new int[8]; for(int i=0;i<8;i++){ al_line[7] = 0; } int tmp_count = 0; while(values.hasNext()){ String[] tmp_buf = values.next().toString().split("\\|"); tmp = key.toString(); String[] agg_tmp; agg_tmp = tmp_buf[0].split("&"); result[0] += Double.parseDouble(agg_tmp[0]); agg_tmp = tmp_buf[1].split("&"); result[1] += Double.parseDouble(agg_tmp[0]);

agg_tmp = tmp_buf[2].split("&"); result[2] += Double.parseDouble(agg_tmp[0]); agg_tmp = tmp_buf[3].split("&"); result[3] += Double.parseDouble(agg_tmp[0]); agg_tmp = tmp_buf[4].split("&"); result[4] += Double.parseDouble(agg_tmp[0]); al_line[4]+= Double.parseDouble(agg_tmp[1]); agg_tmp = tmp_buf[5].split("&"); result[5] += Double.parseDouble(agg_tmp[0]); al_line[5]+= Double.parseDouble(agg_tmp[1]); agg_tmp = tmp_buf[6].split("&"); result[6] += Double.parseDouble(agg_tmp[0]); al_line[6]+= Double.parseDouble(agg_tmp[1]); agg_tmp = tmp_buf[7].split("&"); al_line[7]+= Double.parseDouble(agg_tmp[0]); tmp_count++; } String[] line_buf = tmp.split("\\|"); result[4] = result[4] /al_line[4]; result[5] = result[5] /al_line[5]; result[6] = result[6] /al_line[6]; result[7] = (double)al_line[7]; NullWritable key_op = NullWritable.get(); context.write(key_op,new Text(line_buf[0] + "|"+line_buf[1] + "|"+line_buf[0] + "|"+line_buf[1] + "|"+(result[0]) + "|"+(result[1]) + "|"+(result[2]) + "|"+(result[3]) + "|"+(result[4]) + "|"+(result[5]) + "|"+(result[6]) + "|"+(result[7]) + "|")); } } public int run(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = new Job(conf,"TestQuery2"); job.setJarByClass(TestQuery2.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TestQuery2(), args); System.exit(res); } }

Question 3:

Query: Query: select d_year from ddate where d_year >=1992 and d_year <= 1997;

Answer:

TestQuery1.java
/**
 * MapReduce job implementing
 * {@code select d_year from ddate where d_year >= 1992 and d_year <= 1997}: it reads the
 * first '|'-separated field of every line as an integer and writes it when it lies in the
 * inclusive range 1992..1997.
 *
 * <p>Usage: {@code TestQuery1 <input path> <output path>}. No reducer class is configured,
 * so the framework's default reducer passes the mapper output through.
 */
public class TestQuery1 extends Configured implements Tool {

    /** Inclusive bounds of the {@code d_year} filter. */
    private static final int MIN_YEAR = 1992;
    private static final int MAX_YEAR = 1997;

    /** Filters rows on their first field. */
    public static class Map extends Mapper<Object, Text, NullWritable, IntWritable> {

        /**
         * Writes the first field when it is within the year range. A first field that is not
         * an integer causes a {@link NumberFormatException}.
         *
         * @param key   input key supplied by the input format (unused)
         * @param value one '|'-delimited input line
         */
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // First field: text up to the first '|', or the whole line when there is none.
            int bar = line.indexOf('|');
            String firstField = bar < 0 ? line : line.substring(0, bar);

            int year = Integer.parseInt(firstField);
            if (year >= MIN_YEAR && year <= MAX_YEAR) {
                context.write(NullWritable.get(), new IntWritable(year));
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path
     * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
     */
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: TestQuery1 <input path> <output path>");
            return 2;
        }
        // Use the configuration ToolRunner populated from the generic options (-D, -conf, ...);
        // fall back to a fresh one only when run() is invoked without setConf().
        Configuration conf = getConf() != null ? getConf() : new Configuration();
        Job job = Job.getInstance(conf, "TestQuery1");
        job.setJarByClass(TestQuery1.class);
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Command-line entry point; exits with the status returned by {@link #run}. */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new TestQuery1(), args);
        System.exit(res);
    }
}

You might also like