开发工具:Eclipse
目标:对下面文档phone_numbers进行倒排索引:
13599999999 10086
13899999999 120
13944444444 13800138000
13722222222 13800138000
18800000000 120
13722222222 10086
18944444444 10086
代码:
1 import java.io.IOException; 2 import org.apache.hadoop.conf.Configured; 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.fs.Path; 5 import org.apache.hadoop.util.Tool; 6 import org.apache.hadoop.util.ToolRunner; 7 import org.apache.hadoop.io.*; 8 import org.apache.hadoop.maPReduce.*; 9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;11 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;12 13 public class Test_1 extends Configured implements Tool 14 {15 enum Counter16 {17 LINESKIP, // error lines18 }19 20 public static class Map extends Mapper<LongWritable, Text, Text, Text>21 {22 public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException23 {24 String line = value.toString(); // read original data25 26 try27 {28 // process data29 String[] lineSplit = line.split(" ");30 String anum = lineSplit[0];31 String bnum = lineSplit[1];32 33 context.write(new Text(bnum), new Text(anum)); // map output34 }35 catch(java.lang.ArrayIndexOutOfBoundsException e)36 {37 context.getCounter(Counter.LINESKIP).increment(1);38 return;39 }40 41 }42 }43 public static class Reduce extends Reducer<Text, Text, Text, Text>44 {45 public void reduce(Text key, Iterable<Text>values, Context context)throws IOException, InterruptedException46 {47 String valueString;48 String out = "";49 50 for (Text value : values)51 {52 valueString = value.toString();53 out += valueString + "|";54 }55 56 context.write(key, new Text(out)); // reduce output57 }58 }59 public int run(String[] args)throws Exception60 {61 Configuration conf = getConf();62 63 Job job = new Job(conf, "Test_1"); // task name64 job.setJarByClass(Test_1.class); // specified task65 66 FileInputFormat.addInputPath(job, new Path(args[0])); // input path67 FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path68 69 job.setMapperClass(Map.class);70 
job.setReducerClass(Reduce.class);71 job.setOutputFormatClass(TextOutputFormat.class);72 job.setOutputKeyClass(Text.class);73 job.setOutputValueClass(Text.class);74 75 job.waitForCompletion(true);76 77 return job.isSuccessful() ? 0 : 1;78 }79 80 public static void main(String[] args)throws Exception81 {82 int res = ToolRunner.run(new Configuration(), new Test_1(), args);83 System.exit(res);84 }85 }
运行结果:
新闻热点
疑难解答