Hadoop实例之利用MapReduce实现Wordcount单词统计 (附源代码)

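大致思路是将 hdfs 上的文本作为输入，MapReduce 通过 InputFormat 对文本进行切片处理，并将每行首字符相对于文本文件起始位置的偏移量作为输入键值对的 key，该行文本内容作为输入键值对的 value；经过 map 函数处理后，输出形如 <word,1> 的中间结果，最后在 reduce 函数中完成对每个单词的词频统计。整个程序代码主要包括两部分：Mapper 部分和 Reducer 部分。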

Mapper

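在 map 函数里有三个参数，前面两个 Object key, Text value 就是输入的 key 和 value，第三个参数 Context context 用来记录输出的 key 和 value，例如：context.write(word,one)；此外 context 还会记录 map 运算的状态。map 阶段采用 Hadoop 默认的作业输入方式，把输入的 value 用 StringTokenizer() 方法截取出的单词设置为 key，设置 value 为1，然后直接输出 <key,value>。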

Reducer

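map 输出的 <key,value> 先要经过 shuffle 过程，把相同 key 值的所有 value 聚集起来形成 <key,values> 后交给 reduce 端。reduce 端接收到 <key,values> 之后，将输入的 key 直接复制给输出的 key，用 for 循环遍历 values 并求和，求和结果就是 key 值所代表的单词出现的总次数，将其设置为 value，直接输出 <key,value>。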

import  java.io.IOException;   import  java.util.StringTokenizer;   import  org.apache.hadoop.fs.Path;   import  org.apache.hadoop.io.IntWritable;   import  org.apache.hadoop.io.Text;   import  org.apache.hadoop.mapreduce.Job;   import  org.apache.hadoop.mapreduce.Mapper;   import  org.apache.hadoop.mapreduce.Reducer;   import  org.apache.hadoop.mapreduce.lib.input.FileInputFormat;   import  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;   public  class  WordCount  {       public  static  void  main(String[]  args)  throws  IOException,  ClassNotFoundException,  InterruptedException  {           Job  job  =  Job.getInstance();           job.setJobName("WordCount");           job.setJarByClass(WordCount.class);           job.setMapperClass(doMapper.class);           job.setReducerClass(doReducer.class);           job.setOutputKeyClass(Text.class);           job.setOutputValueClass(IntWritable.class);           Path  in  =  new  Path("hdfs://192.168.68.130:9000/user/hadoop/wordcount.txt"); //需要统计的文本所在位置          Path  out  = new Path("hdfs://192.168.68.130:9000/user/hadoop/output3");  //注意output3不能存在         FileInputFormat.addInputPath(job,  in);           FileOutputFormat.setOutputPath(job,  out);           System.exit(job.waitForCompletion(true) ? 
0  :  1);       }       public  static  class  doMapper  extends  Mapper<Object,  Text,  Text,  IntWritable>{           public  static  final  IntWritable  one  =  new  IntWritable(1);           public  static  Text  word  =  new  Text();           @Override           protected  void  map(Object  key,  Text  value,  Context  context)                       throws  IOException,  InterruptedException  {               StringTokenizer  tokenizer  =  new  StringTokenizer(value.toString(),  " ");               while(tokenizer.hasMoreTokens()) {                   word.set(tokenizer.nextToken());                   context.write(word,  one);               }                  }       }       public  static  class  doReducer  extends  Reducer<Text,  IntWritable,  Text,  IntWritable>{           private  IntWritable  result  =  new  IntWritable();           @Override           protected  void  reduce(Text  key,  Iterable<IntWritable>  values,  Context  context)           throws  IOException,  InterruptedException  {           int  sum  =  0;           for  (IntWritable  value  :  values)  {           sum  +=  value.get();           }           result.set(sum);           System.out.println(sum);         context.write(key,  result);           }       }   }

来源：博客园

作者：sakura**

链接：https://www.cnblogs.com/sakura--/p/11448874.html

标签

MapReduce

Hadoop

mapreduce实例