wordcount代码

时间 2020-02-18

原文链接

1.写出map类

public class WCMapper extends Mapper<LongWritable,Text,Text,LongWritable>{缓存

   @Override
   protected void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{

       //value转换成string类型返回字符串
       String line = value.toSting();
       //使用空格切分单词返回字符数组
       String[] words = StringUtils.split(line," ");

       //遍历这个数组，按照特定格式输出key-value形式 key:单词 value：1
       for(String word : words){
           context.write(new Text(word),new LongWritable(1));
       }
       LongWritable,Text,Text,LongWritable

   }
       app

}框架

2.写出reduce类

public class WCReducer extends Reducer<Text,LongWritable,Text,LongWritable>{

   //框架在map处理完以后，将全部kv对缓存起来，进行分组，而后传递给<key,values{}>,调用一次reduce方法
   //<hello,{1,1,1,1,1,1.......}>
   @Override
   protected void reduce(Text key,Iterable<LongWriteable> values,Context context)throws IOException,InterruptedException{
       long count = 0;
       //遍历values的list，进行累加求和
       for(LongWriteable value:values){
           count += value.get();
       }
       //输出这个单词的统计结果
       context.write(key,new LongWritable(count));
   }

}oop

3.job提交类

   //用来描述一个特定的做业，好比说
   //该job使用哪一个类做为逻辑处理中的map，哪一个做为reduce
   //指定需处理的数据的路径
   //指定输出结果路径
public class WCRunner{
   public static void main(String[] args){

       Context conf = new Context();

       Job job = Job.getInstance(conf);

       //设置整个job所用的那些类在哪一个jar包
       job.setJarByClass(WCRunner);

       //使用map、reduce的类
       job.setMapperClass(WCMapper.class);
       job.setReduceClass(WCReducer.class);

       //指定reduce的输出的kv类型
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(LongWritable.class);

       //指定map的输出的kv类型
       job.setMapOutputKeyClass(Text.class);
       job.setMapOutputValueClass(LongWritable.class);

       //指定数据源路径
       FileInputFormat.setInputPaths(job,new Path("/wc/srcdata"));
       //指定结果输出路径
       FileOutputFormat.setOutputPath(job,new Path("/wc/output/"));

       //将job提交给集群
       job.waitForCompletion(true);
   }

}orm

4.打成jar包

5.在hadoop上运行