1. Write the Mapper class
import java.io.IOException;

// commons-lang StringUtils (bundled with Hadoop); its split(String, String) overload is used below
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the Text value to a plain String
        String line = value.toString();
        // Split the line on spaces to get the individual words
        String[] words = StringUtils.split(line, " ");
        // Emit one kv pair per word in the required format: key = the word, value = 1
        for (String word : words) {
            context.write(new Text(word), new LongWritable(1));
        }
    }
}
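For a sample line such as "hello tom hello jim" (a made-up line, not from the source data), the map() above emits (hello,1), (tom,1), (hello,1), (jim,1). A minimal sketch of checking that behavior with Apache MRUnit, assuming the MRUnit library (a retired but still usable Apache project) and JUnit are on the test classpath:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class WCMapperTest {
    @Test
    public void testMap() throws Exception {
        // Feed one line into the mapper and assert the expected kv pairs, in order
        MapDriver.newMapDriver(new WCMapper())
            .withInput(new LongWritable(0), new Text("hello tom hello jim"))
            .withOutput(new Text("hello"), new LongWritable(1))
            .withOutput(new Text("tom"), new LongWritable(1))
            .withOutput(new Text("hello"), new LongWritable(1))
            .withOutput(new Text("jim"), new LongWritable(1))
            .runTest();
    }
}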
2. Write the Reducer class
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    // After the map phase finishes, the framework caches all the kv pairs,
    // groups them by key, and then calls reduce() once per group as <key, values{}>,
    // e.g. <hello, {1,1,1,1,1,1.......}>
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long count = 0;
        // Iterate over the list of values and accumulate the sum
        for (LongWritable value : values) {
            count += value.get();
        }
        // Emit the final count for this word
        context.write(key, new LongWritable(count));
    }
}
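Because this reduce logic is a plain associative sum, the same class can also serve as a combiner, pre-aggregating each map task's local output before the shuffle. This is an optional optimization, not part of the original steps; it would be one extra line in the job setup shown in step 3 below:

// Optional: run WCReducer as a combiner on each map task's local output
job.setCombinerClass(WCReducer.class);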
3. Write the job submission class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Describes one particular job, for example:
// which class the job uses as its map logic and which as its reduce
// the path of the input data to process
// the path for the output results
public class WCRunner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Specify which jar contains the classes this job uses
        job.setJarByClass(WCRunner.class);
        // Set the mapper and reducer classes
        job.setMapperClass(WCMapper.class);
        job.setReducerClass(WCReducer.class);
        // Specify the kv types of the reduce output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Specify the kv types of the map output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Specify the input data path
        FileInputFormat.setInputPaths(job, new Path("/wc/srcdata"));
        // Specify the output path for the results
        FileOutputFormat.setOutputPath(job, new Path("/wc/output/"));
        // Submit the job to the cluster and wait for it to finish
        job.waitForCompletion(true);
    }
}
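The input and output paths above are hard-coded. A minimal variant (my own suggestion, not in the original) is to read them from the command line so the same jar can be reused against different data:

// Hypothetical variant: take the input/output paths from the program arguments
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));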
4. Package the classes into a jar
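A minimal sketch of packaging from the command line; the jar name wc.jar and the classes/ output directory are assumptions, and an IDE's export-jar feature works just as well:

jar -cvf wc.jar -C classes/ .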
5. Run it on Hadoop
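A minimal sketch of uploading some input and submitting the job with the standard Hadoop CLI; the file name words.txt is an assumption, and the /wc/output/ directory must not already exist or the job will fail:

hadoop fs -mkdir -p /wc/srcdata
hadoop fs -put words.txt /wc/srcdata
hadoop jar wc.jar WCRunner
hadoop fs -cat /wc/output/part-r-00000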