当未配置时就会使用自动的配置。
Mapper:封装了应用程序Mapper阶段的数据处理逻辑,下面是Hadoop中已经实现了的Mapper子类:
ChainMapper:方便用户编写链式Map任务, 即Map阶段包含多个Mapper,这样便可以编写多个自定义Mapper去参与运算。
InverseMapper:一个能交换key和value的Mapper
RegexMapper:检查输入是否匹配某正则表达式, 输出匹配字符串和计数器(用的不多)
TokenCounterMapper:将输入分解为独立的单词, 输出各个单词和计数器(以空格分割单词,value值为1)
代码实例:
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.chain.ChainMapper; import org.apache.hadoop.mapreduce.lib.chain.ChainReducer; import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; import org.apache.hadoop.mapreduce.lib.map.InverseMapper; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class PatentReference_0010 extends Configured implements Tool{ static class PatentReferenceMapper extends Mapper<Text,Text,Text,IntWritable>{ private IntWritable one=new IntWritable(1); @Override protected void map(Text key,Text value,Context context) throws IOException, InterruptedException{ context.write(key,one); } } @Override public int run(String[] args) throws Exception{ Configuration conf=getConf(); Path input=new Path(conf.get("input")); Path output=new Path(conf.get("output")); //输入格式为KeyValueTextInputFormat时能够指定key和value的分隔符是什么,它的默认分隔符是"\t"也就是tab键 conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator",","); Job job=Job.getInstance(conf,this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); ChainMapper.addMapper(job,InverseMapper.class, // 输入的键值类型由InputFormat决定 Text.class,Text.class, // 输出的键值类型与输入的键值类型相反 Text.class,Text.class,conf); ChainMapper.addMapper(job,PatentReferenceMapper.class, // 输入的键值类型由前一个Mapper输出的键值类型决定 Text.class,Text.class, Text.class,IntWritable.class,conf); ChainReducer.setReducer(job,IntSumReducer.class, Text.class,IntWritable.class, Text.class,IntWritable.class,conf); ChainReducer.addMapper(job,InverseMapper.class, Text.class,IntWritable.class, 
IntWritable.class,Text.class,conf); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); KeyValueTextInputFormat.addInputPath(job,input); TextOutputFormat.setOutputPath(job,output); return job.waitForCompletion(true)?0:1; } public static void main(String[] args) throws Exception{ System.exit(ToolRunner.run(new P00010_PatentReference_0010(),args)); } }