
Creating a MapReduce Programming Model

...

6. Write the WordCountMapper class, which splits each input line into words and emits the result as a (k, v) pair to the Reduce stage.
  Have WordCountMapper extend Mapper, specify the required type parameters, and fill in the map method according to the business logic as follows:

package com.simple;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WordCountMapper extends Mapper<LongWritable, Text, NullWritable, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, NullWritable, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // Get the line's content as a string
        String valueString = value.toString();
        // Split the line into words on single spaces
        String[] wArr = valueString.split(" ");
        // Emit (k, v): a null key and this line's word count
        context.write(NullWritable.get(), new LongWritable(wArr.length));
    }
}
    }
}
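Note that String.split(" ") treats every single space as a delimiter, so consecutive spaces produce empty tokens that inflate the count, and a blank line still reports one word. A minimal, hedged sketch of a more defensive map method (a drop-in replacement for the one above, same class and type parameters):

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        // "\\s+" collapses runs of whitespace; a blank line contributes zero words
        long count = line.isEmpty() ? 0 : line.split("\\s+").length;
        context.write(NullWritable.get(), new LongWritable(count));
    }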

7. Write the WordCountReducer class, which totals the per-line word counts.

package com.simple;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReducer extends Reducer<NullWritable, LongWritable, NullWritable, LongWritable> {
    @Override
    protected void reduce(NullWritable key, Iterable<LongWritable> v2s,
            Reducer<NullWritable, LongWritable, NullWritable, LongWritable>.Context context)
            throws IOException, InterruptedException {
        Iterator<LongWritable> it = v2s.iterator();
        // sum accumulates the total word count
        long sum = 0;
        // Iterate over the per-line counts and add them up
        while (it.hasNext()) {
            sum += it.next().get();
        }
        context.write(NullWritable.get(), new LongWritable(sum));
    }
}
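Because every pair carries the same NullWritable key, all per-line counts are grouped into a single reduce call, and the reduce logic is a plain sum, which is commutative and associative. The reducer can therefore double as a combiner that pre-aggregates map output before the shuffle. This is an optional, hedged addition (one line in the step 8 driver, after the Job is created), not part of the original walkthrough:

        // Optional: reuse the reducer as a combiner so each map task
        // ships one pre-summed value instead of one value per line.
        job.setCombinerClass(WordCountReducer.class);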

8. Create the TestMapReducer class, the driver that wires up the Mapper and Reducer and submits the job.

package com.simple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class TestMapReducer {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        // Get a Job instance
        Job job = Job.getInstance(conf);
        // Set the main class so Hadoop can locate the jar
        job.setJarByClass(TestMapReducer.class);
        // Set the Mapper and Reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Set the output types of the map stage and the reduce stage
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(LongWritable.class);
        // Set the input and output paths (the output directory must not already exist)
        FileInputFormat.setInputPaths(job, new Path("/data/dataset/EnglishWordsCount.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/data/dataset/output/"));
        // Submit the job and exit with a non-zero status if it fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
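The driver above hard-codes the file-system URI and paths. A common alternative is to implement Hadoop's Tool interface and launch through ToolRunner, which also honors -D options (for example -Dfs.defaultFS=hdfs://localhost:9000) on the command line. The sketch below assumes the input and output paths arrive as command-line arguments; the class name WordCountTool is hypothetical:

package com.simple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCountTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf());
        job.setJarByClass(WordCountTool.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // The map output types match the final output types,
        // so the job-level settings below cover both stages
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // The output directory must not exist yet
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new WordCountTool(), args));
    }
}

After packaging the classes into a jar, either driver is submitted with hadoop jar; the total word count then appears in the part-r-00000 file of the output directory (a single part file, since the job runs with the default single reduce task).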