欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

mapreduce求共同好友案例示例

程序员文章站 2022-05-01 13:02:53
...

案例需求:
求出两两之间的共同好友

一:案例分析图解
mapreduce求共同好友案例示例
------------------------------------------------------------第一阶段----------------------------------------------------------------------------

1.导入pom.xml依赖,特别留意打包插件,它可以把依赖的jar包全部打包,保证可以在hadoop集群上运行
 <!-- Hadoop 2.7.5 client libraries needed to compile and run the MapReduce job. -->
 <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.7.5</version>
        </dependency>
        <!-- Pinned version: the RELEASE meta-version makes builds non-reproducible.
             Test scope keeps JUnit out of the shaded job jar. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Compile for Java 8 with UTF-8 sources. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                    <!--    <verbal>true</verbal>-->
                </configuration>
            </plugin>
            <!-- Builds a single jar containing the job classes and their dependencies
                 during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <!-- NOTE(review): minimizeJar drops classes that are not referenced
                                 statically; confirm nothing loaded reflectively is removed. -->
                            <minimizeJar>true</minimizeJar>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>
2.map阶段:把k1 v1 --> k2  v2
/**
 * Stage-1 mapper: inverts each input line of the form {@code owner:friend1,friend2,...}.
 *
 * <p>For every friend listed on the line it emits {@code (friend, owner)}, so that the
 * reducer receives, per friend, all owners whose list contains that friend.
 */
public class MyMapper extends Mapper<LongWritable,Text,Text,Text> {
    // Reused for every record instead of allocating two new Text objects per map() call.
    private final Text friendKey = new Text();
    private final Text ownerValue = new Text();

    /**
     * Splits one input line and writes one {@code (friend, owner)} pair per listed friend.
     *
     * @param key     offset of the line in the input (unused)
     * @param value   one line of input text
     * @param context output collector
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Split "owner:friends"; a blank line or a line without a friend list would
        //    otherwise fail the whole task with ArrayIndexOutOfBoundsException.
        String[] parts = value.toString().split(":");
        if (parts.length < 2) {
            context.getCounter("friend", "malformed_lines").increment(1);
            return;
        }
        String owner = parts[0].trim();
        if (owner.isEmpty()) {
            context.getCounter("friend", "malformed_lines").increment(1);
            return;
        }
        // The emitted value is the same for every friend on this line, so set it once.
        ownerValue.set(owner);

        // 2. Split the friend list and emit (friend, owner) for each entry.
        for (String friend : parts[1].split(",")) {
            // Trim so that stray whitespace does not create distinct keys for the same name.
            String name = friend.trim();
            if (name.isEmpty()) {
                continue;
            }
            friendKey.set(name);
            context.write(friendKey, ownerValue);
        }
    }
}
3.reduce阶段:把k2 v2 --> k3  v3
/**
 * Stage-1 reducer: for one key, concatenates all of its values into a single
 * dash-terminated string and writes that string as the output key, with the
 * incoming key as the output value.
 */
public class MyReducer extends Reducer<Text,Text,Text,Text> {
    /**
     * Joins every value as {@code value-} (each value is followed by a dash, including
     * the last one) and emits {@code (joinedValues, key)}.
     *
     * @param key     the grouping key received from the map stage
     * @param values  all values that were emitted for {@code key}
     * @param context output collector
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        // Each value is followed by '-', so the result always ends with a dash.
        values.forEach(owner -> joined.append(owner.toString()).append('-'));

        Text outputKey = new Text(joined.toString());
        context.write(outputKey, key);
    }
}

4.主方法,执行程序入口类
/**
 * Driver for the stage-1 job: reads text from {@code hdfs://node01:8020/input}, runs
 * {@link MyMapper} and {@link MyReducer}, and writes text to
 * {@code hdfs://node01:8020/output01}.
 */
public class JonMain extends Configured implements Tool {
    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args command-line arguments left over after generic option parsing (unused)
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job setup, the output-directory cleanup, or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Create the job object.
        Job job = Job.getInstance(super.getConf(),"friend");
        // Tell Hadoop which jar holds the job classes; without this the mapper and
        // reducer classes may not be found when the job runs on a cluster.
        job.setJarByClass(JonMain.class);

        // Step 1: input format and input path.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,new Path("hdfs://node01:8020/input"));

        // Step 2: map stage.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Steps 3-6 (partition, sort, combine, group) use the defaults.

        // Step 7: reduce stage.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Step 8: output format and output path.
        job.setOutputFormatClass(TextOutputFormat.class);
        Path path = new Path("hdfs://node01:8020/output01");
        TextOutputFormat.setOutputPath(job,path);

        // Remove a leftover output directory so that re-running the job does not fail.
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01"),super.getConf());
        if (fileSystem.exists(path)){
            // Recursive delete; the single-argument delete(Path) is deprecated.
            fileSystem.delete(path, true);
        }

        // Submit and wait for completion.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1 ;
    }

    /**
     * Program entry point: runs the job through {@link ToolRunner} and exits with its status.
     *
     * @param args command-line arguments passed to {@link ToolRunner}
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration, new JonMain(), args);
        System.exit(run);
    }
}

--------------------------------------------------------------第二阶段----------------------------------------------------------------------------

1.map阶段
/**
 * Stage-2 mapper: reads lines of the form {@code name1-name2-...-<TAB>friend} and
 * emits {@code (nameA-nameB, friend)} for every pair of names on the line.
 *
 * <p>The names are sorted before pairing, so a given pair always produces the same key
 * regardless of the order in which the names appear on the line.
 */
public class MyMapper extends Mapper<LongWritable,Text,Text,Text> {
    // Reused for every record instead of allocating two new Text objects per map() call.
    private final Text pairKey = new Text();
    private final Text friendValue = new Text();

    /**
     * Splits one line into its name list and friend, then writes one record per name pair.
     *
     * @param key     offset of the line in the input (unused)
     * @param value   one line of input text
     * @param context output collector
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Split "names<TAB>friend"; a line without a tab would otherwise fail the
        //    task with ArrayIndexOutOfBoundsException.
        String[] fields = value.toString().split("\t");
        if (fields.length < 2) {
            context.getCounter("friend", "malformed_lines").increment(1);
            return;
        }
        friendValue.set(fields[1]);

        // 2. Split the dash-separated names; String.split drops the trailing empty token.
        String[] names = fields[0].split("-");

        // 3. Sort once, before pairing (the sort does not depend on the loop index).
        Arrays.sort(names);

        // 4. Emit every unordered pair (i < j) with the shared friend as the value.
        for (int i = 0; i < names.length - 1; i++) {
            for (int j = i + 1; j < names.length; j++) {
                pairKey.set(names[i] + "-" + names[j]);
                context.write(pairKey, friendValue);
            }
        }
    }
}
2.reduce阶段
/**
 * Stage-2 reducer: for one key, concatenates all of its values into a single
 * tab-terminated string and writes it as the output value under the same key.
 */
public class MyReducer extends Reducer<Text,Text,Text,Text> {
    /**
     * Joins every value as {@code value<TAB>} (each value is followed by a tab, including
     * the last one) and emits {@code (key, joinedValues)}.
     *
     * @param key     the grouping key received from the map stage
     * @param values  all values that were emitted for {@code key}
     * @param context output collector
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        // Each value is followed by a tab, so the result always ends with a tab.
        values.forEach(friend -> joined.append(friend.toString()).append('\t'));

        Text outputValue = new Text(joined.toString());
        context.write(key, outputValue);
    }
}
3.主方法
/**
 * Driver for the stage-2 job: reads the stage-1 result file
 * {@code hdfs://node01:8020/output01/part-r-00000}, runs {@link MyMapper} and
 * {@link MyReducer}, and writes text to {@code hdfs://node01:8020/output02}.
 */
public class JonMain extends Configured implements Tool {
    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args command-line arguments left over after generic option parsing (unused)
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job setup, the output-directory cleanup, or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Create the job object.
        Job job = Job.getInstance(super.getConf(),"friend");
        // Tell Hadoop which jar holds the job classes; without this the mapper and
        // reducer classes may not be found when the job runs on a cluster.
        job.setJarByClass(JonMain.class);

        // Step 1: input format and input path (the output file of the first stage).
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,new Path("hdfs://node01:8020/output01/part-r-00000"));

        // Step 2: map stage.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Steps 3-6 (partition, sort, combine, group) use the defaults.

        // Step 7: reduce stage.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Step 8: output format and output path.
        job.setOutputFormatClass(TextOutputFormat.class);
        Path path = new Path("hdfs://node01:8020/output02");
        TextOutputFormat.setOutputPath(job,path);

        // Remove a leftover output directory so that re-running the job does not fail.
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01"),super.getConf());
        if (fileSystem.exists(path)){
            // Recursive delete; the single-argument delete(Path) is deprecated.
            fileSystem.delete(path, true);
        }

        // Submit and wait for completion.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1 ;
    }

    /**
     * Program entry point: runs the job through {@link ToolRunner} and exits with its status.
     *
     * @param args command-line arguments passed to {@link ToolRunner}
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration, new JonMain(), args);
        System.exit(run);
    }
}


---------------------------------------------------------------结果对比示例---------------------------------------------------------------------------

1.第一次运行结果
I-K-B-G-F-H-O-C-D- A
A-F-C-J-E- B
E-A-H-B-F-G-K- C
E-C-L-A-F-H-G- D
K- D
F-M-L-H-G-D-C-B-A- E
D-A-M- F
G-L- F
M- G
O- H
O- I
C- I
O- J
B- K
D-E- L
E- M
F- M
F- O
J-I-H-A- O

2.第二次运行结果
A-B E C
A-C B E D
A-D F E
A-E C B D
A-F E B D C
A-G E D C
A-H C O D E
A-I O
A-J B O
A-K C
A-L E D
A-M F E
B-C A E
B-D E A
B-E C
B-F C E A
B-G C A E
B-H E C A
B-I A
B-K C A
B-L E
B-M E
B-O A
C-D E A
C-E B D
C-F E D B A
C-G D E A
C-H E D A
C-I A
C-J B
C-K A
C-L D E
C-M E
C-O A
D-E L
D-F E A
D-G A E
D-H A E
D-I A
D-K A
D-L E
D-M E F
D-O A
E-F D C B
E-G D C
E-H D C
E-J B
E-K C
E-L D
F-G D C A E
F-H A C D E
F-I A
F-J B
F-K C A
F-L D E
F-M E
F-O A
G-H A D E C
G-I A
G-K A C
G-L D E F
G-M E
G-O A
H-I A O
H-J O
H-K A C
H-L E D
H-M E
H-O A
I-J O
I-K A
I-O A
K-O A
L-M E