Hadoop社交好友推荐(二度好友)
程序员文章站
2024-03-23 08:05:22
...
Hadoop社交好友推荐(二度好友)
情景简述
如果A和B具有好友关系,B和C具有好友关系,而A和C却不是好友关系,那么我们称A和C这样的关系为:二度好友关系。
在生活中,二度好友推荐的运用非常广泛,一般在主流的社交平台上关于好友推荐上就有这方面的应用,当然,在当下海量的数据中,利用MapReduce编程模型来实现不失为一种较好的方式,具体的过程如下图。
把上面的图抽象一下可以表示为下图:
也可以用文字抽象表示为:
a b c
b a d
c a e f
d b h
e c i j
f c k
或者表示为:
-
a
- b
- c
-
b
- a
- d
-
c
- a
- e
- f
-
d
- b
- h
-
f
-
c
-
k
-
解决方案
-
基于笛卡尔积思想
好友关系文字表示可以使用笛卡尔积思想去分析。难度比树结构要简单得多,只要基于编程去实现即可。下面将贴出分析思路图:
Mapper、Reducer代码示例:
public static class Map extends Mapper<LongWritable, Text,Text,Text>{ private Text out_key = new Text(); private Text out_value = new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] line = value.toString().split(" "); out_key.set(line[0]); for (int i = 1; i < line.length; i++) { out_value.set(line[i]); context.write(out_key,out_value); context.write(out_value,out_key); } count += 1; } } public static class Reducer extends org.apache.hadoop.mapreduce.Reducer<Text,Text,Text,Text>{ private static int reducer_count = 0; private List<String> keyList = new ArrayList<String>(); private List<List<String>> valuesList = new ArrayList<List<String>>(); private Text out_key = new Text(); private Text out_value = new Text(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { //使用 keyList 和 valuesList 方案 keyList.add(key.toString()); List<String> passList = new ArrayList<String>(); for (Text value : values) { passList.add(value.toString()); } passList = new ArrayList<String>(new HashSet<String>(passList)); valuesList.add(passList); if (++reducer_count == count){ StringBuilder pass_value; for (int i = 0; i < keyList.size(); i++) { String flag = keyList.get(i); pass_value = new StringBuilder(); for (int j = i+1; j < valuesList.size(); j++) { if (valuesList.get(j).get(0).equals(flag)){ for (int i1 = 1; i1 < valuesList.get(j).size(); i1++) { pass_value.append(String.format("%s ",valuesList.get(j).get(i1))); } } } if (pass_value.length() > 1){ out_key.set(flag); out_value.set(pass_value.toString().trim()); context.write(out_key,out_value); } } } } }
-
树
关系图适合转化为树的结构。总体来说并不难,理好思路使用递归做好插入节点与遍历需要结果即可。
但是使用数据结构去解决这个问题不太妥,大数据起来内存——危。得在每次调用Reducer的时候进行合并(插入)树,还得再最后一次调用Reducer的时候才write。推荐使用笛卡尔积的方式。
下面为树的数据结构的类代码:
import java.util.ArrayList; import java.util.List; public class TreeData { private String data = null; private List<TreeData> cellTree = new ArrayList<TreeData>(); private int layer = 0; private static List<String> queryResult = new ArrayList<String>(); private static List<String> relationMessage = new ArrayList<String>(); public TreeData(){} public TreeData(int num){layer = num;} /** * 以根节点初始化 * @param node 根节点 */ public TreeData(String node){ data = node; } public TreeData(String node ,String data){ this(node); inster(data); } public TreeData(String node ,List<String> data){ this(node); inster(data); } /** * 向根节点插入子节点 * @param data 插入的子节点 */ public void inster(String data){ cellTree.add(new TreeData(data)); } public void inster(List<String> data){ for (String s : data) { inster(s); } } /** * 获取该节点下的所有子节点 * @return 子节点的data */ public List<String> getChildNode(){ List<String> pass = new ArrayList<String>(); for (TreeData treeData : cellTree) { pass.add(treeData.toString()); } return pass; } public List<String> getChildNode(String ban){ List<String> pass = new ArrayList<String>(); for (TreeData treeData : cellTree) { if (!treeData.toString().equals(ban)) pass.add(treeData.toString()); } return pass; } @Deprecated /** * 把b树插入a树 并不能解决插入节点时节点的子节点有插入点值重复问题 * @param a 一个树 * @param b 一个树 * @return 返回合并好的树 */ public static TreeData addTree(TreeData a ,TreeData b){ if (a == null){ return b; }else if (b == null){ return a; } insert(a,b.toString(),b); return a; } // 过滤掉插入节点时节点的子节点有插入点值重复 public static TreeData addTreeNoRepetition(TreeData a ,TreeData b){ if (a == null){ return b; }else if (b == null){ return a; } insert(a,b.toString(),b,a.toString()); return a; } @Deprecated /** * 不能解决插入节点时节点的子节点有插入点值重复问题 * 插入新结点 输入父结点id进行定位 ,将新结点 插入到父结点的 sonList 中 * @param changeNode 传入根结点,传入前需判断:若根结点不存在,待插入结点成为根结点,不必进入此方法 * @param fatherId 新结点的 父结点id * @param newNode 新结点 */ public static void insert(TreeData changeNode, String fatherId, TreeData newNode) { if (fatherId.equals(changeNode.toString())) { changeNode.inster(newNode.getChildNode()); return; } List<TreeData> sonList = changeNode.getCellTree(); if (sonList != null && !sonList.isEmpty()) { //若该结点 的子结点集合不为空 for (TreeData aSonList : sonList) { insert(aSonList, fatherId, newNode); } } } /** * 插入树(节点)的详细方法 * @param changeNode 要被插入的树 * @param fatherId 插入树的根节点 * @param newNode 要插入的树 * @param ban 禁止的节点 */ public static void insert(TreeData changeNode, String fatherId, TreeData newNode ,String ban) { if (fatherId.equals(changeNode.toString())) { changeNode.inster(newNode.getChildNode(ban)); return; } List<TreeData> sonList = changeNode.getCellTree(); if (sonList != null && !sonList.isEmpty()) { //若该结点 的子结点集合不为空 for (TreeData aSonList : sonList) { insert(aSonList, fatherId, newNode ,changeNode.toString()); } } } @Deprecated /** * 不能解决 该节点的子节点拥有父节点的值问题 * 输出该树所有关系节点到变量里 * @param treeData 树 */ public static void outMessage(TreeData treeData){ StringBuilder pass = new StringBuilder(treeData.toString()); List<TreeData> passList = new ArrayList<TreeData>(); for (int i = 0; i < treeData.getCellTree().size(); i++) { TreeData data = treeData.getCellTree().get(i); if (data.getCellTree() != null && !data.getCellTree().isEmpty()){ passList.add(data); for (TreeData treeData1 : data.getCellTree()) { pass.append(String.format(" %s", treeData1.toString())); } } if (i == treeData.getCellTree().size()-1 && pass.length() > 1){ TreeData.getRelationMessage().add(pass.toString()); } } for (TreeData data : passList) { outMessage(data); } } /** * 遍历节点将符合二度好友关系的节点保存至relationMessage中 * @param treeData 要输出的树(节点) * @param faterNode 该树(节点)的父节点 若为根节点则父节点与根节点一致 */ public static void outMessageNoRepetition(TreeData treeData ,TreeData faterNode){ StringBuilder pass = new StringBuilder(treeData.toString()); List<TreeData> passList = new ArrayList<TreeData>(); for (int i = 0; i < treeData.getCellTree().size(); i++) { TreeData data = treeData.getCellTree().get(i); if (data.getCellTree() != null && !data.getCellTree().isEmpty()){ passList.add(data); for (TreeData treeData1 : data.getCellTree()) { if (!treeData1.getCellTree().toString().contains(treeData.toString()) && !treeData1.toString().contains(faterNode.toString())){ pass.append(String.format(" %s", treeData1.toString())); } } } if (i == treeData.getCellTree().size()-1 && pass.length() > 1){ TreeData.getRelationMessage().add(pass.toString()); } } for (TreeData data : passList) { outMessageNoRepetition(data ,treeData); } } public static void queryAll(TreeData changeNode, int depth){ List<TreeData> sonList = changeNode.getCellTree(); String space = generateSpace(depth); //根据深度depth,返回(depth)长度的空格 if (sonList==null||sonList.isEmpty()){ return; } for (int i = 0; i <sonList.size() ; i++) { //打印空格 和结点id,name System.out.println(space+"---" +sonList.get(i).getData()); queryResult.add(String.format("%s%s",space,sonList.get(i).getData())); if(i==0){ depth = depth+1; //结点深度+1,每个for循环仅执行一次。作为子结点sonList.get(i)的深度 } queryAll(sonList.get(i),depth); } } //打印空格 public static String generateSpace(int count) { //count = count*3; char[] chs = new char[count]; for(int i = 0; i < count; i++) { chs[i] = ' '; } return new String(chs); } @Override public boolean equals(Object obj) { if (this == obj){ return true; } if (obj instanceof TreeData){ return this.toString().equals(obj.toString()); } return false; } @Override public String toString() { return data; } public String getData() { return data; } public void setData(String data) { this.data = data; } public List<TreeData> getCellTree() { return cellTree; } public void setCellTree(List<TreeData> cellTree) { this.cellTree = cellTree; } public int getLayer() { return layer; } public void setLayer(int layer) { this.layer = layer; } public static List<String> getQueryResult() { return queryResult; } public static void setQueryResult(List<String> queryResult) { TreeData.queryResult = queryResult; } public static List<String> getRelationMessage() { return relationMessage; } public static void setRelationMessage(List<String> relationMessage) { TreeData.relationMessage = relationMessage; } public static void clear(){ TreeData.getQueryResult().clear(); TreeData.getRelationMessage().clear(); } }
代码示例:
public class Main { private static int count = 0; private static int count_reducer = 0; public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Clear.delete(args[1]); Job job = Job.getInstance(new Configuration(),"Main"); job.setJarByClass(Main.class); job.setMapperClass(FriendsIntroductionMap.class); job.setReducerClass(FriendsIntroductionReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } public static class FriendsIntroductionMap extends Mapper<LongWritable,Text,Text, Text>{ private Text outKey = new Text(); private Text outValue = new Text(); private TreeData treeData = new TreeData(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] line = value.toString().split(" "); StringBuilder pass = new StringBuilder(); outKey.set(line[0]); for (int i = 1; i < line.length; i++) { pass.append(String.format("%s ",line[i])); } outValue.set(pass.toString().trim()); context.write(outKey, outValue); count += 1; } } public static class FriendsIntroductionReducer extends Reducer<Text, Text, Text, NullWritable>{ private static TreeData treeData = null; private Text outKey = new Text(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { TreeData tree = new TreeData(key.toString()); for (Text value : values) { for (String s : value.toString().split(" ")) { tree.inster(s); } } treeData = TreeData.addTreeNoRepetition(treeData,tree); //这里可以改善 在map写一个key为count value为1 在这里接收整合判断 if (++count_reducer == count) { TreeData.outMessageNoRepetition(treeData ,treeData); for (String s : TreeData.getRelationMessage()) { outKey.set(s); context.write(outKey, NullWritable.get()); } } } } }
-
图
其实图才是更适合描述好友关系的,可以使用图的数据结构去解决问题。
图也是数据结构,在大数据使用数据结构是不妥的。主要是图的插入比树要麻烦,我这里没有去实现它
结果
输入图:
输出图:
下一篇: 入门unity开发笔记