欢迎您访问程序员文章站！本站旨在为大家提供和分享程序员计算机编程知识！
您现在的位置是: 首页

使用 Apache Commons Math 中的聚类方法 DBSCAN 与 K-means++

程序员文章站 2022-07-03 11:47:22
...

使用 Apache Commons Math 中的聚类方法

1)DBSCAN的使用

/**
 * Demo of density-based clustering (DBSCAN) with Apache Commons Math, using a
 * pre-computed table of pairwise node weights as the distance measure.
 *
 * <p>The input file is read as tab-separated lines of the form
 * {@code node1 <TAB> node2 <TAB> weight}; both node columns must be parseable
 * as numbers because each node is also turned into a one-dimensional point.
 */
public class DBSCAN {

    /**
     * Path of the tab-separated input file (jaccardCoding.txt).
     */
    public static final String inputPath = "D:\\jaccardCoding.txt";

    /** Distinct nodes collected by {@link #getCodingFileMap(String)}. Shared, mutable state. */
    static List<Location> locations = new ArrayList<>();

    /**
     * Loads the weight table from {@link #inputPath} and prints the DBSCAN
     * clusters obtained with {@code eps = 0.5} and {@code minPts = 10}.
     *
     * @param args unused
     * @throws IOException if the input file cannot be opened or is malformed
     */
    public static void main(String[] args) throws IOException {
        DBSCAN dbscan = new DBSCAN();
        Map<NodePair, Double> nodesPairMap = dbscan.getCodingFileMap(inputPath);
        dbscan.getDBSCANResult(locations, nodesPairMap, 0.5, 10);
    }

    /**
     * Clusters the given nodes with DBSCAN and prints every resulting cluster
     * to standard output ("Cluster i", one member per line, then a blank line).
     *
     * @author YYH
     * @param locations    the nodes to cluster
     * @param nodesPairMap weight of each node pair, used as the distance between the two nodes
     * @param eps          the distance that defines the eps-neighborhood of a point
     * @param minPts       the minimum number of density-connected points required to form a cluster
     */
    public void getDBSCANResult(
            List<Location> locations,
            Map<NodePair, Double> nodesPairMap,
            double eps,
            int minPts) {
        // Wrap every node so that the clusterer can read it as a Clusterable point.
        List<LocationWrapper> clusterInput = new ArrayList<LocationWrapper>(locations.size());
        for (Location location : locations) {
            clusterInput.add(new LocationWrapper(location));
        }

        // DBSCAN with an explicit distance measure: distances are looked up in
        // nodesPairMap instead of being computed from the coordinates.
        JaccardDistance jaccardDistance = new JaccardDistance(nodesPairMap);
        DBSCANClusterer<LocationWrapper> clusterer =
                new DBSCANClusterer<LocationWrapper>(eps, minPts, jaccardDistance);
        List<Cluster<LocationWrapper>> clusterResults = clusterer.cluster(clusterInput);

        // Output the clusters.
        for (int i = 0; i < clusterResults.size(); i++) {
            System.out.println("Cluster " + i);
            for (LocationWrapper locationWrapper : clusterResults.get(i).getPoints()) {
                System.out.println(locationWrapper.getLocation());
            }
            System.out.println();
        }
    }

    /**
     * Reads the weight file and returns its content as a map from node pair to
     * weight. As a side effect, every node seen in the file that is not yet in
     * the static {@link #locations} list is appended to it, so calling this
     * method repeatedly does not create duplicate entries.
     *
     * <p>Blank lines are skipped. Any other line must contain at least three
     * tab-separated columns: first node, second node, weight.
     *
     * @param inputPath path of the tab-separated weight file
     * @return the weight of every node pair listed in the file
     * @throws IOException if the file cannot be opened or read, or if a
     *                     non-blank line has fewer than three columns
     */
    public Map<NodePair, Double> getCodingFileMap(String inputPath) throws IOException {
        Map<NodePair, Double> nodesPairMap = new HashMap<>();
        Set<Location> locationSet = new HashSet<>();

        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader bReader = FileUtil.getReader(inputPath)) {
            if (bReader == null) {
                // Checked before first use; the original tested for null only after reading.
                throw new IOException("Cannot open input file: " + inputPath);
            }
            String txtLine;
            int lineNumber = 0;
            while ((txtLine = bReader.readLine()) != null) {
                lineNumber++;
                if (txtLine.trim().isEmpty()) {
                    continue; // tolerate blank lines such as a trailing newline
                }
                String[] values = txtLine.split("\t");
                if (values.length < 3) {
                    throw new IOException("Malformed line " + lineNumber + " in " + inputPath
                            + ": expected 3 tab-separated columns but found " + values.length);
                }
                NodePair nodePair = new NodePair();
                nodePair.setNode1(values[0]);
                nodePair.setNode2(values[1]);
                nodesPairMap.put(nodePair, Double.valueOf(values[2]));
                // The node ids are only converted from String to double here.
                locationSet.add(new Location(Double.valueOf(values[0])));
                locationSet.add(new Location(Double.valueOf(values[1])));
            }
        }

        // Append only nodes that are not already known.
        locationSet.removeAll(locations);
        locations.addAll(locationSet);
        return nodesPairMap;
    }

    /**
     * Distance measure backed by a pre-computed table: instead of computing a
     * distance from the coordinates, the first coordinate of each point is
     * interpreted as a node id and the pair is looked up in
     * {@link #nodesPairMap}.
     *
     * <p>NOTE(review): pairs that are not in the table get distance {@code 0},
     * i.e. they are treated as identical. Confirm that this is intended for
     * the data stored in the file (coefficient vs. distance).
     *
     * @author YYH
     */
    public static class JaccardDistance implements DistanceMeasure {
        private static final long serialVersionUID = 1L;

        /** File that receives a trace line for every computed distance. */
        public static final String outPath = "D:\\Test.txt";

        /** Weight of each node pair; may be replaced by callers. */
        public Map<NodePair, Double> nodesPairMap;

        /**
         * @param nodesPairMap weight of each node pair
         */
        public JaccardDistance(Map<NodePair, Double> nodesPairMap) {
            this.nodesPairMap = nodesPairMap;
        }

        /** Creates a measure with an empty table; every distance is then {@code 0}. */
        public JaccardDistance() {
            this(new HashMap<NodePair, Double>());
        }

        /**
         * Returns the table entry for the node pair identified by
         * {@code a[0]} and {@code b[0]}, or {@code 0} when the pair is unknown.
         * The result is also printed to standard output and written to
         * {@link #outPath}; this tracing can be removed when it is not needed.
         *
         * @param a first point; only {@code a[0]} is used
         * @param b second point; only {@code b[0]} is used
         * @return the stored weight of the pair, or {@code 0} if there is none
         */
        @Override
        public double compute(double[] a, double[] b) throws DimensionMismatchException {
            // Truncate to the integral node id. Going through String.valueOf(double)
            // breaks for ids >= 1e7, which are printed as "1.0E7" and would become "1".
            String node1 = Long.toString((long) a[0]);
            String node2 = Long.toString((long) b[0]);
            double value = lookup(node1, node2);

            System.out.println(node1 + " : " + node2 + " : " + value);

            // NOTE(review): the trace file is opened once per distance computation.
            // Whether FileUtil.getWriter appends or truncates is not visible here,
            // so the per-call open/close is kept to preserve the file content.
            try (BufferedWriter bwriter = FileUtil.getWriter(outPath)) {
                if (bwriter != null) {
                    bwriter.write(node1 + " : " + node2 + " : " + value);
                    bwriter.newLine();
                    bwriter.flush();
                }
            } catch (IOException e) {
                // Tracing is best effort; a failed write must not abort the clustering.
                e.printStackTrace();
            }
            return value;
        }

        /** Looks the pair up in both directions, since a distance is symmetric. */
        private double lookup(String node1, String node2) {
            if (nodesPairMap == null) {
                return 0.;
            }
            Double weight = nodesPairMap.get(new NodePair(node1, node2));
            if (weight == null) {
                weight = nodesPairMap.get(new NodePair(node2, node1));
            }
            return weight == null ? 0. : weight;
        }
    }

    /**
     * Adapter that exposes a {@link Location} to the clusterer. The location is
     * the node's number (or label); the point array carries the value used in
     * the distance computation.
     *
     * @author YYH
     */
    public static class LocationWrapper implements Clusterable {
        private final double[] points;
        private final Location location;

        /**
         * @param location the node to wrap; its x value becomes the single coordinate
         */
        public LocationWrapper(Location location) {
            this.location = location;
            this.points = new double[] { location.getX() };
        }

        /** @return the wrapped node */
        public Location getLocation() {
            return location;
        }

        /** @return the one-element coordinate array of this node (not copied) */
        @Override
        public double[] getPoint() {
            return points;
        }
    }
}

2)K-means++ 的使用

/**
 * Demo of K-means++ clustering with Apache Commons Math, using a pre-computed
 * table of pairwise node weights as the distance measure.
 *
 * <p>The input file is read as tab-separated lines of the form
 * {@code node1 <TAB> node2 <TAB> weight}; both node columns must be parseable
 * as numbers because each node is also turned into a one-dimensional point.
 *
 * @author YYH
 */
public class KmeansPlusPlus {

    /**
     * Path of the tab-separated input file (jaccardCoding.txt).
     */
    public static final String inputPath = "D:\\jaccardCoding.txt";

    /** Distinct nodes collected by {@link #getCodingFileMap(String)}. Shared, mutable state. */
    static List<Location> locations = new ArrayList<>();

    /**
     * Loads the weight table from {@link #inputPath} and prints the K-means++
     * clusters obtained with 2 clusters and at most 10000 iterations.
     *
     * @param args unused
     * @throws IOException if the input file cannot be opened or is malformed
     */
    public static void main(String[] args) throws IOException {
        KmeansPlusPlus kmeans = new KmeansPlusPlus();
        Map<NodePair, Double> nodesPairMap = kmeans.getCodingFileMap(inputPath);
        kmeans.getKMeansResult(locations, nodesPairMap, 2, 10000);
    }

    /**
     * Clusters the given nodes with K-means++ and prints every resulting
     * cluster to standard output ("Cluster i", one member per line, then a
     * blank line).
     *
     * @author Administrator
     * @param locations     the nodes to cluster
     * @param nodesPairMap  weight of each node pair, used as the distance between the two nodes
     * @param classNumber   the number of clusters to split the nodes into
     * @param maxIterations the maximum number of iterations to run
     */
    public void getKMeansResult(
            List<Location> locations,
            Map<NodePair, Double> nodesPairMap,
            int classNumber,
            int maxIterations) {
        // Wrap every node so that the clusterer can read it as a Clusterable point.
        List<LocationWrapper> clusterInput = new ArrayList<LocationWrapper>(locations.size());
        for (Location location : locations) {
            clusterInput.add(new LocationWrapper(location));
        }

        // K-means++ with classNumber clusters, at most maxIterations iterations and an
        // explicit distance measure: distances are looked up in nodesPairMap.
        JaccardDistance jaccardDistance = new JaccardDistance(nodesPairMap);
        KMeansPlusPlusClusterer<LocationWrapper> clusterer =
                new KMeansPlusPlusClusterer<LocationWrapper>(classNumber, maxIterations, jaccardDistance);
        List<CentroidCluster<LocationWrapper>> clusterResults = clusterer.cluster(clusterInput);

        // Output the clusters.
        for (int i = 0; i < clusterResults.size(); i++) {
            System.out.println("Cluster " + i);
            for (LocationWrapper locationWrapper : clusterResults.get(i).getPoints()) {
                System.out.println(locationWrapper.getLocation());
            }
            System.out.println();
        }
    }

    /**
     * Reads the weight file and returns its content as a map from node pair to
     * weight. As a side effect, every node seen in the file that is not yet in
     * the static {@link #locations} list is appended to it, so calling this
     * method repeatedly does not create duplicate entries.
     *
     * <p>Blank lines are skipped. Any other line must contain at least three
     * tab-separated columns: first node, second node, weight.
     *
     * @param inputPath path of the tab-separated weight file
     * @return the weight of every node pair listed in the file
     * @throws IOException if the file cannot be opened or read, or if a
     *                     non-blank line has fewer than three columns
     */
    public Map<NodePair, Double> getCodingFileMap(String inputPath) throws IOException {
        Map<NodePair, Double> nodesPairMap = new HashMap<>();
        Set<Location> locationSet = new HashSet<>();

        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader bReader = FileUtil.getReader(inputPath)) {
            if (bReader == null) {
                // Checked before first use; the original tested for null only after reading.
                throw new IOException("Cannot open input file: " + inputPath);
            }
            String txtLine;
            int lineNumber = 0;
            while ((txtLine = bReader.readLine()) != null) {
                lineNumber++;
                if (txtLine.trim().isEmpty()) {
                    continue; // tolerate blank lines such as a trailing newline
                }
                String[] values = txtLine.split("\t");
                if (values.length < 3) {
                    throw new IOException("Malformed line " + lineNumber + " in " + inputPath
                            + ": expected 3 tab-separated columns but found " + values.length);
                }
                NodePair nodePair = new NodePair();
                nodePair.setNode1(values[0]);
                nodePair.setNode2(values[1]);
                nodesPairMap.put(nodePair, Double.valueOf(values[2]));
                // The node ids are only converted from String to double here.
                locationSet.add(new Location(Double.valueOf(values[0])));
                locationSet.add(new Location(Double.valueOf(values[1])));
            }
        }

        // Append only nodes that are not already known.
        locationSet.removeAll(locations);
        locations.addAll(locationSet);
        return nodesPairMap;
    }

    /**
     * Distance measure backed by a pre-computed table: instead of computing a
     * distance from the coordinates, the first coordinate of each point is
     * interpreted as a node id and the pair is looked up in
     * {@link #nodesPairMap}.
     *
     * <p>NOTE(review): pairs that are not in the table get distance {@code 0},
     * i.e. they are treated as identical. Confirm that this is intended for
     * the data stored in the file (coefficient vs. distance).
     *
     * <p>NOTE(review): a centroid-based clusterer presumably passes cluster
     * centers that are not nodes from the file; their coordinate is truncated
     * to an integer before the lookup. Verify that this is meaningful.
     *
     * @author YYH
     */
    public static class JaccardDistance implements DistanceMeasure {
        private static final long serialVersionUID = 1L;

        /** File that receives a trace line for every computed distance. */
        public static final String outPath = "D:\\Test.txt";

        /** Weight of each node pair; may be replaced by callers. */
        public Map<NodePair, Double> nodesPairMap;

        /**
         * @param nodesPairMap weight of each node pair
         */
        public JaccardDistance(Map<NodePair, Double> nodesPairMap) {
            this.nodesPairMap = nodesPairMap;
        }

        /** Creates a measure with an empty table; every distance is then {@code 0}. */
        public JaccardDistance() {
            this(new HashMap<NodePair, Double>());
        }

        /**
         * Returns the table entry for the node pair identified by
         * {@code a[0]} and {@code b[0]}, or {@code 0} when the pair is unknown.
         * The result is also printed to standard output and written to
         * {@link #outPath}; this tracing can be removed when it is not needed.
         *
         * @param a first point; only {@code a[0]} is used
         * @param b second point; only {@code b[0]} is used
         * @return the stored weight of the pair, or {@code 0} if there is none
         */
        @Override
        public double compute(double[] a, double[] b) throws DimensionMismatchException {
            // Truncate to the integral node id. Going through String.valueOf(double)
            // breaks for ids >= 1e7, which are printed as "1.0E7" and would become "1".
            String node1 = Long.toString((long) a[0]);
            String node2 = Long.toString((long) b[0]);
            double value = lookup(node1, node2);

            System.out.println(node1 + " : " + node2 + " : " + value);

            // NOTE(review): the trace file is opened once per distance computation.
            // Whether FileUtil.getWriter appends or truncates is not visible here,
            // so the per-call open/close is kept to preserve the file content.
            try (BufferedWriter bwriter = FileUtil.getWriter(outPath)) {
                if (bwriter != null) {
                    bwriter.write(node1 + " : " + node2 + " : " + value);
                    bwriter.newLine();
                    bwriter.flush();
                }
            } catch (IOException e) {
                // Tracing is best effort; a failed write must not abort the clustering.
                e.printStackTrace();
            }
            return value;
        }

        /** Looks the pair up in both directions, since a distance is symmetric. */
        private double lookup(String node1, String node2) {
            if (nodesPairMap == null) {
                return 0.;
            }
            Double weight = nodesPairMap.get(new NodePair(node1, node2));
            if (weight == null) {
                weight = nodesPairMap.get(new NodePair(node2, node1));
            }
            return weight == null ? 0. : weight;
        }
    }

    /**
     * Adapter that exposes a {@link Location} to the clusterer. The location is
     * the node's number (or label); the point array carries the value used in
     * the distance computation.
     *
     * @author YYH
     */
    public static class LocationWrapper implements Clusterable {
        private final double[] points;
        private final Location location;

        /**
         * @param location the node to wrap; its x value becomes the single coordinate
         */
        public LocationWrapper(Location location) {
            this.location = location;
            this.points = new double[] { location.getX() };
        }

        /** @return the wrapped node */
        public Location getLocation() {
            return location;
        }

        /** @return the one-element coordinate array of this node (not copied) */
        @Override
        public double[] getPoint() {
            return points;
        }
    }
}