使用 Apache Commons Math 中的聚类方法 DBSCAN 与 K-means
程序员文章站
2022-07-03 11:47:22
...
使用 Apache Commons Math 中的聚类方法
1)DBSCAN的使用
/**
 * Demo that clusters node ids with Apache Commons Math's {@code DBSCANClusterer},
 * using a distance that is looked up from a pre-computed table of node pairs
 * instead of being derived from coordinates.
 */
public class DBSCAN {
    /**
     * Path of the tab-separated pair file ({@code jaccardCoding.txt}) loaded by {@link #main}.
     */
    public static final String inputPath = "D:\\jaccardCoding.txt";

    /**
     * Distinct locations collected by {@link #getCodingFileMap(String)}.
     * NOTE(review): shared mutable static state; every call to
     * {@code getCodingFileMap} appends to this list.
     */
    static List<Location> locations = new ArrayList<>();

    /**
     * Loads {@link #inputPath} and prints the DBSCAN clusters for eps = 0.5, minPts = 10.
     *
     * @param args unused
     * @throws IOException if the input file cannot be read or is malformed
     */
    public static void main(String[] args) throws IOException {
        DBSCAN dbscan = new DBSCAN();
        Map<NodePair, Double> nodesPairMap = dbscan.getCodingFileMap(inputPath);
        dbscan.getDBSCANResult(locations, nodesPairMap, 0.5, 10);
    }

    /**
     * Runs DBSCAN over the given locations and prints every cluster to standard output.
     *
     * @author YYH
     * @param locations the nodes to cluster
     * @param nodesPairMap distance value for each known node pair
     * @param eps the distance that defines the eps-neighborhood of a point
     * @param minPts the minimum number of density-connected points required to form a cluster
     */
    public void getDBSCANResult(
            List<Location> locations,
            Map<NodePair, Double> nodesPairMap,
            double eps,
            int minPts) {
        List<LocationWrapper> clusterInput = new ArrayList<LocationWrapper>(locations.size());
        for (Location location : locations) {
            clusterInput.add(new LocationWrapper(location));
        }

        // DBSCAN with the caller-supplied eps/minPts; distances come from the
        // pair table via JaccardDistance rather than from the Euclidean default.
        JaccardDistance jaccardDistance = new JaccardDistance(nodesPairMap);
        DBSCANClusterer<LocationWrapper> clusterer =
                new DBSCANClusterer<LocationWrapper>(eps, minPts, jaccardDistance);
        List<Cluster<LocationWrapper>> clusterResults = clusterer.cluster(clusterInput);

        // output the clusters
        for (int i = 0; i < clusterResults.size(); i++) {
            System.out.println("Cluster " + i);
            for (LocationWrapper locationWrapper : clusterResults.get(i).getPoints()) {
                System.out.println(locationWrapper.getLocation());
            }
            System.out.println();
        }
    }

    /**
     * Reads a tab-separated file whose lines are {@code node1 <TAB> node2 <TAB> value},
     * returns the pair-to-value map and appends every distinct node (converted to a
     * {@code Location}) to the static {@link #locations} list.
     *
     * <p>Blank lines are skipped. Node ids must be parseable as numbers because they
     * are converted with {@code Double.valueOf}.
     *
     * @param inputPath path of the pair file
     * @return map from node pair to the value in the third column
     * @throws IOException if the file cannot be read or a non-blank line has fewer than three fields
     */
    public Map<NodePair, Double> getCodingFileMap(String inputPath) throws IOException {
        Map<NodePair, Double> nodesPairMap = new HashMap<>();
        // Insertion-ordered so the clustering input order follows the file and runs are reproducible.
        Set<Location> locationSet = new java.util.LinkedHashSet<>();

        // try-with-resources closes the reader even when parsing fails part-way through.
        try (BufferedReader bReader = FileUtil.getReader(inputPath)) {
            String txtLine;
            int lineNumber = 0;
            while ((txtLine = bReader.readLine()) != null) {
                lineNumber++;
                if (txtLine.trim().isEmpty()) {
                    continue;
                }
                String[] values = txtLine.split("\t");
                if (values.length < 3) {
                    throw new IOException("Malformed line " + lineNumber + " in " + inputPath
                            + ": expected 3 tab-separated fields but found " + values.length);
                }
                NodePair nodePair = new NodePair();
                nodePair.setNode1(values[0]);
                nodePair.setNode2(values[1]);
                nodesPairMap.put(nodePair, Double.valueOf(values[2]));
                // The node id itself is used as the single coordinate of the location.
                locationSet.add(new Location(Double.valueOf(values[0])));
                locationSet.add(new Location(Double.valueOf(values[1])));
            }
        }
        locations.addAll(locationSet);
        return nodesPairMap;
    }

    /**
     * {@code DistanceMeasure} that ignores geometry: it treats the first coordinate of
     * each point as a node id and returns the value stored for that node pair.
     *
     * <p>Pairs that are not in the map get distance {@code 0}.
     * NOTE(review): a default of 0 makes unknown pairs look identical to the
     * clusterer - confirm this is intended.
     *
     * @author YYH
     */
    public static class JaccardDistance implements DistanceMeasure {
        private static final long serialVersionUID = 1L;

        /** File that receives one debug line per computed distance. */
        public static final String outPath = "D:\\Test.txt";

        /** Value for each known node pair. */
        public Map<NodePair, Double> nodesPairMap;

        public JaccardDistance(Map<NodePair, Double> nodesPairMap) {
            this.nodesPairMap = nodesPairMap;
        }

        /** Creates a measure with an empty table, so every distance is {@code 0}. */
        public JaccardDistance() {
            this.nodesPairMap = new HashMap<>();
        }

        /**
         * Returns the stored value for the node pair identified by {@code a[0]} and {@code b[0]}.
         * Also echoes the lookup to standard output and to {@link #outPath}; that debug
         * output is best-effort and never affects the returned value.
         *
         * @param a first point; only {@code a[0]} is read
         * @param b second point; only {@code b[0]} is read
         * @return the stored value, or {@code 0} when neither ordering of the pair is known
         */
        @Override
        public double compute(double[] a, double[] b) throws DimensionMismatchException {
            // Truncate to an integer id. Going through String.valueOf(double) and cutting at
            // '.' breaks for values >= 1e7 ("1.0E7" -> "1") and throws for NaN/Infinity.
            String node1 = String.valueOf((long) a[0]);
            String node2 = String.valueOf((long) b[0]);
            double value = lookup(node1, node2);

            String message = node1 + " : " + node2 + " : " + value;
            System.out.println(message);
            // NOTE(review): a writer is opened per call; whether it appends or truncates
            // depends on FileUtil.getWriter - confirm.
            try (BufferedWriter bwriter = FileUtil.getWriter(outPath)) {
                if (bwriter != null) {
                    bwriter.write(message);
                    bwriter.newLine();
                    bwriter.flush();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return value;
        }

        /** Looks the pair up in either order so the measure is symmetric. */
        private double lookup(String node1, String node2) {
            if (nodesPairMap == null) {
                return 0.;
            }
            Double stored = nodesPairMap.get(new NodePair(node1, node2));
            if (stored == null) {
                stored = nodesPairMap.get(new NodePair(node2, node1));
            }
            return stored == null ? 0. : stored;
        }
    }

    /**
     * Adapts a {@code Location} to {@code Clusterable}: the clustering point is the
     * one-element array holding {@code location.getX()}.
     *
     * @author YYH
     */
    public static class LocationWrapper implements Clusterable {
        private final double[] points;
        private final Location location;

        public LocationWrapper(Location location) {
            this.location = location;
            this.points = new double[] { location.getX() };
        }

        /** @return the wrapped location */
        public Location getLocation() {
            return location;
        }

        /** @return the one-element coordinate array (not copied) */
        @Override
        public double[] getPoint() {
            return points;
        }
    }
}
2)Kmeans++的使用
/**
 * Demo that clusters node ids with Apache Commons Math's {@code KMeansPlusPlusClusterer},
 * using a distance that is looked up from a pre-computed table of node pairs.
 *
 * @author YYH
 */
public class KmeansPlusPlus {
    /**
     * Path of the tab-separated pair file ({@code jaccardCoding.txt}) loaded by {@link #main}.
     */
    public static final String inputPath = "D:\\jaccardCoding.txt";

    /**
     * Distinct locations collected by {@link #getCodingFileMap(String)}.
     * NOTE(review): shared mutable static state; every call to
     * {@code getCodingFileMap} appends to this list.
     */
    static List<Location> locations = new ArrayList<>();

    /**
     * Loads {@link #inputPath} and prints the k-means++ clusters for k = 2 with at most
     * 10000 iterations.
     *
     * @param args unused
     * @throws IOException if the input file cannot be read or is malformed
     */
    public static void main(String[] args) throws IOException {
        KmeansPlusPlus kmeans = new KmeansPlusPlus();
        Map<NodePair, Double> nodesPairMap = kmeans.getCodingFileMap(inputPath);
        kmeans.getKMeansResult(locations, nodesPairMap, 2, 10000);
    }

    /**
     * Runs k-means++ over the given locations and prints every cluster to standard output.
     *
     * @author Administrator
     * @param locations the nodes to cluster
     * @param nodesPairMap distance value for each known node pair
     * @param classNumber number of clusters to produce
     * @param maxIterations maximum number of iterations
     */
    public void getKMeansResult(
            List<Location> locations,
            Map<NodePair, Double> nodesPairMap,
            int classNumber,
            int maxIterations) {
        List<LocationWrapper> clusterInput = new ArrayList<LocationWrapper>(locations.size());
        for (Location location : locations) {
            clusterInput.add(new LocationWrapper(location));
        }

        // KMeans++ with classNumber clusters and maxIterations iterations at most;
        // distances come from the pair table via JaccardDistance, not the Euclidean default.
        // NOTE(review): k-means presumably hands centroids (coordinate means) to the
        // distance measure, which truncates them to a node id - confirm this is intended.
        JaccardDistance jaccardDistance = new JaccardDistance(nodesPairMap);
        KMeansPlusPlusClusterer<LocationWrapper> clusterer =
                new KMeansPlusPlusClusterer<LocationWrapper>(classNumber, maxIterations, jaccardDistance);
        List<CentroidCluster<LocationWrapper>> clusterResults = clusterer.cluster(clusterInput);

        // output the clusters
        for (int i = 0; i < clusterResults.size(); i++) {
            System.out.println("Cluster " + i);
            for (LocationWrapper locationWrapper : clusterResults.get(i).getPoints()) {
                System.out.println(locationWrapper.getLocation());
            }
            System.out.println();
        }
    }

    /**
     * Reads a tab-separated file whose lines are {@code node1 <TAB> node2 <TAB> value},
     * returns the pair-to-value map and appends every distinct node (converted to a
     * {@code Location}) to the static {@link #locations} list.
     *
     * <p>Blank lines are skipped. Node ids must be parseable as numbers because they
     * are converted with {@code Double.valueOf}.
     *
     * @param inputPath path of the pair file
     * @return map from node pair to the value in the third column
     * @throws IOException if the file cannot be read or a non-blank line has fewer than three fields
     */
    public Map<NodePair, Double> getCodingFileMap(String inputPath) throws IOException {
        Map<NodePair, Double> nodesPairMap = new HashMap<>();
        // Insertion-ordered so the clustering input order follows the file and runs are reproducible.
        Set<Location> locationSet = new java.util.LinkedHashSet<>();

        // try-with-resources closes the reader even when parsing fails part-way through.
        try (BufferedReader bReader = FileUtil.getReader(inputPath)) {
            String txtLine;
            int lineNumber = 0;
            while ((txtLine = bReader.readLine()) != null) {
                lineNumber++;
                if (txtLine.trim().isEmpty()) {
                    continue;
                }
                String[] values = txtLine.split("\t");
                if (values.length < 3) {
                    throw new IOException("Malformed line " + lineNumber + " in " + inputPath
                            + ": expected 3 tab-separated fields but found " + values.length);
                }
                NodePair nodePair = new NodePair();
                nodePair.setNode1(values[0]);
                nodePair.setNode2(values[1]);
                nodesPairMap.put(nodePair, Double.valueOf(values[2]));
                // The node id itself is used as the single coordinate of the location.
                locationSet.add(new Location(Double.valueOf(values[0])));
                locationSet.add(new Location(Double.valueOf(values[1])));
            }
        }
        locations.addAll(locationSet);
        return nodesPairMap;
    }

    /**
     * {@code DistanceMeasure} that ignores geometry: it treats the first coordinate of
     * each point as a node id and returns the value stored for that node pair.
     *
     * <p>Pairs that are not in the map get distance {@code 0}.
     * NOTE(review): a default of 0 makes unknown pairs look identical to the
     * clusterer - confirm this is intended.
     *
     * @author YYH
     */
    public static class JaccardDistance implements DistanceMeasure {
        private static final long serialVersionUID = 1L;

        /** File that receives one debug line per computed distance. */
        public static final String outPath = "D:\\Test.txt";

        /** Value for each known node pair. */
        public Map<NodePair, Double> nodesPairMap;

        public JaccardDistance(Map<NodePair, Double> nodesPairMap) {
            this.nodesPairMap = nodesPairMap;
        }

        /** Creates a measure with an empty table, so every distance is {@code 0}. */
        public JaccardDistance() {
            this.nodesPairMap = new HashMap<>();
        }

        /**
         * Returns the stored value for the node pair identified by {@code a[0]} and {@code b[0]}.
         * Also echoes the lookup to standard output and to {@link #outPath}; that debug
         * output is best-effort and never affects the returned value.
         *
         * @param a first point; only {@code a[0]} is read
         * @param b second point; only {@code b[0]} is read
         * @return the stored value, or {@code 0} when neither ordering of the pair is known
         */
        @Override
        public double compute(double[] a, double[] b) throws DimensionMismatchException {
            // Truncate to an integer id. Going through String.valueOf(double) and cutting at
            // '.' breaks for values >= 1e7 ("1.0E7" -> "1") and throws for NaN/Infinity.
            String node1 = String.valueOf((long) a[0]);
            String node2 = String.valueOf((long) b[0]);
            double value = lookup(node1, node2);

            String message = node1 + " : " + node2 + " : " + value;
            System.out.println(message);
            // NOTE(review): a writer is opened per call; whether it appends or truncates
            // depends on FileUtil.getWriter - confirm.
            try (BufferedWriter bwriter = FileUtil.getWriter(outPath)) {
                if (bwriter != null) {
                    bwriter.write(message);
                    bwriter.newLine();
                    bwriter.flush();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return value;
        }

        /** Looks the pair up in either order so the measure is symmetric. */
        private double lookup(String node1, String node2) {
            if (nodesPairMap == null) {
                return 0.;
            }
            Double stored = nodesPairMap.get(new NodePair(node1, node2));
            if (stored == null) {
                stored = nodesPairMap.get(new NodePair(node2, node1));
            }
            return stored == null ? 0. : stored;
        }
    }

    /**
     * Adapts a {@code Location} to {@code Clusterable}: the clustering point is the
     * one-element array holding {@code location.getX()}.
     *
     * @author YYH
     */
    public static class LocationWrapper implements Clusterable {
        private final double[] points;
        private final Location location;

        public LocationWrapper(Location location) {
            this.location = location;
            this.points = new double[] { location.getX() };
        }

        /** @return the wrapped location */
        public Location getLocation() {
            return location;
        }

        /** @return the one-element coordinate array (not copied) */
        @Override
        public double[] getPoint() {
            return points;
        }
    }
}