欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

高维聚类结果可视化

程序员文章站 2024-02-13 19:46:28
...

利用sklearn包里的BIRCH算法,以iris数据集为例,对聚类结果进行可视化。

代码如下:

 

import sys
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import Birch
from sklearn.manifold import TSNE

try:
    from sklearn.datasets import make_blobs
except ImportError:
    # Legacy location; this module was removed in scikit-learn 0.24.
    from sklearn.datasets.samples_generator import make_blobs
# import ybirch

# Read the comma-separated data file into memory.
# A raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\T" or "\D" being passed through.
with open(r"F:\Tabtad\Downloads\iris\Iris.txt", 'r') as f:
    line = f.readlines()

xList = []

for data in line:
    stripped = data.strip()
    if not stripped:
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # float('') would otherwise raise ValueError.
        continue
    # Split the record on commas and convert every field to float.
    row = [float(field) for field in stripped.split(",")]
    # Discard the first column so only the remaining columns are clustered
    # (presumably an id/label column -- confirm against the data file).
    del row[0]
    xList.append(row)

# Convert the list of rows into a NumPy array.
X = np.array(xList)
print(xList)
print(X)
# Show the data as it looks before Birch is applied.
# NOTE: the third positional parameter of plt.scatter is the marker size `s`,
# not a z coordinate, so column 2 only controls how large each point is drawn.
plt.scatter(X[:, 0], X[:, 1], s=X[:, 2], marker='o')
plt.show()

# Report the dimensions of the parsed data. The column count is taken from the
# first row so that a single-row data set does not raise IndexError.
nrow = len(xList)
ncol = len(xList[0])
print("Number of Rows of Data = " + str(nrow) + '\n')
sys.stdout.write("Number of Columns of Data = " + str(ncol) + '\n')
from sklearn import metrics

# Configure Birch with n_clusters=3 and threshold=0.4, then fit it on X and
# obtain the predicted label of every sample in a single call.
model = Birch(threshold=0.4, n_clusters=3)
y_pred = model.fit_predict(X)
print(y_pred)

# Count how many samples were assigned to each label.
r1 = pd.Series(model.labels_).value_counts()
print(r1)

# Scatter the first two columns coloured by predicted label; column 2 is
# passed as the marker size (the third positional parameter of scatter).
plt.scatter(X[:, 0], X[:, 1], s=X[:, 2], c=y_pred)
plt.show()

# Score the clustering with the Calinski-Harabasz index.
print("Calinski_Harabasz Score", metrics.calinski_harabasz_score(X, y_pred))