K-means 的 Python 代码实现
程序员文章站
2022-05-20 19:49:57
...
import math
import random
import pandas as pd
#计算距离
def distance(a, b):
    """Return the Euclidean distance between two points in the plane.

    Only the first two coordinates (index 0 and index 1) of ``a`` and ``b``
    are read; any further elements are ignored.
    """
    delta_x = a[0] - b[0]
    delta_y = a[1] - b[1]
    return math.sqrt(delta_x**2 + delta_y**2)
#D:数据集,k:簇数,maxIter:最大迭代次数,U:簇划分的集合,C:质心集合
def kmeans(D, k, maxIter):
    """Cluster the samples in ``D`` into ``k`` groups with k-means.

    Each pass assigns every sample to its nearest centroid (measured with
    the module-level ``distance``) and then recomputes each centroid with
    the module-level ``findNew``. Iteration stops when the centroids no
    longer change or after ``maxIter`` passes, whichever comes first.

    Args:
        D: Indexable collection of samples. ``distance`` and ``findNew``
            only read the first two coordinates of each sample.
        k: Number of clusters.
        maxIter: Maximum number of assignment passes.

    Returns:
        A list of ``k`` lists; the i-th list holds the samples assigned to
        the i-th centroid in the last pass. If ``maxIter`` is not positive
        the ``k`` lists are empty. When ``k >= len(D)`` the input ``D`` is
        returned unchanged (historical behaviour, kept for compatibility).
    """
    if k >= len(D):
        return D

    # Pick k distinct samples as the initial centroids.
    C = [D[index] for index in random.sample(range(len(D)), k)]
    U = [[] for _ in range(k)]

    for _ in range(maxIter):
        # Start every pass from empty clusters so that the value returned
        # always reflects exactly one complete assignment.
        U = [[] for _ in range(k)]
        for point in D:
            # min() returns the first minimum, so ties go to the centroid
            # with the lowest index; no distance sentinel is needed.
            nearest = min(range(k), key=lambda j: distance(point, C[j]))
            U[nearest].append(point)

        # Build the new centroids in a fresh list so the comparison below
        # is between two distinct objects. An empty cluster keeps its old
        # centroid instead of dividing by zero in findNew.
        new_centroids = [
            findNew(cluster) if cluster else C[j]
            for j, cluster in enumerate(U)
        ]

        # Unchanged centroids mean the next pass would reproduce the same
        # assignment, so stop while U still holds the result.
        if new_centroids == C:
            break
        C = new_centroids

    return U
#找出新质心
def findNew(L):
    """Return the centroid of the points in ``L`` as an ``(x, y)`` tuple.

    The result is the arithmetic mean of coordinate 0 and of coordinate 1
    over all points. An empty ``L`` raises ``ZeroDivisionError``.
    """
    total_x = 0
    total_y = 0
    for point in L:
        total_x += point[0]
        total_y += point[1]
    count = len(L)
    return (total_x / count, total_y / count)
if __name__ == "__main__":
    # Read the samples from data.csv; each row becomes one tuple.
    dataset = pd.read_csv('data.csv', delimiter=',')
    data = dataset.values.tolist()
    D = [tuple(row) for row in data]
    # Ask for the number of clusters, then run at most 10 passes.
    num = int(input("请输入簇数:"))
    U = kmeans(D, num, 10)
    print(U)
data.csv中内容
运行结果:
上一篇: 学习笔记:机器学习实战之Kmeans
下一篇: 机器学习之k-means算法详解