欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识。
您现在的位置是: 首页

Kmeans的python代码实现

程序员文章站 2022-05-20 19:49:57
...
import math
import random
import pandas as pd

#计算距离
def distance(a, b):
    """Return the Euclidean distance between two points in the plane.

    Only the first two coordinates of ``a`` and ``b`` are read; any further
    elements are ignored.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return math.sqrt(dx ** 2 + dy ** 2)

#D:数据集,k:簇数,maxIter:最大迭代次数,U:簇划分的集合,C:质心集合
def kmeans(D, k, maxIter):
    """Partition the points of ``D`` into ``k`` clusters with k-means.

    Each round assigns every point to its nearest centroid (via
    ``distance``) and then moves each centroid to the mean of its cluster
    (via ``findNew``). Iteration stops once the centroids no longer change
    or after ``maxIter`` rounds, whichever comes first.

    Args:
        D: Sequence of points. Each point must be indexable; ``distance``
            and ``findNew`` read only its first two coordinates.
        k: Number of clusters to form.
        maxIter: Maximum number of assign/update rounds.

    Returns:
        A list of ``k`` lists; list ``j`` holds the points assigned to
        centroid ``j``. If ``k >= len(D)`` the input ``D`` itself is
        returned unchanged (behaviour kept for backward compatibility).
        If ``maxIter`` is not positive, the ``k`` clusters are all empty.

    Raises:
        ValueError: If ``k`` is not positive while ``D`` holds more than
            ``k`` points.
    """
    if k >= len(D):
        return D
    if k <= 0:
        raise ValueError("k must be a positive integer")

    # Pick k distinct samples of D as the initial centroids.
    C = [D[idx] for idx in random.sample(range(len(D)), k)]
    U = [[] for _ in range(k)]

    for _ in range(maxIter):
        # Start each round from empty clusters; the clusters built in the
        # final round are the ones returned.
        U = [[] for _ in range(k)]
        for point in D:
            distances = [distance(point, centroid) for centroid in C]
            # index(min(...)) selects the first nearest centroid, so ties
            # go to the lowest cluster index and no magic upper bound on
            # the distance is needed.
            U[distances.index(min(distances))].append(point)

        # An empty cluster keeps its previous centroid: findNew returns ()
        # for an empty list, which distance cannot index.
        new_C = [
            findNew(cluster) if cluster else C[j]
            for j, cluster in enumerate(U)
        ]

        # Unchanged centroids mean every later round would reproduce the
        # same assignment, so stop early.
        if new_C == C:
            break
        C = new_C

    return U

#找出新质心
def findNew(L):
    """Return the centroid of the points in ``L`` as an ``(x, y)`` tuple.

    The centroid is the mean of the first coordinates and the mean of the
    second coordinates; any further elements of a point are ignored.
    An empty ``L`` yields the empty tuple ``()``.
    """
    count = len(L)
    if count == 0:
        return ()

    x_total = 0
    y_total = 0
    for point in L:
        x_total = x_total + point[0]
        y_total = y_total + point[1]
    return (x_total / count, y_total / count)

if __name__ == "__main__":
    # Load the samples from data.csv and turn every row into a tuple point.
    frame = pd.read_csv('data.csv', delimiter=',')
    D = [tuple(row) for row in frame.values.tolist()]
    # Ask the user for the number of clusters, then cluster with at most
    # 10 iterations and print the resulting partition.
    num = int(input("请输入簇数:"))
    U = kmeans(D, num, 10)
    print(U)



data.csv中内容

Kmeans的python代码实现

运行结果:

Kmeans的python代码实现
Kmeans的python代码实现

相关标签: kmeans