无监督学习之K-Means算法python实现
程序员文章站
2022-06-22 17:41:15
1.定义 无监督学习:训练样本没有标注(标签),算法需要自行从数据中发现内在结构或规律,典型代表有K-Means聚类与主成分分析(PCA),被视为人工智能的实现方法之一(注意:"无明确的惩罚,以某种形式的回报激励决策,目标是使得奖励最大化"描述的是强化学习,而不是无监督学习)。K-Means聚类:将无标注的样本通过迭代聚类成k个簇。2.K-Means算法实现步骤:选定k个簇心(可随意);分别计算每个样本到簇心距离,并进行归属;重新计算簇重心,更新k个簇心;反复迭代直到达到终止条件。3.Python实现代码 import numpy as np import mat....
1.定义
- 无监督学习:训练样本没有标注(标签),算法需要自行从数据中发现内在结构或规律,典型代表有K-Means聚类与主成分分析(PCA),被视为人工智能的实现方法之一。注意:"无明确的惩罚,以某种形式的回报激励决策,目标是使得奖励最大化"描述的是强化学习,而不是无监督学习。
- K-Means聚类:将无标注的样本通过迭代聚类成k个簇。
2.K-Means算法实现步骤
-
选定k个簇心(可随意)
-
分别计算每个样本到簇心距离,并进行归属
-
重新计算簇重心,更新k个簇心
-
反复迭代直到达到终止条件(例如簇心不再变化,或达到最大迭代次数)
3.Python实现代码
import numpy as np
import matplotlib.pyplot as plt
import math
import os
def gene_save_data(f, centers=None, n_points=40, spread=2):
    """Generate clustered 2-D sample points and write them to *f*.

    For every center, ``n_points`` points are drawn uniformly from the
    square of half-width ``spread`` around that center.  Each cluster is
    written as one line; each point is written as the ``str()`` of a
    2-element NumPy array (for example ``[1.5 2.5]``) followed by a comma.

    Args:
        f: Writable text file object.
        centers: Iterable of ``(x, y)`` cluster centers.  Defaults to the
            three centers ``(2, 3)``, ``(5, 8)`` and ``(7, 4)``.
        n_points: Number of points generated per cluster (default 40).
        spread: Maximum absolute offset from the center on each axis
            (default 2).

    Returns:
        Always ``0``.
    """
    if centers is None:
        centers = [(2, 3), (5, 8), (7, 4)]
    for center in centers:
        offsets = np.random.uniform(-spread, spread, (n_points, 2))
        points = np.asarray(center) + offsets
        # One write per cluster instead of two tiny writes per point.
        f.write("".join(str(point) + "," for point in points) + "\n")
    return 0
def draw_scatter(g1, g2, g3, h, num):
    """Plot one clustering snapshot into cell ``num + 1`` of a 2x10 grid.

    Args:
        g1, g2, g3: The three groups; each is a sequence of points whose
            first two items are the x and y coordinates.
        h: Flat sequence ``[x1, y1, x2, y2, x3, y3]`` holding the three
            current centroids.
        num: Zero-based snapshot index; selects the subplot and is shown
            in the subplot title.
    """
    centroid_colors = ['c', 'g', 'm']  # colors of the centroid markers
    reference_color = 'r'

    plt.figure(1)
    plt.subplot(2, 10, num + 1)

    # Members of each group, drawn as translucent dots in a pale color.
    for members, shade in ((g1, '#AFEEEE'), (g2, '#98FB98'), (g3, '#EE82EE')):
        xs = [member[0] for member in members]
        ys = [float(member[1]) for member in members]
        plt.scatter(x=xs, y=ys, c=shade, alpha=0.4)

    # Current centroids, one marker color per group.
    centroid_x = [h[idx] for idx in (0, 2, 4)]
    centroid_y = [h[idx] for idx in (1, 3, 5)]
    plt.scatter(x=centroid_x, y=centroid_y, c=centroid_colors, marker='o')

    # Fixed reference points (2, 3), (5, 8) and (7, 4), drawn as red crosses.
    plt.scatter(x=[2, 5, 7], y=[3, 8, 4], c=reference_color, marker='x')

    # Decorations
    plt.gca().set(xlim=(-1.0, 12.0), ylim=(-1.0, 12.0))
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.title("no.%d clustering" % num, fontsize=8)
def distance(point, heart):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates of each argument are used.

    Args:
        point: Indexable holding the x and y coordinates of a sample.
        heart: Indexable holding the x and y coordinates of a centroid.

    Returns:
        The distance as a float.
    """
    # math.hypot avoids the OverflowError that squaring very large
    # coordinate differences by hand would raise.
    return math.hypot(point[0] - heart[0], point[1] - heart[1])
def list_string2float(g):
    """Convert the first two items of every row of *g* to float, in place.

    Args:
        g: List of mutable rows (for example ``[['1.5', '2'], ...]``).
            Every row must have at least two items.

    Returns:
        Always ``0``; the conversion is done by mutating *g*.
    """
    # Walk every row instead of assuming there are exactly 40 of them.
    for row in g:
        row[0] = float(row[0])
        row[1] = float(row[1])
    return 0
def _mean_or_previous(group, previous):
    """Return the coordinate-wise mean of *group*, or *previous* if it is empty."""
    if len(group) == 0:
        # np.mean of an empty array is a scalar nan, which is not a usable
        # centroid; keep the old one so the cluster can pick up points later.
        return previous
    return np.mean(np.array(group), axis=0).tolist()


def cal_heart(heart1, heart2, heart3, g1, g2, g3):
    """Recompute the three centroids from their current member groups.

    Args:
        heart1, heart2, heart3: Current centroids; each is returned
            unchanged when its group has no members.
        g1, g2, g3: Lists of points currently assigned to each centroid.

    Returns:
        Tuple ``(heart1, heart2, heart3)`` of updated centroids.  A
        recomputed centroid is a list of floats.
    """
    return (
        _mean_or_previous(g1, heart1),
        _mean_or_previous(g2, heart2),
        _mean_or_previous(g3, heart3),
    )
def belong_group(all, h1, h2, h3, g1, g2, g3):
    """Assign every point to the group of its nearest centroid, in place.

    The three group lists are emptied and then refilled, so callers see
    the new assignment through the lists they passed in.  When distances
    tie, the point goes to the first of the tied centroids.

    Args:
        all: Sequence of every sample point.  It must be a separate object
            from ``g1``/``g2``/``g3``, because those lists are cleared first.
        h1, h2, h3: Current centroids.
        g1, g2, g3: Lists that receive the points nearest to ``h1``,
            ``h2`` and ``h3`` respectively.

    Returns:
        Always ``0``.
    """
    hearts = (h1, h2, h3)
    groups = (g1, g2, g3)
    for group in groups:
        group.clear()
    # Iterate over the points themselves rather than assuming exactly 120.
    for point in all:
        dists = [distance(point, heart) for heart in hearts]
        groups[dists.index(min(dists))].append(point)
    return 0
def _read_heart(prompt):
    """Prompt for one initial centroid and return it as ``[x, y]`` floats."""
    # Split on whitespace (or a comma) instead of picking characters 0 and 2,
    # so multi-digit, decimal and negative coordinates are accepted.
    x, y = input(prompt).replace(',', ' ').split()[:2]
    return [float(x), float(y)]


def _parse_group(line):
    """Parse one line of the data file into a list of ``[x, y]`` float pairs."""
    cells = line.strip().strip(',').split(',')
    return [
        [float(value) for value in cell.strip().strip('[]').split()[:2]]
        for cell in cells
        if cell.strip()
    ]


def kmeans():
    """Run an interactive 3-cluster K-Means demo and plot every iteration.

    Sample data is read from ``data.txt`` in the current directory; the
    file is generated first when it is missing or empty.  The three
    initial centroids are read from standard input.  The initial state
    and each of the 19 iterations are drawn into a 2x10 subplot grid,
    which is shown at the end.

    Raises:
        ValueError: If a centroid cannot be parsed as two numbers, or the
            data file holds fewer than three lines.
    """
    data_path = 'data.txt'

    # Generate test data unless a non-empty data file already exists.
    if os.path.exists(data_path) and os.path.getsize(data_path):
        print('data already exist and not empty, pass!')
    else:
        with open(data_path, 'w') as f:
            gene_save_data(f)

    # Choose the initial centroids.
    heart1 = _read_heart('输入第一个初始簇点,格式如: 1 1\n')
    heart2 = _read_heart('输入第二个初始簇点,格式如: 1 1\n')
    heart3 = _read_heart('输入第三个初始簇点,格式如: 1 1\n')

    # Read the sample data; each of the first three lines is one group.
    with open(data_path, 'r') as f:
        lines = f.readlines()
    if len(lines) < 3:
        raise ValueError('data.txt must contain at least three lines of points')
    g1, g2, g3 = (_parse_group(line) for line in lines[:3])

    draw_scatter(g1, g2, g3, (heart1 + heart2 + heart3), 0)

    # Build a separate list: belong_group empties g1/g2/g3 before refilling.
    points = g1 + g2 + g3
    for i in range(19):
        print('第%d次的簇点:' % (i + 1))
        print(heart1, heart2, heart3)
        belong_group(points, heart1, heart2, heart3, g1, g2, g3)
        heart1, heart2, heart3 = cal_heart(heart1, heart2, heart3, g1, g2, g3)
        draw_scatter(g1, g2, g3, (heart1 + heart2 + heart3), i + 1)
    plt.show()
# Script entry point: run the demo only when this file is executed directly.
if __name__ == "__main__":
    kmeans()
4.实现效果
-
控制台输出
-
输出图像
本文地址:https://blog.csdn.net/weixin_46318945/article/details/107326010