K均值聚类的Python实现
程序员文章站
2024-03-17 17:40:52
...
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs # 生成数据
def init_centers(data, n_clusters, rng=None):
    """Draw random starting centers inside the bounding box of ``data``.

    Every coordinate of every center is sampled uniformly between the
    minimum and the maximum of the corresponding data column.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        n_clusters: Number of centers to draw.
        rng: Optional object exposing ``uniform(low, high, size)`` such as
            ``np.random.RandomState``; the global ``np.random`` state is
            used when omitted.

    Returns:
        Float array of shape (n_clusters, n_features).
    """
    rng = np.random if rng is None else rng
    data = np.asarray(data, dtype=float)
    # low/high have one entry per column and broadcast against ``size``.
    return rng.uniform(data.min(axis=0), data.max(axis=0),
                       size=(n_clusters, data.shape[1]))


def assign_clusters(data, centers):
    """Return the index of the nearest center for every row of ``data``.

    Distances are squared Euclidean distances; on a tie the center with the
    lowest index wins.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        centers: 2-D array-like of shape (n_clusters, n_features).

    Returns:
        Integer array of shape (n_samples,).
    """
    data = np.asarray(data, dtype=float)
    centers = np.asarray(centers, dtype=float)
    # (n_samples, 1, n_features) - (1, n_clusters, n_features)
    diff = data[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sq_dist = (diff ** 2).sum(axis=2)
    # argmin needs no "no distance yet" sentinel, so a point that sits
    # exactly on a center (distance 0) is still assigned to that center.
    return sq_dist.argmin(axis=1)


def kmeans(data, n_clusters, init=None, max_iter=300, rng=None):
    """Cluster ``data`` with Lloyd's k-means iteration.

    Each pass assigns every sample to its nearest center and then moves
    every center to the mean of its samples. A center that received no
    samples is re-drawn at random inside the data's bounding box. The
    iteration stops once a pass leaves all centers unchanged and no cluster
    is empty, or after ``max_iter`` passes.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        n_clusters: Number of clusters, between 1 and n_samples.
        init: Optional starting centers of shape (n_clusters, n_features);
            drawn with ``init_centers`` when omitted. The array is copied.
        max_iter: Upper bound on the number of passes (at least 1).
        rng: Optional random source, see ``init_centers``.

    Returns:
        Tuple ``(labels, centers)``. ``labels`` is the integer assignment
        computed in the last pass and ``centers`` the centers after that
        pass's update.

    Raises:
        ValueError: If ``data`` is not a non-empty 2-D array, if
            ``n_clusters`` is outside ``1..n_samples``, if ``max_iter`` is
            smaller than 1, or if ``init`` has the wrong shape.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0:
        raise ValueError("data must be a non-empty 2-D array")
    n_samples, n_features = data.shape
    # More clusters than samples guarantees an empty cluster on every pass,
    # which would keep the iteration from ever settling.
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            "n_clusters must be between 1 and {}, got {}".format(
                n_samples, n_clusters))
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    rng = np.random if rng is None else rng
    if init is None:
        centers = init_centers(data, n_clusters, rng)
    else:
        centers = np.array(init, dtype=float)
        if centers.shape != (n_clusters, n_features):
            raise ValueError(
                "init must have shape ({}, {})".format(n_clusters, n_features))

    low = data.min(axis=0)
    high = data.max(axis=0)

    for _ in range(max_iter):
        labels = assign_clusters(data, centers)
        updated = centers.copy()
        has_empty = False
        for idx in range(n_clusters):
            members = data[labels == idx]
            if len(members):
                updated[idx] = members.mean(axis=0)
            else:
                # Re-seed an empty cluster and force at least one more pass.
                updated[idx] = rng.uniform(low, high)
                has_empty = True
        # With unchanged assignments the means are recomputed from the same
        # rows, so exact equality is a reliable stopping test here.
        converged = not has_empty and np.array_equal(updated, centers)
        centers = updated
        if converged:
            break
    return labels, centers


def main():
    """Generate blob data, cluster it interactively and plot the result.

    Shows the generated points coloured by their true labels, prints the
    data, asks for the number of clusters, prints the random starting
    centers, runs k-means, then prints and plots the resulting labels.
    """
    X, y = make_blobs()
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.show()

    data = np.array(X)
    print(data)

    n_clusters = int(input("n_clusters:"))
    cluster = init_centers(data, n_clusters)
    print(cluster)

    labels, _ = kmeans(data, n_clusters, init=cluster)
    # Printed as floats so the output keeps the "[1. 0. 2. ...]" format.
    data_type = labels.astype(float)
    print(data_type)
    plt.scatter(X[:, 0], X[:, 1], c=data_type)
    plt.show()


if __name__ == "__main__":
    main()
运行结果:
[[ -6.20944523 -7.33166233]
[ -6.59508332 -6.7686606 ]
[ -5.86770245 -6.04789805]
[ -4.9669678 -7.0455184 ]
[ 7.70549982 8.10100282]
[ 4.00735285 -7.9514294 ]
[ 6.09639615 8.91574546]
[ -3.73500463 -7.19508706]
[ -7.23302282 -8.13058997]
[ 7.94516753 10.35617676]
[ -7.29664332 -6.59538103]
[ -5.42210382 -7.17325291]
[ 9.91655471 9.95702789]
[ -5.85890928 -6.95543785]
[ 2.95112483 -8.42581647]
[ 2.87843225 -5.79839309]
[ 10.00666879 9.47464151]
[ 8.57732658 8.61403764]
[ 3.34740723 -5.96096412]
[ 7.50539769 9.59745877]
[ -6.80224581 -6.66214873]
[ 10.06946677 10.99349945]
[ -6.224985 -7.58182892]
[ 9.20856985 9.30139934]
[ 9.66365481 8.87359162]
[ 9.99306839 10.22089127]
[ -6.15264246 -8.25664891]
[ -5.99608561 -6.2647838 ]
[ 6.36115813 -8.80321234]
[ 4.62643508 -6.76776824]
[ -5.81652698 -6.76163874]
[ 4.14544423 -6.8583864 ]
[ 2.51332026 -7.83183723]
[ 8.25002436 10.51595518]
[ -7.77566223 -6.29449878]
[ 8.93876589 9.52540323]
[ -6.52407223 -5.77372853]
[ 9.53944164 10.06880603]
[ 8.35561852 11.88595248]
[ -7.25885238 -7.23222087]
[ 8.32330758 9.94445983]
[ -5.68727598 -5.29916468]
[ 9.07573593 8.26142339]
[ 8.07989415 9.41109159]
[ -6.92155563 -7.49079317]
[ 3.33945894 -6.22464168]
[ 10.13921133 11.25100618]
[ -5.48548598 -6.06290165]
[ -5.31428808 -4.84456997]
[ -4.20504992 -7.17575386]
[ -7.2808905 -7.97642 ]
[ 3.02584676 -4.63254456]
[ 8.94365396 9.41844164]
[ 2.82423067 -7.01582269]
[ 3.5629921 -7.23172739]
[ 2.13613233 -7.4896424 ]
[ 7.36758375 10.94971455]
[ 1.77340912 -7.63109169]
[ 3.90272275 -6.98553468]
[ 8.06326528 9.97043944]
[ 7.90103924 9.26937239]
[ 4.59806129 -8.28191975]
[ 8.48335298 9.27544851]
[ 9.27745897 8.20821306]
[ 9.43323104 8.61626946]
[ 3.79131125 -7.89538588]
[ 2.18809797 -8.67136836]
[ 8.22021414 9.84349258]
[ 4.20373867 -10.16628358]
[ 2.49532158 -7.90224922]
[ 4.34213995 -7.71632031]
[ 8.99497767 9.67206088]
[ 2.32636392 -8.50557182]
[ 3.54678388 -7.54218573]
[ 9.11802572 11.54065254]
[ 3.11620005 -7.55287811]
[ 3.55112647 -6.24650294]
[ 2.33221745 -7.44507088]
[ -6.75276884 -5.47180945]
[ 5.49777598 -8.97042451]
[ 2.88320675 -7.01387697]
[ -5.65060663 -8.15050371]
[ 9.65124143 8.79529544]
[ -5.15453426 -5.26414948]
[ -5.89042596 -7.71697968]
[ 3.43598122 -9.27241805]
[ 6.6773825 9.33694274]
[ 8.9237238 9.80815102]
[ 2.02021482 -10.12175901]
[ -6.83701235 -6.95518246]
[ -6.13688258 -7.27710626]
[ -5.52900517 -6.12941057]
[ 9.84790676 11.67213138]
[ -5.26072352 -6.67435566]
[ 8.89548533 9.31082552]
[ 3.80867113 -9.29292117]
[ 2.69777319 -8.95656029]
[ 0.98534355 -8.95935225]
[ -5.13337288 -7.07455859]
[ -6.83534647 -8.63788616]]
n_clusters:3
[[ 3.02646644 8.96311605]
[-3.91623975 -0.34736316]
[ 4.65563666 -6.59711839]]
[1. 1. 1. 1. 0. 2. 0. 1. 1. 0. 1. 1. 0. 1. 2. 2. 0. 0. 2. 0. 1. 0. 1. 0.
0. 0. 1. 1. 2. 2. 1. 2. 2. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 1. 2. 0. 1.
1. 1. 1. 2. 0. 2. 2. 2. 0. 2. 2. 0. 0. 2. 0. 0. 0. 2. 2. 0. 2. 2. 2. 0.
2. 2. 0. 2. 2. 2. 1. 2. 2. 1. 0. 1. 1. 2. 0. 0. 2. 1. 1. 1. 0. 1. 0. 2.
2. 2. 1. 1.]
上一篇: SPSS实现k均值聚类