人工智能考试——聚类分析
程序员文章站
2022-06-12 16:54:32
...
一、题目
使用 K-means 聚类算法对所提供的数据集进行聚类分析。通过修改提供的 kmeans.c 文件，读取二维的 clusterSet.txt 数据集，设定 k 为 2 和 4 两种情况，分别对该数据集进行聚类，并将最终的聚类中心和每个数据行所属的簇或组输出。
二、代码
#include <stdio.h>
#include <stdlib.h>
#include "maths.h"
#include "rand.h"
#define MAX_LINE 1024
/*
* Symbolic constants, the feature-vector type, and the file-level state
* shared by the k-means routines in this file.
*/
/* Number of prototype vectors held in fvec[] (and read from the data file) */
#define MAX_FEATURE_VECTORS 80
/* Number of features (dimensions) stored per vector */
#define MAX_FEATURES 2
/*
* One data point.
*   class    - index of the cluster the vector currently belongs to;
*              -1 marks a vector that has not been clustered yet.
*   features - the vector's coordinates.
*/
typedef struct {
int class;
float features[MAX_FEATURES];
} feature_vector_t;
/* Prototype feature vectors: the data set being clustered */
feature_vector_t fvec[MAX_FEATURE_VECTORS];
/* NOTE(review): not referenced by any active code visible in this file
* (only by a commented-out printf) - confirm whether it can be removed. */
float feature_strings[MAX_FEATURES][MAX_FEATURES];
/* Number of clusters (compile-time; the exercise asks for 2 and 4) */
#define K 2
/* Cluster centroids: one MAX_FEATURES-dimensional mean per cluster */
double centroids[K][MAX_FEATURES];
/*
 * Load the prototype feature vectors from clusterSet.txt into fvec[].
 *
 * Reads up to MAX_FEATURE_VECTORS records, each a whitespace-separated pair
 * of floats stored into features[0] and features[1].
 *
 * If the file cannot be opened, the error is reported with perror() and the
 * function returns without modifying fvec[]. If the file holds fewer complete
 * records than expected, reading stops at the first incomplete record, a
 * warning is written to stderr, and the remaining fvec[] entries are left
 * as they were.
 */
void initialize_prototype_vectors(void)
{
    /* NOTE(review): absolute, machine-specific path - consider a relative
     * path or a command-line argument. */
    static const char path[] =
        "C:\\Users\\15721\\Desktop\\大三计算机课程PPt\\人工智能\\工程\\机器学习\\机器学习算法\\clusterSet.txt";
    FILE *fp = fopen(path, "r");
    int vector;

    if (fp == NULL) {
        perror("fail to read");
        return;
    }
    printf("读取clusterSet.txt文件成功!\n");

    for (vector = 0; vector < MAX_FEATURE_VECTORS; vector++) {
        float x, y;

        /* Parse into temporaries so a half-read record never lands in fvec[]. */
        if (fscanf(fp, "%f %f", &x, &y) != 2) {
            fprintf(stderr,
                    "clusterSet.txt: expected %d vectors, read only %d\n",
                    MAX_FEATURE_VECTORS, vector);
            break;
        }
        fvec[vector].features[0] = x;
        fvec[vector].features[1] = y;
    }

    fclose(fp);
}
/*
 * Seed the cluster assignments: vector i starts in cluster i for the first
 * K vectors; every other vector is marked -1 (not yet clustered).
 */
void initialize_membership(void)
{
    int idx = 0;

    while (idx < MAX_FEATURE_VECTORS) {
        fvec[idx].class = (idx < K) ? idx : -1;
        idx++;
    }
}
/*
 * Euclidean distance between prototype vector `proto_vector` (an index into
 * fvec[]) and the centroid of cluster `centroid` (an index into centroids[]).
 */
double geometricDistance(int proto_vector, int centroid)
{
    double sum_sq = 0.0;
    int f = 0;

    while (f < MAX_FEATURES) {
        /* Per-dimension difference, accumulated as a squared term. */
        double delta = (double)fvec[proto_vector].features[f] -
                       centroids[centroid][f];
        sum_sq += delta * delta;
        f++;
    }
    return sqrt(sum_sq);
}
/*
 * Recompute the centroid of `cluster` as the per-feature mean of every
 * vector in fvec[] currently assigned to it.
 *
 * If no vector belongs to the cluster, the existing centroid is left
 * unchanged: dividing the zeroed sums by a zero count would store NaN, and
 * every later distance comparison against that centroid would then fail.
 */
void compute_centroids(int cluster)
{
    double sums[MAX_FEATURES] = {0.0};
    int proto_vector, feature;
    int total = 0;

    /* Accumulate the feature sums of the cluster's members */
    for (proto_vector = 0; proto_vector < MAX_FEATURE_VECTORS; proto_vector++) {
        if (fvec[proto_vector].class != cluster) {
            continue;
        }
        for (feature = 0; feature < MAX_FEATURES; feature++) {
            sums[feature] += (double)fvec[proto_vector].features[feature];
        }
        total++;
    }

    /* Empty cluster: nothing to average, keep the previous centroid */
    if (total == 0) {
        return;
    }

    /* Store the average as the new centroid */
    for (feature = 0; feature < MAX_FEATURES; feature++) {
        centroids[cluster][feature] = sums[feature] / (double)total;
    }
}
/*
 * Return the index of the cluster whose centroid is nearest to prototype
 * vector `proto_vector` (an index into fvec[]).
 *
 * The search is seeded from the first valid distance rather than a fixed
 * sentinel, so vectors arbitrarily far from every centroid are still
 * assigned correctly. Ties go to the lowest cluster index. A centroid whose
 * distance is NaN is skipped; if every distance is NaN, cluster 0 is
 * returned.
 */
int partition_feature_vector(int proto_vector)
{
    int cluster, best = 0;
    int have_best = 0;
    double gdCur, gdBest = 0.0;

    for (cluster = 0; cluster < K; cluster++) {
        gdCur = geometricDistance(proto_vector, cluster);
        if (gdCur != gdCur) {
            continue; /* NaN compares unequal to itself */
        }
        if (!have_best || gdCur < gdBest) {
            best = cluster;
            gdBest = gdCur;
            have_best = 1;
        }
    }
    return best;
}
/*
 * Run the clustering loop until a full pass over the vectors changes no
 * assignment.
 *
 * Each pass walks fvec[] from the last vector to the first. When a vector's
 * nearest cluster differs from its current one, it is moved and the
 * centroids of both the cluster it left (unless it was unassigned, -1) and
 * the cluster it joined are recomputed immediately, so later vectors in the
 * same pass are compared against the updated centroids.
 */
void k_means_clustering(void)
{
    int changed;

    do {
        changed = 0;

        for (int idx = MAX_FEATURE_VECTORS - 1; idx >= 0; idx--) {
            int nearest = partition_feature_vector(idx);
            int previous = fvec[idx].class;

            if (nearest == previous) {
                continue;
            }

            fvec[idx].class = nearest;

            /* Old cluster first, then the new one; -1 means there is no old cluster. */
            if (previous != -1) {
                compute_centroids(previous);
            }
            compute_centroids(nearest);

            changed = 1;
        }
    } while (changed);
}
/*
 * Print, for each cluster in turn, the index and feature values of every
 * vector in fvec[] assigned to it.
 */
void emit_clusters(void)
{
    int cluster = 0;

    while (cluster < K) {
        printf("Class %d contains:\n\n", cluster);

        for (int v = 0; v < MAX_FEATURE_VECTORS; v++) {
            if (fvec[v].class != cluster) {
                continue;
            }
            printf("\t%2d [", v);
            for (int f = 0; f < MAX_FEATURES; f++) {
                printf("%f ", fvec[v].features[f]);
            }
            printf("]\n");
        }

        printf("\n");
        cluster++;
    }
}
/*
 * Program entry point: load the data set, seed the cluster memberships and
 * centroids, run k-means, then print each cluster's centroid followed by
 * the members of each cluster.
 */
int main()
{
    int i, feature;

    initialize_prototype_vectors();
    initialize_membership();

    /* Initial centroids come from the seed vectors assigned above */
    for (i = 0; i < K; i++) {
        compute_centroids(i);
    }

    k_means_clustering();

    printf("下面是聚类中心:\n");
    for (i = 0; i < K; i++) {
        /* Print this cluster's own coordinates; indexing with
         * [K][MAX_FEATURES] would read past the end of the array. */
        printf("class %d 中心是:", i);
        for (feature = 0; feature < MAX_FEATURES; feature++) {
            printf(" %f", centroids[i][feature]);
        }
        printf("\n");
    }

    printf("下面是具体的分类:\n");
    emit_clusters();

    /* Keeps the console window open; "pause" is a Windows shell command. */
    system("pause");
    return 0;
}
上一篇: 艾灸足三里穴驱寒 调理手脚冰凉艾灸法