欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

人工智能考试——聚类分析

程序员文章站 2022-06-12 16:54:32
...

一、题目

使用kmeans聚类算法对所提供的数据集进行聚类分析。通过修改提供的kmeans.c文件,读取2维的clusterSet.txt数据集,设定k为2和4两种情况,分别对该数据集进行聚类,并将最终的聚类中心和每个数据行所属的簇或组输出。

二、代码

#include <float.h>
#include <stdio.h>
#include <stdlib.h>

#include "maths.h"
#include "rand.h"
/* Capacity, in chars, for a line-sized text buffer. */
#define MAX_LINE 1024
/*
 * Features and Feature Vector Symbolic Constants and Types
 */

/* Number of data points held in fvec[] and visited by every clustering loop. */
#define MAX_FEATURE_VECTORS	80
/* Number of coordinates stored per data point (the data set is 2-D). */
#define MAX_FEATURES		2


/*
 * One data point to be clustered, together with its current assignment.
 */
typedef struct {
	/* Index of the cluster this point is assigned to; -1 means "not yet
	 * clustered". */
	int class;
	/* Coordinates of the point, one entry per feature. */
	float features[MAX_FEATURES];
} feature_vector_t;

/* Prototype Feature Vectors: the data points being clustered. */
feature_vector_t fvec[MAX_FEATURE_VECTORS];



/* NOTE(review): not referenced by any active code in this listing; it may be
 * a leftover from an earlier version -- confirm before removing. */
float feature_strings[MAX_FEATURES][MAX_FEATURES];

/* Number of clusters. The exercise asks for runs with 2 and with 4; change
 * this value and rebuild to switch. */
#define K		2

/* Cluster Centroids: centroids[c][f] is coordinate f of cluster c's mean. */
double centroids[K][MAX_FEATURES];



/*
 * Load the data set into fvec[].
 *
 * Reads up to MAX_FEATURE_VECTORS rows, each holding two whitespace-separated
 * floating-point values, from clusterSet.txt and stores them in
 * fvec[i].features[0] and fvec[i].features[1]. Takes no arguments and
 * returns nothing.
 *
 * If the file cannot be opened, the error is reported with perror() and
 * fvec[] is left untouched. If the file holds fewer well-formed rows than
 * expected, a warning is written to stderr and the remaining entries keep
 * whatever values they already had.
 */
void initialize_prototype_vectors(void)
{
	int vector;
	FILE *fp;            /* input data file */

	fp = fopen("C:\\Users\\15721\\Desktop\\大三计算机课程PPt\\人工智能\\工程\\机器学习\\机器学习算法\\clusterSet.txt", "r");
	if (fp == NULL) {
		perror("fail to read");
		return;
	}

	printf("读取clusterSet.txt文件成功!\n");

	for (vector = 0; vector < MAX_FEATURE_VECTORS; vector++) {
		/* fscanf returns the number of converted fields; anything other
		 * than 2 means end of file or a malformed row, and every later
		 * call would fail at the same position, so stop here. */
		if (fscanf(fp, "%f %f", &fvec[vector].features[0],
			&fvec[vector].features[1]) != 2) {
			fprintf(stderr,
				"warning: only %d of %d rows read from clusterSet.txt\n",
				vector, MAX_FEATURE_VECTORS);
			break;
		}
	}

	/* Release the stream on every path that opened it. */
	fclose(fp);
}


/*
 * Seed the cluster assignments.
 *
 * The first K vectors become the sole initial member of clusters 0..K-1
 * respectively; every other vector is marked -1 (not yet clustered).
 */
void initialize_membership(void)
{
	int idx;

	for (idx = 0; idx < MAX_FEATURE_VECTORS; idx++) {
		fvec[idx].class = (idx < K) ? idx : -1;
	}
}


/*
 * Euclidean distance between a data point and a cluster centroid.
 *
 * proto_vector: index into fvec[].
 * centroid:     index into centroids[].
 * Returns the square root of the summed squared per-feature differences.
 */
double geometricDistance(int proto_vector, int centroid)
{
	double sum_sq = 0.0;
	int f;

	for (f = 0; f < MAX_FEATURES; f++) {
		const double delta =
			(double)fvec[proto_vector].features[f] - centroids[centroid][f];

		sum_sq += delta * delta;
	}

	return sqrt(sum_sq);
}


void compute_centroids(int cluster)
{
	int proto_vector, feature;
	int total = 0;

	/* Clear the centroid vector */
	for (feature = 0; feature < MAX_FEATURES; feature++) {
		centroids[cluster][feature] = 0.0;
	}

	/* Calculate the centroid vector for the current cluster */
	for (proto_vector = 0; proto_vector < MAX_FEATURE_VECTORS;
		proto_vector++) {

		if (fvec[proto_vector].class == cluster) {

			for (feature = 0; feature < MAX_FEATURES; feature++) {

				centroids[cluster][feature] +=
					(double)fvec[proto_vector].features[feature];

			}

			total++;

		}

	}

	/* Compute the average for the centroid */
	for (feature = 0; feature < MAX_FEATURES; feature++) {

		centroids[cluster][feature] /= (double)total;

	}

	return;
}


/*
 * Find the cluster whose centroid is nearest to a data point.
 *
 * proto_vector: index into fvec[].
 * Returns the index (0..K-1) of the closest centroid. Ties go to the
 * lowest index because only a strictly smaller distance replaces the
 * current best. Returns 0 if no distance compares below DBL_MAX.
 */
int partition_feature_vector(int proto_vector)
{
	int cluster, best = 0;
	/* Start from the largest finite double so that any real distance wins.
	 * A small fixed sentinel would force far-away points into cluster 0
	 * regardless of which centroid is actually nearest. */
	double gdCur, gdBest = DBL_MAX;

	/* Find the centroid that best matches the prototype feature vector */
	for (cluster = 0; cluster < K; cluster++) {

		gdCur = geometricDistance(proto_vector, cluster);

		if (gdCur < gdBest) {

			best = cluster;
			gdBest = gdCur;

		}

	}

	return best;
}


/*
 * Run k-means until a full pass over the data changes no assignment.
 *
 * Each pass visits the vectors from the last index down to 0. When a vector
 * moves to a different cluster, the centroids of the cluster it left (unless
 * it was unassigned, -1) and the cluster it joined are recomputed right
 * away, so later vectors in the same pass see the updated centroids.
 */
void k_means_clustering(void)
{
	int changed;
	int idx;

	do {
		changed = 0;

		/* Walk the vectors in descending index order */
		for (idx = MAX_FEATURE_VECTORS; idx-- > 0; ) {
			const int assigned = partition_feature_vector(idx);
			const int previous = fvec[idx].class;

			if (assigned == previous) {
				continue;
			}

			fvec[idx].class = assigned;

			/* -1 marks a vector that had no cluster yet */
			if (previous != -1) {
				compute_centroids(previous);
			}
			compute_centroids(assigned);

			changed = 1;
		}
	} while (changed);
}


/*
 * Print the members of every cluster to stdout.
 *
 * For each cluster index 0..K-1, writes a heading followed by one line per
 * member vector showing its index in fvec[] and its feature values.
 */
void emit_clusters(void)
{
	int cluster;

	for (cluster = 0; cluster < K; cluster++) {
		int row;

		printf("Class %d contains:\n\n", cluster);

		for (row = 0; row < MAX_FEATURE_VECTORS; row++) {
			int col;

			/* Skip vectors that belong to another cluster */
			if (fvec[row].class != cluster) {
				continue;
			}

			printf("\t%2d  [", row);
			for (col = 0; col < MAX_FEATURES; col++) {
				printf("%f ", fvec[row].features[col]);
			}
			printf("]\n");
		}

		printf("\n");
	}
}


/*
 * Program entry point: load the data, seed the clusters, run k-means, then
 * print each cluster's centroid followed by the members of each cluster.
 *
 * Always returns 0.
 */
int main(void)
{
	int i, feature;

	initialize_prototype_vectors();

	initialize_membership();

	/* Initial centroids come from the seed membership */
	for (i = 0; i < K; i++) {
		compute_centroids(i);
	}

	k_means_clustering();

	printf("下面是聚类中心:\n");
	for (i = 0; i < K; i++) {
		/* Print every coordinate of centroid i. Indexing with the array
		 * bounds themselves (centroids[K][MAX_FEATURES]) reads past the
		 * end of the array. */
		printf("class %d 中心是:", i);
		for (feature = 0; feature < MAX_FEATURES; feature++) {
			printf(" %f", centroids[i][feature]);
		}
		printf("\n");
	}
	printf("下面是具体的分类:\n");
	emit_clusters();

	/* "pause" is a Windows shell command; presumably used to keep the
	 * console window open -- on other platforms this call has no useful
	 * effect. */
	system("pause");
	return 0;
}


 

相关标签: AI