您现在的位置是: 首页


程序员文章站 2022-07-13 11:28:25




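# X_train.shape[0] is the number of samples; X_train.shape[1] is the number of features.
# y_train is a 1-D array, so it only has shape[0]: y_train.shape[0] is the number of samples.
# x is also a 1-D array; x.shape[0] is the number of features of the sample to classify.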

import numpy as np
from math import sqrt
from collections import Counter

def kNN_classify(K, X_train, y_train, x):
    """Classify one sample by majority vote among its K nearest neighbours.

    Distances are Euclidean. Neighbours that are exactly equidistant from
    ``x`` are ranked by their row order in ``X_train`` (stable sort), so the
    set of K voters is deterministic.

    Parameters
    ----------
    K : int
        Number of neighbours that vote; 1 <= K <= X_train.shape[0].
    X_train : numpy.ndarray
        2-D array, one training sample per row, one feature per column.
    y_train : numpy.ndarray
        1-D array with one label per row of ``X_train``.
    x : numpy.ndarray
        1-D feature vector of the sample to classify.

    Returns
    -------
    The label (an element of ``y_train``) that occurs most often among the
    K nearest training samples, as chosen by ``Counter.most_common``.

    Raises
    ------
    AssertionError
        If K is out of range or the array shapes do not agree.
    """
    assert 1<= K <= X_train.shape[0], "K should be in range of [1, 样本个数]"
    assert X_train.shape[0] == y_train.shape[0], "the number of sample should correspond to the number of label"
    assert X_train.shape[1] == x.shape[0], "the feature number of train should correspond to the feature number of test"

    # Broadcasting subtracts x from every row at once; summing over axis 1
    # yields one squared distance per training sample.
    distances = np.sqrt(np.sum((x - X_train) ** 2, axis=1))

    # A stable sort keeps equidistant samples in their original order, so the
    # choice of the K nearest does not depend on the sort implementation.
    nearest_index = np.argsort(distances, kind="stable")[:K]

    # Votes are inserted nearest-first.
    votes = Counter(y_train[i] for i in nearest_index)
    return votes.most_common(1)[0][0]
# Toy data set: ten samples with two features each, five per class.
raw_data_X = [[3.39, 2.33],
              [3.11, 1.78],
              [1.34, 3.36],
              [3.58, 4.67],
              [2.28, 2.86],
              [7.42, 4.69],
              [5.74, 3.53],
              [9.17, 2.51],
              [7.79, 3.42],
              [7.93, 0.79]]  # closing bracket was missing in the original listing
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

# kNN_classify reads .shape, so the plain lists are converted to arrays.
X_train = np.array(raw_data_X)
y_train = np.array(raw_data_y)

# The sample to classify.
x = np.array([8.09, 3.36])

label = kNN_classify(3, X_train, y_train, x)
# Parenthesised form works as a statement on Python 2 and a call on Python 3.
print(label)


import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Same data through scikit-learn's k-NN, this time with six voting neighbours.
# fit() hands back the estimator, so building and training are chained.
kNN_classifier = KNeighborsClassifier(n_neighbors=6).fit(X_train, y_train)
# predict() takes a 2-D (n_samples, n_features) array, so the single query
# vector is reshaped into one row. The result is not stored or printed.
kNN_classifier.predict(np.reshape(x, (1, -1)))


# -*- coding: UTF-8 -*-
import numpy as np
from math import sqrt
from collections import Counter

class KNNClassifier(object):
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Usage: ``KNNClassifier(k).fit(X_train, y_train).predict(X_predict)``.
    ``fit`` only stores references to the training arrays; all distance
    computation happens in ``predict``.
    """

    def __init__(self, k):
        """Create an unfitted classifier that votes among ``k`` neighbours.

        Raises AssertionError if ``k`` is not an ``int`` or is smaller than 1.
        """
        assert isinstance(k, int), "k must be an int value"
        assert k >= 1, "k must be valid"
        self.k = k
        # Set by fit(); None marks the classifier as not yet fitted.
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Store the training set and return ``self`` so calls can be chained.

        X_train is a 2-D array (samples x features) and y_train holds one
        label per sample. Raises AssertionError if the sample counts differ
        or if ``k`` exceeds the number of samples.
        """
        assert X_train.shape[0] == y_train.shape[0], "number of sample should be the same"
        assert self.k <= X_train.shape[0], "k must be less than the number of sample"

        self._X_train = X_train
        self._y_train = y_train

        # Returned to allow chained calls.
        return self

    def predict(self, X_predict):
        """Return a numpy array with one predicted label per row of X_predict.

        X_predict is a 2-D array with the same number of columns as the
        training data. Raises AssertionError if the classifier has not been
        fitted or the feature counts differ.
        """
        assert self._X_train is not None and self._y_train is not None, "train data shouldn't be None"
        assert X_predict.shape[1] == self._X_train.shape[1], "number of features must be same between train and test samples"
        predict_y = [self._predict(x) for x in X_predict]
        return np.array(predict_y)

    def _predict(self, x):
        """Return the majority label among the k training samples nearest to x.

        x is a single 1-D feature vector.
        """
        assert self._X_train.shape[1] == x.shape[0], "the feature of train samples and x must be equal"

        # Broadcasting subtracts x from every training row at once; summing
        # over axis 1 yields one squared distance per training sample.
        distances = np.sqrt(np.sum((x - self._X_train) ** 2, axis=1))

        # A stable sort keeps equidistant samples in training order, so the
        # k voters do not depend on the sort implementation.
        nearest_index = np.argsort(distances, kind="stable")[:self.k]

        # Votes are inserted nearest-first.
        votes = Counter(self._y_train[i] for i in nearest_index)
        return votes.most_common(1)[0][0]

    def __repr__(self):
        return "KNN(k={})".format(self.k)