
Machine Learning - Logistic Regression


Logistic Regression

Logistic regression is a linear model for binary classification: it passes the weighted net input through the sigmoid function to get a class-membership probability, then thresholds that probability to produce a class label.
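
For reference (a standard formulation, not spelled out in the original), the two quantities the implementations below compute are the sigmoid activation of the net input $z = \mathbf{w}^\top \mathbf{x} + w_0$ and the logistic cost, i.e. the negative log-likelihood:

$$\phi(z) = \frac{1}{1 + e^{-z}}, \qquad J(\mathbf{w}) = \sum_i \Big[ -y^{(i)} \log \phi\big(z^{(i)}\big) - \big(1 - y^{(i)}\big) \log\big(1 - \phi(z^{(i)})\big) \Big]$$

Gradient descent on $J(\mathbf{w})$ yields the update $\Delta \mathbf{w} = \eta\, X^\top \big(\mathbf{y} - \phi(\mathbf{z})\big)$, which is exactly what the fit methods below implement.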

Implementing a logistic regression classifier in Python (batch gradient descent)

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


class LogisticRegressionGD(object):
    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        """
        :param eta: 学习率
        :param n_iter: 训练次数
        :param random_state: 随机种子
        """
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """
        :param X: 数据
        :param y: 分类标签
        :return:
        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])  # initialize weights to small random numbers
        self.cost_ = []

        for i in range(self.n_iter):
            net_input = self.net_input(X)
            output = self.activation(net_input)
            errors = (y - output)
            self.w_[1:] += self.eta * X.T.dot(errors)  # update weights
            self.w_[0] += self.eta * errors.sum()  # update bias unit

            cost = -y.dot(np.log(output)) - ((1 - y).dot(np.log(1 - output)))  # logistic cost (negative log-likelihood)
            self.cost_.append(cost)
        return self

    def net_input(self, X):
        """净输入函数,输入值和权重点积"""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        """sigmoid**函数"""
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        """控制函数,返回类标签"""
        return np.where(self.net_input(X) >= 0.0, 1, 0)
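
A quick usage sketch for the batch version: fit on a two-class dataset and plot cost_ to verify convergence. Here X01 and y01 are placeholder names (not from the original) for any two-feature matrix and 0/1 label vector, such as the iris subset constructed later.

# Illustrative usage of LogisticRegressionGD; X01/y01 are assumed to hold
# a two-class, two-feature dataset (hypothetical placeholders).
gd = LogisticRegressionGD(eta=0.05, n_iter=1000, random_state=1)
gd.fit(X01, y01)
plt.plot(range(1, len(gd.cost_) + 1), gd.cost_)  # cost should decrease monotonically
plt.xlabel('Epochs')
plt.ylabel('Logistic cost')
plt.show()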

Implementing logistic regression with stochastic gradient descent in Python

class LogisticRegressionSGD(object):
    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None):
        """
        :param eta: 学习率
        :param n_iter: 训练次数
        :param shuffle: 随机洗牌训练数据
        :param random_state: 随机种子
        """
        self.eta = eta
        self.n_iter = n_iter
        self.w_initialized = False
        self.shuffle = shuffle
        self.random_state = random_state

    def fit(self, X, y):
        """
        :param X: 数据
        :param y: 分类标签
        :return:
        """
        self._initialize_weights(X.shape[1])
        self.cost_ = []
        for i in range(self.n_iter):
            if self.shuffle:
                X, y = self._shuffle(X, y)
            cost = []
            for xi, target in zip(X, y):
                cost.append(self._update_weights(xi, target))
            avg_cost = sum(cost) / len(y)  # average cost over the epoch
            self.cost_.append(avg_cost)
        return self

    def partial_fit(self, X, y):
        """流式数据在线学习,不需要重新初始化权重"""
        if not self.w_initialized:
            self._initialize_weights(X.shape[1])
        if y.ravel().shape[0] > 1:
            for xi, target in zip(X, y):
                self._update_weights(xi, target)
        else:
            self._update_weights(X, y)
        return self

    def _shuffle(self, X, y):
        """随机洗牌训练数据"""
        r = self.rgen.permutation(len(y))
        return X[r], y[r]

    def _initialize_weights(self, m):
        """初始化权重"""
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=1 + m)
        self.w_initialized = True

    def _update_weights(self, xi, target):
        """更新权重"""
        output = self.activation(self.net_input(xi))
        error = (target - output)
        self.w_[1:] += self.eta * xi.dot(error)
        self.w_[0] += self.eta * error

        cost = -target * (np.log(output)) - ((1 - target) * (np.log(1 - output)))  # logistic cost for one sample
        return cost

    def net_input(self, X):
        """净输入函数,输入值和权重点积"""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        """sigmoid**函数"""
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        """控制函数,返回类标签"""
        return np.where(self.net_input(X) >= 0.0, 1, 0)
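
The point of partial_fit is that, once a model has been fitted, it can keep learning from new samples without resetting the weights. A hedged sketch, where new_batches is a hypothetical iterable of (features, labels) pairs arriving over time:

# Online learning sketch; X01/y01 and new_batches are hypothetical placeholders.
sgd = LogisticRegressionSGD(eta=0.01, n_iter=15, random_state=1)
sgd.fit(X01, y01)  # initial training pass (initializes the weights)
for batch_X, batch_y in new_batches:
    sgd.partial_fit(batch_X, batch_y)  # weights are kept, not reinitialized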

Preparing the data

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length and petal width
y = iris.target
# stratified 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
# standardize features using statistics estimated on the training set only
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
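
The helper plot_decision_regions called below is never defined in this article. A standard sketch (the common textbook version, which also explains the ListedColormap import at the top) is:

def plot_decision_regions(X, y, classifier, resolution=0.02):
    # marker generator and color map, one entry per class
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier on a grid spanning both features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # overlay the samples of each class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx],
                    marker=markers[idx], label=cl,
                    edgecolor='black')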

Classification results

# restrict to classes 0 and 1: the hand-rolled classifiers are binary
# (use the standardized features so the axis labels below are accurate)
X_train_01_subset = X_train_std[(y_train == 0) | (y_train == 1)]
y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]

lrsgd = LogisticRegressionSGD(eta=0.05, n_iter=1000, random_state=1)
lrsgd.fit(X_train_01_subset,
          y_train_01_subset)
plot_decision_regions(X=X_train_01_subset,
                      y=y_train_01_subset,
                      classifier=lrsgd)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

[Figure: decision regions learned by the custom SGD logistic regression classifier on the two-class iris subset]

Training a logistic regression model with scikit-learn

In scikit-learn's LogisticRegression, C is the inverse of the regularization strength, so C=100.0 applies only weak regularization.

lr = LogisticRegression(C=100.0, random_state=1)
lr.fit(X_train_std, y_train)

# plot decision regions over the combined (train + test) standardized data
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined,
                      classifier=lr)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

[Figure: decision regions of the scikit-learn logistic regression model on all three iris classes]
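
Unlike the custom classes, the fitted scikit-learn model also exposes class-membership probabilities via predict_proba and accuracy via score; a quick check on the variables defined above:

# class-membership probabilities for the first three test samples
print(lr.predict_proba(X_test_std[:3, :]))
# accuracy on the held-out test set
print('Test accuracy: %.3f' % lr.score(X_test_std, y_test))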

Tags: python