机器学习--逻辑回归
程序员文章站
2022-05-02 16:32:31
...
机器学习--逻辑回归
逻辑回归
解决线性二元分类的算法
Python实现逻辑回归分类算法
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
class LogisticRegressionGD(object):
    """Logistic regression classifier trained with batch gradient descent.

    Parameters
    ----------
    eta : float
        Learning rate (step size of the weight updates).
    n_iter : int
        Number of passes (epochs) over the training set.
    random_state : int
        Seed for reproducible random weight initialization.

    Attributes
    ----------
    w_ : 1d-array
        Weights after fitting; ``w_[0]`` is the bias unit.
    cost_ : list
        Logistic (cross-entropy) cost recorded at every epoch.
    """

    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit to training data.

        :param X: array of shape (n_samples, n_features), training data
        :param y: array of shape (n_samples,), binary class labels (0/1)
        :return: self
        """
        rgen = np.random.RandomState(self.random_state)
        # Initialize weights to small random numbers; index 0 is the bias.
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            net_input = self.net_input(X)
            output = self.activation(net_input)
            errors = y - output
            # Batch update: gradient computed over the whole training set.
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # Logistic cost, tracked to monitor convergence.
            cost = -y.dot(np.log(output)) - (1 - y).dot(np.log(1 - output))
            self.cost_.append(cost)
        return self

    def net_input(self, X):
        """Net input: dot product of inputs and weights, plus bias."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        """Sigmoid activation; z is clipped to [-250, 250] to avoid overflow in exp."""
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        """Return the predicted class label (1 if net input >= 0, else 0)."""
        return np.where(self.net_input(X) >= 0.0, 1, 0)
Python实现逻辑回归分类算法,随机梯度下降
class LogisticRegressionSGD(object):
    """Logistic regression classifier trained with stochastic gradient descent.

    Weights are updated one training sample at a time, which also makes
    the class usable for online/streaming learning via ``partial_fit``.

    Parameters
    ----------
    eta : float
        Learning rate.
    n_iter : int
        Number of passes (epochs) over the training set.
    shuffle : bool
        If True, shuffle the training data before every epoch to avoid
        cycles in the updates.
    random_state : int or None
        Seed used for weight initialization and shuffling.

    Attributes
    ----------
    w_ : 1d-array
        Weights after fitting; ``w_[0]`` is the bias unit.
    cost_ : list
        Average per-sample logistic cost recorded at every epoch.
    """

    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.w_initialized = False
        self.shuffle = shuffle
        self.random_state = random_state

    def fit(self, X, y):
        """Fit to training data.

        :param X: array of shape (n_samples, n_features), training data
        :param y: array of shape (n_samples,), binary class labels (0/1)
        :return: self
        """
        self._initialize_weights(X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            if self.shuffle:
                X, y = self._shuffle(X, y)
            cost = []
            for xi, target in zip(X, y):
                cost.append(self._update_weights(xi, target))
            # Average cost per sample so epochs are comparable.
            avg_cost = sum(cost) / len(y)
            self.cost_.append(avg_cost)
        return self

    def partial_fit(self, X, y):
        """Online learning on streamed data; does not re-initialize weights."""
        if not self.w_initialized:
            self._initialize_weights(X.shape[1])
        if y.ravel().shape[0] > 1:
            for xi, target in zip(X, y):
                self._update_weights(xi, target)
        else:
            # Single sample: update with it directly.
            self._update_weights(X, y)
        return self

    def _shuffle(self, X, y):
        """Return X and y permuted in unison."""
        r = self.rgen.permutation(len(y))
        return X[r], y[r]

    def _initialize_weights(self, m):
        """Initialize weights to small random numbers (index 0 is the bias)."""
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=1 + m)
        self.w_initialized = True

    def _update_weights(self, xi, target):
        """Apply one SGD step for a single sample and return its logistic cost."""
        output = self.activation(self.net_input(xi))
        error = (target - output)
        self.w_[1:] += self.eta * xi.dot(error)
        self.w_[0] += self.eta * error
        cost = -target * (np.log(output)) - ((1 - target) * (np.log(1 - output)))
        return cost

    def net_input(self, X):
        """Net input: dot product of inputs and weights, plus bias."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        """Sigmoid activation; z is clipped to [-250, 250] to avoid overflow in exp."""
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        """Return the predicted class label (1 if net input >= 0, else 0)."""
        return np.where(self.net_input(X) >= 0.0, 1, 0)
准备数据
# Load the iris data set, keeping only petal length and petal width.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

# Stratified 70/30 train/test split, then standardize the features with
# statistics estimated on the training set only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y)
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
分类效果
def plot_decision_regions(X, y, classifier, resolution=0.02):
    """Plot the 2-D decision regions of a fitted classifier.

    This helper is referenced below but was missing from the original
    listing (calling it raised NameError); it is the standard textbook
    implementation, built on the numpy/matplotlib imports at the top of
    the file.

    :param X: array of shape (n_samples, 2), the two plotted features
    :param y: array of shape (n_samples,), class labels
    :param classifier: fitted estimator exposing ``predict``
    :param resolution: grid step for the decision surface
    """
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # Evaluate the classifier on a dense grid covering the data range.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx],
                    marker=markers[idx], label=cl, edgecolor='black')


# Train the hand-written SGD classifier on the two-class subset
# (labels 0 and 1 only) and visualize its decision boundary.
X_train_01_subset = X_train[(y_train == 0) | (y_train == 1)]
y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]
lrgd = LogisticRegressionSGD(eta=0.05, n_iter=1000, random_state=1)
lrgd.fit(X_train_01_subset, y_train_01_subset)
plot_decision_regions(X=X_train_01_subset,
                      y=y_train_01_subset,
                      classifier=lrgd)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
scikit-learn训练逻辑回归模型
# Fit scikit-learn's LogisticRegression (C is the inverse regularization
# strength) on the standardized training data, then plot the decision
# regions over the combined train + test samples.
lr = LogisticRegression(C=100.0, random_state=1)
lr.fit(X_train_std, y_train)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, classifier=lr)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
上一篇: 024 UNIX再学习 -- 进程关系
下一篇: CUDA编程四(评估CUDA程序的好坏)