Implementing Logistic Regression in Python
1. Implementation with custom code
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    s = s.reshape(s.shape[0], 1)  # reshape into a column vector of length s.shape[0]
    return s
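# Note: for very negative z, np.exp(-z) overflows and NumPy emits a RuntimeWarning
# (the result still saturates to 0). The features are standardized further below,
# which keeps z in a moderate range, so this simple form is adequate here.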
def draw_sigmoid():
    x = np.arange(-6, 6, .01)  # evenly spaced values with a fixed step, end point excluded
    y = sigmoid(x)
    plt.plot(x, y, color='red', lw=2)
    plt.show()
def model(theta, X):
    z = np.sum(theta.T * X, axis=1)  # sum along the columns, one value per sample
    return sigmoid(z)
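# The hypothesis computed above is the standard logistic regression model
#     h_theta(x) = sigmoid(theta^T x) = 1 / (1 + exp(-theta^T x)),
# evaluated for every row of X at once: with theta of shape (d, 1) and X of
# shape (n, d), theta.T * X broadcasts to (n, d) and summing along axis=1
# yields the n values theta^T x_i, i.e. the same result as X @ theta (flattened).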
# Define the loss function
def cross_entropy(y, y_hat):
    # average cross-entropy between the labels y and the predictions y_hat = h(x)
    n_samples = y.shape[0]
    return sum(-y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat)) / n_samples

def cost_function(theta, X, y):
    y_hat = model(theta, X)
    return cross_entropy(y, y_hat)
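# cost_function evaluates the average cross-entropy (negative log-likelihood)
#     J(theta) = -(1/n) * sum_i [ y_i * log(y_hat_i) + (1 - y_i) * log(1 - y_hat_i) ],
# which is the quantity the gradient descent step below minimizes.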
# Gradient descent
def optimize(theta, X, y):
    n = X.shape[0]
    alpha = 1e-1  # learning rate
    y_hat = model(theta, X)
    dtheta = (1.0 / n) * ((y_hat - y) * X)
    dtheta = np.sum(dtheta, axis=0)   # sum over the rows (samples)
    dtheta = dtheta.reshape((-1, 1))  # column vector, one entry per parameter (avoids the hard-coded 31)
    theta = theta - alpha * dtheta
    return theta
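# The update above is one batch gradient descent step. For the cross-entropy
# loss the gradient has the closed form
#     dJ/dtheta = (1/n) * X^T (y_hat - y),
# which is exactly what (y_hat - y) * X summed over the rows computes, and the
# parameters then move against the gradient: theta := theta - alpha * dtheta.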
# Iterate over the data
def iterate(theta, X, y, times):
    costs = []
    accs = []
    for i in range(times):
        theta = optimize(theta, X, y)
        costs.append(cost_function(theta, X, y))
        accs.append(accuracy(theta, X, y))
    return theta, costs, accs
# Evaluate the model
def predict_proba(theta, X):
    y_hat = model(theta, X)
    return y_hat

def predict(X, theta):
    y_hat = predict_proba(theta, X)
    y_hard = (y_hat > 0.5) * 1
    return y_hard

def accuracy(theta, X, y):
    y_hard = predict(X, theta)
    count_right = sum(y_hard == y)
    return count_right * 1.0 / len(y)
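# predict() thresholds the predicted probability at 0.5: samples with
# P(y = 1 | x) > 0.5 are labeled 1 (benign in this dataset), the rest 0
# (malignant), and accuracy() is the fraction of such hard labels that
# match the ground truth.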
# Load the data
dataset = load_breast_cancer()
data = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
data['cancer'] = [dataset.target_names[t] for t in dataset.target]

# Assign the data; shape[0] and shape[1] give the number of samples and features
X = dataset.data
y = dataset.target
n_features = X.shape[1]

std = X.std(axis=0)        # per-feature standard deviation (computed down the columns)
mean = X.mean(axis=0)      # per-feature mean (computed down the columns)
X_norm = (X - mean) / std  # z-score standardization: each feature gets zero mean and unit variance

def add_ones(X):
    ones = np.ones((X.shape[0], 1))
    X_with_ones = np.hstack((ones, X))
    return X_with_ones
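# Prepending a column of ones lets the first parameter theta_0 act as the
# intercept (bias), so the linear part of the model becomes
#     z = theta_0 + theta_1 * x_1 + ... + theta_30 * x_30
# without having to treat the bias as a separate variable.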
X_with_ones = add_ones(X_norm)
X_train, X_test, y_train, y_test = train_test_split(X_with_ones, y, test_size=0.3, random_state=12345)
y_train = y_train.reshape((y_train.shape[0], 1))
y_test = y_test.reshape((y_test.shape[0], 1))
# Run the algorithm
theta = np.ones((n_features + 1, 1))  # one parameter per feature plus the bias term
theta, costs, accs = iterate(theta, X_train, y_train, 1500)
plt.plot(costs)  # cost over the iterations
plt.plot(accs)   # training accuracy over the iterations
plt.show()
print(accuracy(theta, X_test, y_test))  # accuracy on the held-out test set
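As a quick sanity check of the trained parameters, one can compare a few predicted probabilities with the true labels. The snippet below is a hypothetical addition, not part of the original post; it simply reuses the variables defined above.
proba = predict_proba(theta, X_test[:5])  # predicted P(y = 1 | x) for 5 test samples
hard = predict(X_test[:5], theta)         # corresponding hard 0/1 predictions
print(proba.ravel())
print(hard.ravel())
print(y_test[:5].ravel())                 # ground-truth labels for comparison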
2. Calling the library function
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)

lr = LogisticRegression()  # default settings; may warn about convergence on the unscaled features
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train))  # training accuracy
print(lr.score(X_test, y_test))    # test accuracy
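With the default solver, LogisticRegression can emit a convergence warning on the unscaled breast-cancer features. One common remedy, sketched below as an assumption rather than as part of the original post, is to standardize the features inside a Pipeline, mirroring the manual z-score step from section 1.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Standardize the features, then fit logistic regression on the scaled data
pipe = make_pipeline(StandardScaler(), LogisticRegression())
pipe.fit(X_train, y_train)
print(pipe.score(X_train, y_train))
print(pipe.score(X_test, y_test))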
Original article: https://blog.csdn.net/qq_40690199/article/details/107466555