欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

python实现逻辑回归

程序员文章站 2022-05-18 18:07:28
1.自定义代码实现: import numpy as np; import matplotlib.pyplot as plt; import pandas as pd; from sklearn.datasets import load_breast_cancer; from sklearn.model_selection import train_test_split; def sigmoid(z): s = 1 / (1 + np.exp(-z)); s = s.reshape(s.shape[0]...

1.自定义代码实现

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split


def sigmoid(z):
    """Apply the logistic function element-wise and return a column vector.

    The value is computed from ``exp(-|z|)`` so the exponential never
    overflows, however large the magnitude of ``z`` is.

    Args:
        z: Array-like of scores holding ``n`` elements, with ``n`` as the
            length of its first axis (for example shape ``(n,)``).

    Returns:
        ``numpy.ndarray`` of shape ``(n, 1)`` holding ``1 / (1 + exp(-z))``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always within [0, 1], so it cannot overflow
    # Both branches equal 1 / (1 + exp(-z)); each one is expressed through
    # exp(-|z|) for the sign of z it is selected for.
    s = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return s.reshape(s.shape[0], 1)  # s.shape[0] is the length of the array


def draw_sigmoid():
    """Plot the sigmoid curve over the interval [-6, 6) and show the figure."""
    # np.arange returns evenly spaced points with a fixed step: the start is
    # included, the stop is excluded.
    grid = np.arange(-6, 6, 0.01)
    curve = sigmoid(grid)

    plt.plot(grid, curve, color='red', lw=2)
    plt.show()


def model(theta, X):
    """Return the sigmoid of the weighted sum of each row of ``X``.

    ``theta.T`` is multiplied element-wise with ``X`` and the products are
    summed along axis 1 before being passed to ``sigmoid``.

    Args:
        theta: Parameter array; assumed to be a ``(d, 1)`` column so that
            ``theta.T`` broadcasts against the rows of ``X`` (confirm with
            the caller).
        X: Sample array; assumed to be ``(n, d)``.

    Returns:
        Whatever ``sigmoid`` returns for the per-row sums.
    """
    weighted = theta.T * X
    z = weighted.sum(axis=1)  # collapse the columns: one score per row
    return sigmoid(z)


# Loss function: binary cross-entropy between the labels y and the
# predictions y_hat = h(x).
def cross_entropy(y, y_hat):
    """Return the mean binary cross-entropy of ``y_hat`` against ``y``.

    Predictions are clipped away from exactly 0 and 1 before the logarithm
    is taken, so saturated probabilities give a large finite loss instead
    of ``inf`` or ``nan``.

    Args:
        y: ``numpy.ndarray`` of 0/1 labels; its first axis is the sample axis.
        y_hat: Array-like of predicted probabilities, broadcast-compatible
            with ``y``.

    Returns:
        The per-sample losses summed over the first axis and divided by the
        number of samples. For ``(n, 1)`` inputs this is an array of shape
        ``(1,)``.
    """
    n_samples = y.shape[0]
    eps = 1e-15  # keeps both log() arguments strictly positive
    y_hat = np.clip(y_hat, eps, 1 - eps)
    losses = -y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat)
    # Summing over axis 0 gives the same shape as summing the rows one by one.
    return np.sum(losses, axis=0) / n_samples


def cost_function(theta, X, y):
    """Return the cross-entropy of the model's predictions on ``X`` against ``y``.

    Args:
        theta: Parameters forwarded to ``model``.
        X: Samples forwarded to ``model``.
        y: Labels the predictions are compared with.

    Returns:
        The value of ``cross_entropy(y, model(theta, X))``.
    """
    return cross_entropy(y, model(theta, X))


# Gradient descent
def optimize(theta, X, y, alpha=1e-1):
    """Perform one gradient-descent step and return the updated parameters.

    The gradient is the average over the samples of
    ``(prediction - label) * features``. It is reshaped to the shape of
    ``theta`` rather than to a fixed size, so any number of features works.

    Args:
        theta: Current parameters, e.g. a ``(d, 1)`` column vector.
        X: Sample array whose first axis is the sample axis, e.g. ``(n, d)``.
        y: Labels; assumed to be an ``(n, 1)`` column so that they line up
            with the ``(n, 1)`` predictions (confirm with the caller).
        alpha: Learning rate. Defaults to ``0.1``, the value that was
            previously fixed inside the function.

    Returns:
        The new parameter array, with the same shape as ``theta``.
    """
    n = X.shape[0]
    residual = model(theta, X) - y
    # X^T . residual sums (prediction - label) * feature over all samples.
    dtheta = (1.0 / n) * np.dot(X.T, residual)
    dtheta = dtheta.reshape(theta.shape)
    return theta - alpha * dtheta


# Iterate over the data
def iterate(theta, X, y, times):
    """Apply ``optimize`` ``times`` times, recording cost and accuracy.

    Args:
        theta: Initial parameters.
        X: Samples passed to every step.
        y: Labels passed to every step.
        times: Number of update steps to run.

    Returns:
        A tuple ``(theta, costs, accs)``: the final parameters, the list of
        ``cost_function`` values and the list of ``accuracy`` values, each
        measured after the corresponding step.
    """
    cost_history, accuracy_history = [], []
    for _ in range(times):
        theta = optimize(theta, X, y)
        cost_history.append(cost_function(theta, X, y))
        accuracy_history.append(accuracy(theta, X, y))
    return theta, cost_history, accuracy_history


# Evaluate on the data
def predict_proba(theta, X):
    """Return the probabilities that ``model`` produces for ``X``.

    Args:
        theta: Parameters forwarded to ``model``.
        X: Samples forwarded to ``model``.
    """
    return model(theta, X)


def predict(X, theta):
    """Return hard 0/1 predictions for ``X``.

    Note that the argument order here is ``(X, theta)``, the reverse of
    ``predict_proba``.

    Args:
        X: Samples to classify.
        theta: Model parameters.

    Returns:
        An integer array that is 1 where the predicted probability is
        strictly greater than 0.5 and 0 elsewhere.
    """
    probabilities = predict_proba(theta, X)
    above_threshold = probabilities > 0.5
    return above_threshold * 1  # multiplying by 1 turns booleans into 0/1


def accuracy(theta, X, y):
    """Return the fraction of samples whose hard prediction equals ``y``.

    Args:
        theta: Model parameters.
        X: Samples to classify.
        y: Expected labels; compared element-wise with the output of
            ``predict``.

    Returns:
        The number of matches divided by ``len(y)``.
    """
    matches = predict(X, theta) == y
    return sum(matches) * 1.0 / len(y)


# Load the data
dataset = load_breast_cancer()
data = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
data['cancer'] = [dataset.target_names[label] for label in dataset.target]

# Assign the data; X.shape[0] and X.shape[1] are its two dimensions
X, y = dataset.data, dataset.target
n_features = X.shape[1]

# Standardise every feature: axis=0 computes one mean and one standard
# deviation per column, so each column of X_norm has zero mean and unit
# standard deviation.
# NOTE(review): these statistics are taken over the whole dataset, before the
# train/test split further down.
mean = X.mean(axis=0)
std = X.std(axis=0)
X_norm = (X - mean) / std


def add_ones(X):
    """Return ``X`` with a column of ones prepended.

    Args:
        X: Two-dimensional array of shape ``(n, d)``.

    Returns:
        Array of shape ``(n, d + 1)`` whose first column is all ones and
        whose remaining columns are those of ``X``.
    """
    bias_column = np.ones((X.shape[0], 1))
    return np.concatenate((bias_column, X), axis=1)


X_with_ones = add_ones(X_norm)

X_train, X_test, y_train, y_test = train_test_split(
    X_with_ones, y, test_size=0.3, random_state=12345)
# Turn both label vectors into column vectors.
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Apply the algorithm: start from all-ones parameters (one per feature plus
# one for the column of ones) and run 1500 update steps.
theta = np.ones((n_features + 1, 1))
theta, costs, accs = iterate(theta, X_train, y_train, 1500)
plt.plot(costs)  # plot the cost function
plt.plot(accs)  # plot how the accuracy changes
plt.show()
print(accuracy(theta, X_test, y_test))

2.库函数调用

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=12345)

# The features are fed in unscaled here, so the solver is given a much larger
# iteration budget than the library default; with the default budget the
# fit may stop before it has converged.
lr = LogisticRegression(max_iter=10000)
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train))  # mean accuracy on the training split
print(lr.score(X_test, y_test))  # mean accuracy on the held-out split

本文地址:https://blog.csdn.net/qq_40690199/article/details/107466555