逻辑回归

程序员文章站 2023-12-27 10:22:15

...

1. 读取数据

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

df_X = pd.read_csv('./logistic_x.txt', sep='\ +',header=None, engine='python') #读取X值
ys = pd.read_csv('./logistic_y.txt', sep='\ +',header=None, engine='python') #读取y值
ys = ys.astype(int)
df_X['label'] = ys[0].values #将X按照y值的结果一一打标签

ax = plt.axes()
#在二维图中描绘X点所处位置，直观查看数据点的分布情况
df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

逻辑回归

#提取用于学习的数据
Xs = df_X[[0, 1]].values
ys = df_X['label'].values

2. 代码实现

2.1 代码

import numpy as np
from sklearn.metrics import accuracy_score

class LogisticRegression:

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
        self._theta = None

    def _sigmoid(self, t):
        return 1. / (1. + np.exp(-t))

    def fit(self, X_train, y_train, eta=0.01, n_iters=1e4):
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        def J(theta, X_b, y):
            y_hat = self._sigmoid(X_b.dot(theta))
            try:
                return - np.sum(y*np.log(y_hat) + (1-y)*np.log(1-y_hat)) / len(y)
            except:
                return float('inf')

        def dJ(theta, X_b, y):
            return X_b.T.dot(self._sigmoid(X_b.dot(theta)) - y) / len(y)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):

            theta = initial_theta
            cur_iter = 0

            while cur_iter < n_iters:
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):
                    break

                cur_iter += 1

            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def predict_proba(self, X_predict):
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return self._sigmoid(X_b.dot(self._theta))

    def predict(self, X_predict):
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        proba = self.predict_proba(X_predict)
        return np.array(proba >= 0.5, dtype='int')

    def score(self, X_test, y_test):
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

logistic_regression = LogisticRegression() #因为前面已经将截距项的值合并到变量中，此处参数设置不需要截距项
logistic_regression.fit(Xs, ys) #拟合
score = logistic_regression.score(Xs, ys) #结果评价
print("Coefficient: %s" % logistic_regression.coef_)
print("Score: %s" % score)

逻辑回归

def x2(x1):
    return (-logistic_regression.coef_[0] * x1 - logistic_regression.intercept_) / logistic_regression.coef_[1]

x1_plot = np.linspace(0, 8, 1000)
x2_plot = x2(x1_plot)

ax = plt.axes()

df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

plt.plot(x1_plot, x2_plot)
plt.show()

逻辑回归

2.2 sklearn对比

Xs = df_X[[0, 1]].values
Xs = np.hstack([np.ones((Xs.shape[0], 1)), Xs]) 
ys = df_X['label'].values

from __future__ import print_function
import numpy as np
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(fit_intercept=False) #因为前面已经将截距项的值合并到变量中，此处参数设置不需要截距项
lr.fit(Xs, ys) #拟合
score = lr.score(Xs, ys) #结果评价
print("Coefficient: %s" % lr.coef_)
print("Score: %s" % score)

逻辑回归

ax = plt.axes()

df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

_xs = np.array([np.min(Xs[:,1]), np.max(Xs[:,1])])

#将数据以二维图形式描点，并用学习得出的参数结果作为阈值，划分数据区域
_ys = (lr.coef_[0][0] + lr.coef_[0][1] * _xs) / (- lr.coef_[0][2])
plt.plot(_xs, _ys, lw=1)

逻辑回归
将自己实现的代码和sklearn进行对比（准确率和绘制图），结果相差无几。

逻辑回归

1. 读取数据

2. 代码实现

2.1 代码

2.2 sklearn对比

逻辑回归

python评价回归模型指标：决定系数R2，相关系数R，均方误差MSE，均方根误差RMSE

JS交互逻辑详解——data数据变更时同步（setData 函数） & 视图层更新是异步

JS交互逻辑——获取页面数据{{}}和全局数据const app = getApp()

Task01：线性回归

sklearn实现多项式回归

Python：逻辑判断与运算符实例

sklearn之逻辑回归

诺基亚7.2新渲染图曝光：超薄设计、水滴屏、奥利奥镜头设计回归

用 PHP 实现的简单线性回归：(一)