二次回归和线性回归的拟合效果的对比
（来源：程序员文章站，2024-03-16）
0 导入相关库
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from matplotlib.font_manager import FontProperties
# Font that can render the Chinese axis labels/titles used below.
# NOTE(review): hard-coded absolute path — fails on machines without this font file; confirm availability.
font_set = FontProperties(fname=r"/usr/share/fonts/vista/YaHeiConsolas.ttf", size=20)
1 预处理
1.1 绘图相关参数设置
def runplt():
    """Create a pre-configured figure for the pizza price/diameter plots.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can keep drawing
        (scatter/plot/legend/show) on the freshly created figure.
    """
    plt.figure()  # new figure so successive plots do not draw on top of each other
    # Fix: the scraped original used full-width right quotes (’…’), which are a
    # SyntaxError in Python string literals; replaced with ASCII quotes.
    plt.title(u'披萨的价格和直径', fontproperties=font_set)
    plt.xlabel(u'直径(inch)', fontproperties=font_set)
    plt.ylabel(u'价格(美元)', fontproperties=font_set)
    plt.axis([0, 25, 0, 25])  # same fixed range on both axes for easy comparison
    plt.grid(True)
    return plt
1.2 训练集和测试集数据
# Training set: X is pizza diameter in inches, y is price in USD.
# Each sample is wrapped in its own list because sklearn expects 2-D X.
X_train = [[6], [8], [10], [14], [18]]
y_train = [[7], [9], [13], [17.5], [18]]
# Held-out test set in the same shape, used later for R^2 scoring.
X_test = [[7], [9], [11], [15]]
y_test = [[8], [12], [15], [18]]
1.3 画出横纵坐标以及若干散点图
# New pre-configured figure, then scatter the raw training points.
plt = runplt()
plt.scatter(X_train, y_train,s=40)
2 线性回归
2.1 训练模型&预测
# Evenly spaced diameters spanning the axis range, used to draw the fitted line.
xx = np.linspace(0, 26, 5)
# Ordinary least-squares fit on the training pizzas.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# Predict prices along the line; sklearn wants a 2-D column, hence reshape(-1, 1).
yy = regressor.predict(xx.reshape(-1, 1))
2.2 画出线性回归的曲线
# Plot the training points together with the fitted straight line.
plt = runplt()
# Fix: label typo 'orginal' -> 'original'.
plt.scatter(X_train, y_train, s=40, label='original')
plt.plot(xx, yy, 'g-', label='linear equation')
plt.legend(loc='upper left')
plt.show()
3 多项式回归(二次回归)
3.1 生成多项式特征
# Expand each diameter x into polynomial features [1, x, x**2].
quadratic_featurizer = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic_featurizer.fit_transform(X_train)
# Linear regression on the quadratic features == quadratic regression.
regressor_quadratic = LinearRegression()
regressor_quadratic.fit(X_train_quadratic, y_train)
# Fix: removed three leftover notebook-cell expressions (bare `X_train`,
# `X_train_quadratic`, and a constructed-and-discarded PolynomialFeatures) —
# they are no-ops in a script.
3.2 画出多项式回归的曲线
# np.reshape gives an array a new shape without changing its data.
# Fix: the original had a bare `numpy.reshape` expression here, which raises
# NameError (the module is imported as `np`); turned into this comment.
xx = np.linspace(0, 26, 5)
print('xx.shape', xx.shape)
print('xx.reshape', xx.reshape(xx.shape[0], 1).shape)
# Transform the column of diameters into [1, x, x**2] feature rows.
xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1))
print('xx_quadratic', xx_quadratic.shape)
# Fix: removed bare REPL expressions (`xx`, `xx.reshape(...)`, `xx_quadratic`) — no-ops in a script.
# Draw training points, the linear fit, and the quadratic fit on one figure.
plt = runplt()
# Fix: label typo 'orginal' -> 'original'.
plt.scatter(X_train, y_train, s=40, label='original')
plt.plot(xx, yy, 'g-', label='linear equation')
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), 'r--', label="quadratic equation")
plt.legend(loc='upper left')
plt.show()
4 评估模型
4.1 线性回归在训练集&测试集的表现
# Linear model: training points, fitted line, and held-out test points.
plt = runplt()
# Fix: label typo 'orginal' -> 'original'.
plt.scatter(X_train, y_train, s=40, label='original')
plt.plot(xx, yy, 'g-', label='linear equation')
plt.scatter(X_test, y_test, c='r', s=100, label='test')
plt.legend(loc='upper left')
plt.show()
4.2 二次回归在训练集&测试集的表现
# Quadratic model: training points, fitted curve, and held-out test points.
plt = runplt()
# Fix: label typo 'orginal' -> 'original'.
plt.scatter(X_train, y_train, s=40, label='original')
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), 'g--',label="quadratic equation")
plt.scatter(X_test, y_test, c='r', s=100, label='test')
plt.legend(loc='upper left')
plt.show()
4.3 r-squared
# Compare R^2 of both models on the held-out test set.
X_test_quadratic = quadratic_featurizer.transform(X_test)
print('linear equation r-squared', regressor.score(X_test, y_test))
print('quadratic equation r-squared', regressor_quadratic.score(X_test_quadratic, y_test))

import pandas as pd
# Tabulate the two scores and render them as a horizontal bar chart.
vs = pd.DataFrame({'r-squared': pd.Series(['linear equation', 'quadratic equation']),
                   'score': pd.Series([regressor.score(X_test, y_test),
                                       regressor_quadratic.score(X_test_quadratic, y_test)])})
# Fix: removed the bare `vs` notebook-cell expression (no-op in a script).
vs.set_index('r-squared', inplace=True)
vs.plot.barh()
# Fix: without show() the bar chart never appears when run as a plain script.
plt.show()
上一篇: AJAX入门?有这一篇就够了!