scikit-learn: LDA and QDA
1.2. Linear and Quadratic Discriminant Analysis
Linear and Quadratic Discriminant Analysis with covariance ellipsoid
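Both classifiers fit one Gaussian density per class; LDA additionally assumes that all classes share a single covariance matrix, which makes its decision boundary linear, while QDA estimates one covariance matrix per class and produces quadratic boundaries. Both expose the usual scikit-learn estimator API. A minimal sketch of that shared interface (the toy blobs and variable names below are invented for illustration); the full gallery example follows after it:

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis,
                                           QuadraticDiscriminantAnalysis)

X_demo, y_demo = make_blobs(n_samples=100, centers=2, n_features=2,
                            random_state=0)
for clf in (LinearDiscriminantAnalysis(), QuadraticDiscriminantAnalysis()):
    clf.fit(X_demo, y_demo)
    # both estimators support predict and predict_proba
    print(type(clf).__name__, clf.predict([[0.0, 0.0]]),
          clf.predict_proba([[0.0, 0.0]]).round(3))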
from scipy import linalg
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
cmap = colors.LinearSegmentedColormap('red_blue_classes',
                                      {'red': [(0, 1, 1), (1, 0.7, 0.7)],
                                       'green': [(0, 0.7, 0.7), (1, 0.7, 0.7)],
                                       'blue': [(0, 0.7, 0.7), (1, 1, 1)]})
plt.cm.register_cmap(cmap=cmap)
aa = [(0, 1, 1), (1, 0.7, 0.7)]            # a list of tuples
aa1 = [[0, 1, 1], [1, 0.7, 0.7]]           # also a list (of lists)
bb = np.array([[0, 1, 1], [1, 0.7, 0.7]])  # a 2-D array
cc = np.mat([[0, 1, 1], [1, 0.7, 0.7]])    # a matrix
cc2 = cc * 2
np.dot(cc, cc2.transpose()) == cc * cc2.transpose()  # for np.matrix, both are matrix products
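# Illustrative check of the semantics (the name bb2 is invented here): for
# plain ndarrays, * is element-wise while np.dot / the @ operator is the
# matrix product, so the equivalence above is specific to np.matrix.
bb2 = bb * 2
print(np.array_equal(np.dot(bb, bb2.T), bb @ bb2.T))  # True: dot and @ agree
print((bb * bb2).shape)  # (2, 3): * multiplied element-wise, no matrix product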
def dataset_fix_cov():
    # Generate two blobs that share the same covariance matrix.
    n, dim = 300, 2
    np.random.seed(0)
    C = np.array([[0., -0.23], [0.83, 0.23]])
    X = np.r_[np.dot(np.random.randn(n, dim), C),
              np.dot(np.random.randn(n, dim), C) + np.array([1, 1])]
    # For 2-D arrays, np.dot is the matrix product (the @ operator),
    # not the element-wise * operator.
    Y = np.hstack((np.zeros(n), np.ones(n)))
    return X, Y
def dataset_cov():
    # Generate two blobs with different covariance matrices.
    n, dim = 300, 2
    np.random.seed(0)
    C = np.array([[0., -1.], [2.5, .7]]) * 2.  # this sets the covariance structure
    X = np.r_[np.dot(np.random.randn(n, dim), C),
              np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4])]
    Y = np.hstack((np.zeros(n), np.ones(n)))
    return X, Y
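# Quick sanity check (illustrative; the _check names are invented here):
# each generator returns 600 two-dimensional points with balanced 0/1 labels.
X_check, Y_check = dataset_fix_cov()
print(X_check.shape, np.bincount(Y_check.astype(int)))  # (600, 2) [300 300]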
def plot_data(lda, X, Y, Y_pred, fig_index):
    splot = plt.subplot(2, 2, fig_index)
    if fig_index == 1:
        plt.title('Linear Discriminant Analysis')
        plt.ylabel('Data with\n fixed covariance')
    elif fig_index == 2:
        plt.title('Quadratic Discriminant Analysis')
    elif fig_index == 3:
        plt.ylabel('Data with\n varying covariance')
    tp = (Y == Y_pred)  # boolean mask of correctly predicted samples
    tp0, tp1 = tp[Y == 0], tp[Y == 1]  # split the mask by class
    X0, X1 = X[Y == 0], X[Y == 1]      # split the samples by class
    X0_tp, X0_fp = X0[tp0], X0[~tp0]
    # class-0 points that were predicted correctly / incorrectly
    X1_tp, X1_fp = X1[tp1], X1[~tp1]
    alpha = 0.5
    plt.plot(X0_tp[:, 0], X0_tp[:, 1], 'o', alpha=alpha, color='red',
             markeredgecolor='k')  # class 0, correct
    plt.plot(X0_fp[:, 0], X0_fp[:, 1], '*', alpha=alpha, color='#990000',
             markeredgecolor='k')  # class 0, wrong
    plt.plot(X1_tp[:, 0], X1_tp[:, 1], 'o', alpha=alpha, color='blue',
             markeredgecolor='k')  # class 1, correct
    plt.plot(X1_fp[:, 0], X1_fp[:, 1], '*', alpha=alpha, color='#000099',
             markeredgecolor='k')  # class 1, wrong
    nx, ny = 200, 100
    x_min, x_max = plt.xlim()
    y_min, y_max = plt.ylim()
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
                         np.linspace(y_min, y_max, ny))
    z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    z = z[:, 1].reshape(xx.shape)
    plt.pcolormesh(xx, yy, z, cmap='red_blue_classes',
                   norm=colors.Normalize(0, 1))
    plt.contour(xx, yy, z, [0.5], linewidths=2, colors='k')  # p = 0.5 boundary
    plt.plot(lda.means_[0][0], lda.means_[0][1],
             'o', color='black', markersize=10, markeredgecolor='k')
    plt.plot(lda.means_[1][0], lda.means_[1][1],
             'o', color='black', markersize=10, markeredgecolor='k')
    return splot
def plot_ellipse(splot, mean, cov, color):
    v, w = linalg.eigh(cov)
    u = w[0] / linalg.norm(w[0])
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees
    # filled Gaussian ellipse at 2 standard deviations
    ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                              angle=180 + angle, facecolor=color,
                              edgecolor='yellow',
                              linewidth=2, zorder=2)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)
    splot.set_xticks(())
    splot.set_yticks(())
def plot_lda_cov(lda, splot):
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')

def plot_qda_cov(qda, splot):
    plot_ellipse(splot, qda.means_[0], qda.covariance_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariance_[1], 'blue')
for i, (X, Y) in enumerate([dataset_fix_cov(), dataset_cov()]):
    # row 1: samples with a shared covariance; row 2: different covariances
    lda = LinearDiscriminantAnalysis(solver='svd', store_covariance=True)
    Y_pred = lda.fit(X, Y).predict(X)
    splot = plot_data(lda, X, Y, Y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')
    qda = QuadraticDiscriminantAnalysis(store_covariance=True)
    Y_pred = qda.fit(X, Y).predict(X)
    splot = plot_data(qda, X, Y, Y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant '
             'Analysis')
plt.show()
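Because LDA pools a single covariance matrix, its decision boundary is a hyperplane; for a binary problem the fitted plane is exposed through coef_ and intercept_. A self-contained sketch (toy data and the _chk names are invented here) confirming that the sign of w·x + b reproduces predict:

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X_chk, y_chk = make_blobs(n_samples=200, centers=2, random_state=0)
lda_chk = LinearDiscriminantAnalysis(solver='svd').fit(X_chk, y_chk)
w, b = lda_chk.coef_[0], lda_chk.intercept_[0]  # boundary: w @ x + b = 0
print(np.array_equal((X_chk @ w + b > 0).astype(int), lda_chk.predict(X_chk)))
# expected True: the positive side of the hyperplane corresponds to class 1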
Comparison of LDA and PCA 2D projection of Iris dataset
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
iris = datasets.load_iris()
x = iris.data
y = iris.target
target_names = iris.target_names
pca = PCA(n_components=2)  # keep two principal components
x_r = pca.fit(x).transform(x)  # PCA is unsupervised: it uses only X, never y
lda = LinearDiscriminantAnalysis(n_components=2)
x_r2 = lda.fit(x, y).transform(x)  # note that LDA is supervised and needs y
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))
plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(x_r[y == i, 0], x_r[y == i, 1], c=color, alpha=0.5,
                linewidths=lw, label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')
plt.figure()  # new figure, so the LDA projection is not drawn on the PCA axes
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(x_r2[y == i, 0], x_r2[y == i, 1], c=color, alpha=0.5,
                linewidths=lw, label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
# loc='best' picks the legend position automatically; shadow=False disables
# the legend shadow; scatterpoints sets how many marker points are drawn in
# each legend entry (with 200 the legend marker turns into a line of dots)
plt.title('LDA of IRIS dataset')
plt.show()
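One caveat worth noting: LDA can produce at most min(n_classes - 1, n_features) discriminant components, so for the three-class iris data the 2-D projection above is already the maximum. A quick check continuing the script above (explained_variance_ratio_ assumed available, as it is for the default 'svd' solver):

print(x_r.shape, x_r2.shape)  # both (150, 2)
print(lda.explained_variance_ratio_)  # between-class variance explained per component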
Normal and Shrinkage Linear Discriminant Analysis for classification
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
n_train = 20         # samples for training
n_test = 200         # samples for testing
n_average = 50       # how often to repeat the classification
n_features_max = 75  # maximum number of features
step = 4             # step size of the feature-count sweep
n_samples = 11       # (unused below)
def generate_data(n_samples, n_features):
    # Blob-like data: only the first feature is discriminative,
    # the remaining n_features - 1 columns are pure noise.
    x, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]])
    if n_features > 1:
        x = np.hstack([x, np.random.randn(n_samples, n_features - 1)])
    return x, y
acc_clf1, acc_clf2 = [], []
n_features_range = range(1, n_features_max, step)
for n_features in n_features_range:
    score_clf1, score_clf2 = 0, 0
    for _ in range(n_average):
        x, y = generate_data(n_train, n_features)
        clf1 = LinearDiscriminantAnalysis(solver='lsqr',
                                          shrinkage='auto').fit(x, y)
        clf2 = LinearDiscriminantAnalysis(solver='lsqr',
                                          shrinkage=None).fit(x, y)
        x, y = generate_data(n_test, n_features)
        score_clf1 += clf1.score(x, y)
        score_clf2 += clf2.score(x, y)
    acc_clf1.append(score_clf1 / n_average)
    acc_clf2.append(score_clf2 / n_average)
feature_samples_ratio = np.array(n_features_range)/n_train
plt.plot(feature_samples_ratio, acc_clf1, linewidth=2,
         label="Linear Discriminant Analysis with shrinkage", color='navy')
plt.plot(feature_samples_ratio, acc_clf2, linewidth=2,
         label="Linear Discriminant Analysis", color='gold')
plt.xlabel('n_features / n_samples')
plt.ylabel('Classification accuracy')
plt.legend(loc=1, prop={'size': 12})
plt.suptitle('Linear Discriminant Analysis vs. '
             'shrinkage Linear Discriminant Analysis (1 discriminative feature)')
plt.show()
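shrinkage='auto' picks the shrinkage intensity analytically via the Ledoit-Wolf formula; a fixed float in [0, 1] can also be passed, interpolating between the empirical covariance (0) and a scaled identity matrix (1). A quick sketch reusing generate_data from above to sweep fixed intensities on one train/test split (the intensities are chosen arbitrarily for illustration):

x_tr, y_tr = generate_data(n_train, 50)
x_te, y_te = generate_data(n_test, 50)
for s in (0.0, 0.25, 0.5, 0.75, 1.0):
    clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=s).fit(x_tr, y_tr)
    print('shrinkage=%.2f  accuracy=%.3f' % (s, clf.score(x_te, y_te)))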