机器学习实战Chp13: 利用PCA简化数据
程序员文章站
2022-07-16 18:05:25
...
- 主要参考周志华的《机器学习》,P230
- PCA核心程序
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 29 21:42:05 2018
@author: muli
"""
from numpy import *
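# mean(a, axis, dtype, out, keepdims)
#   Purpose: compute the arithmetic mean.
#   Taking a as an m x n matrix for illustration:
#   axis not given: mean of all m*n entries, returned as a single real number
#   axis=0: collapses the rows, i.e. one mean per column -> 1 x n matrix
#   axis=1: collapses the columns, i.e. one mean per row -> m x 1 matrix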
# 数据格式处理
def loadDataSet(fileName, delim='\t'):
    """Load a delimiter-separated text file of numbers into a NumPy matrix.

    Args:
        fileName: Path of the text file; every non-blank line is one sample.
        delim: Field separator used within a line (tab by default).

    Returns:
        A float matrix with one row per non-blank line and one column per
        field.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    # The context manager closes the file even if reading fails.
    with open(fileName) as fr:
        stripped = (line.strip() for line in fr)
        # Blank lines (e.g. a trailing newline) carry no sample; skip them
        # instead of failing on float('').
        stringArr = [row.split(delim) for row in stripped if row]
    # Build real lists: on Python 3 a bare map() is a lazy iterator, which
    # NumPy would store as an opaque object instead of a row of numbers.
    datArr = [[float(field) for field in row] for row in stringArr]
    # asmatrix is what the `mat` alias pointed to; `mat` itself was removed
    # in NumPy 2.0.
    return asmatrix(datArr)
# 注意:代码中的X和W 与 周志华书上P230的X和W为转置关系
def pca(dataMat, topNfeat=9999999):
    """Project data onto its leading principal components.

    Samples are stored as rows here, so the data matrix X and the projection
    matrix W are the transposes of the X and W used in Zhou Zhihua's
    "Machine Learning", p. 230.

    Args:
        dataMat: 2-D data, one sample per row and one feature per column.
        topNfeat: Number of principal components to keep. The default is
            large enough to keep every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``: the centred data projected onto
        the kept components, and the reconstruction of the data from that
        projection in the original feature space.
    """
    # Matrix semantics are required below, where `*` must mean matrix product.
    dataMat = asmatrix(dataMat)
    # Centre the data: subtract the per-feature (column) mean.
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals
    # Covariance between features; rowvar=0 marks columns as the variables.
    covMat = cov(meanRemoved, rowvar=0)
    # A covariance matrix is symmetric, so use the symmetric solver: it
    # returns real eigenvalues, whereas the general `eig` can produce complex
    # ones from round-off.
    eigVals, eigVects = linalg.eigh(asmatrix(covMat))
    # argsort orders ascending; walking backwards from the end picks the
    # indices of the topNfeat largest eigenvalues, largest first.
    eigValInd = argsort(eigVals)
    eigValInd = eigValInd[:-(topNfeat + 1):-1]
    # Columns of eigVects are the eigenvectors; keep the selected ones.
    redEigVects = eigVects[:, eigValInd]
    # Coordinates of every sample in the reduced space.
    lowDDataMat = meanRemoved * redEigVects
    # Map back to the original space and restore the mean.
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat
# 测试模块
if __name__ == "__main__":
    # Smoke test: reduce the sample data to one principal component and
    # print the shape of the reduced matrix.
    dataMat = loadDataSet('testSet.txt')
    lowDMat, reconMat = pca(dataMat, 1)
    print(shape(lowDMat))
绘图
- PCA降维前
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 30 20:03:32 2018
@author: muli
"""
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
n = 1000  # number of points to create
xcord0 = []
ycord0 = []
# Generate n correlated 2-D samples, keeping them for the plot and writing
# them tab-separated to testSet.txt. The context manager closes the file
# even if a write fails.
with open('testSet.txt', 'w') as fw:
    for _ in range(n):
        r0, r1 = random.standard_normal(2)
        # x is a standard normal draw shifted to 9; y is x plus independent
        # standard normal noise.
        fFlyer = r0 + 9.0
        tats = r1 + fFlyer
        xcord0.append(fFlyer)
        ycord0.append(tats)
        fw.write("%f\t%f\n" % (fFlyer, tats))
# Scatter plot of the raw (pre-PCA) samples.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xcord0, ycord0, marker='^', s=90)
plt.xlabel('hours of direct sunlight')
plt.ylabel('liters of water')
plt.show()
- PCA降维后
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 30 20:04:20 2018
@author: muli
"""
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import PCA_test
# Load the samples and reduce them to a single principal component.
dataMat = PCA_test.loadDataSet('testSet.txt')
lowDMat, reconMat = PCA_test.pca(dataMat, 1)

fig = plt.figure()
ax = fig.add_subplot(111)
# Draw the original samples as triangles, then the reconstruction from one
# component as smaller red circles on top of them.
for points, style in (
    (dataMat, dict(marker='^', s=90)),
    (reconMat, dict(marker='o', s=50, c='red')),
):
    # .A1 flattens a matrix column into the 1-D array scatter() expects.
    ax.scatter(points[:, 0].A1, points[:, 1].A1, **style)
plt.show()
上一篇: pcl中pca主元分析法的简单应用:计算点云主方向
下一篇: 使用python实验pca