【SVM】SVM实现与可视化 using sklearn
程序员文章站
2022-03-19 13:34:02
# -*- coding: utf-8 -*-"""Created on Sat Nov 21 11:20:42 2020@author: Haoqi"""from sklearn.datasets import make_blobsfrom sklearn.svm import SVCimport matplotlib.pyplot as pltimport numpy as npX,y = make_blobs(n_samples = 50,centers =2,random_st....
1 基本操作
note
code
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 21 11:20:42 2020
@author: Haoqi
"""
from sklearn.datasets import make_blobs
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
# Generate a small two-cluster toy dataset (50 points) and take a first look
# at it; the class label of each point selects its colour.
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.6,
    random_state=0,
)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow')
# Uncomment to hide the axis ticks:
# plt.xticks([])
# plt.yticks([])
def plot_svc_decision_function(model, ax=None):
    """Draw the zero level and the +/-1 levels of a model's decision function.

    The current x/y limits of ``ax`` are sampled on a 30 x 30 grid,
    ``model.decision_function`` is evaluated at every grid point, and the
    contour lines at the levels -1, 0 and 1 are drawn in black: level 0 as
    a solid line, the +/-1 levels as dashed lines (for an SVM these are the
    decision boundary and its margins).

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``decision_function`` that accepts an
        ``(n_points, 2)`` array and returns one value per point.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``)
        are used.

    Returns
    -------
    None
        The contour lines are added to ``ax``; the axis limits that were in
        effect on entry are restored afterwards.
    """
    if ax is None:
        ax = plt.gca()

    # Remember the limits: they define the sampled region and are restored
    # at the end of the function.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    grid_x = np.linspace(xlim[0], xlim[1], 30)
    grid_y = np.linspace(ylim[0], ylim[1], 30)
    # meshgrid is called as (y, x) so both result arrays are indexed [x, y];
    # contour only needs the three arrays to share one shape.
    mesh_y, mesh_x = np.meshgrid(grid_y, grid_x)
    points = np.vstack([mesh_x.ravel(), mesh_y.ravel()]).T
    decision = model.decision_function(points).reshape(mesh_x.shape)

    # NOTE: the transparency keyword is ``alpha``; the former ``alphas`` is
    # not a contour parameter, so the lines were never made translucent.
    ax.contour(
        mesh_x,
        mesh_y,
        decision,
        colors='k',
        levels=[-1, 0, 1],
        alpha=0.5,
        linestyles=['--', '-', '--'],
    )

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
# Fit an RBF-kernel support vector classifier on the toy data (the original
# note hints that other kernels such as "linear" can be tried here), then
# redraw the points and overlay the fitted decision function.
clf = SVC(kernel="rbf")
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
plot_svc_decision_function(clf)
ans:
2 多种核函数测试比较
note
code
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 22 14:15:53 2020
@author: Haoqi
"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import svm
from sklearn.datasets import make_circles,make_moons,make_blobs,make_classification
# Number of points drawn for every toy dataset.
n_samples = 100

# Four two-feature classification problems with different geometry:
# interleaved moons, concentric circles, two blobs, and a generic
# two-feature classification problem.
datasets = [
    make_moons(n_samples=n_samples, noise=0.2, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),
    make_blobs(n_samples=n_samples, centers=2, random_state=5),
    make_classification(
        n_samples=n_samples,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        random_state=5,
    ),
]

# SVC kernels that are compared against each other on every dataset.
Kernels = ["linear", "poly", "rbf", "sigmoid"]
# Preview: one small figure per dataset, points coloured by class label,
# before any model is fitted.
for X, Y in datasets:
    plt.figure(figsize=(5, 4))
    plt.scatter(
        X[:, 0],
        X[:, 1],
        c=Y,
        s=50,
        cmap='rainbow',
    )
# Layout of the comparison figure: one row per dataset; the first column
# shows the raw data and each following column shows one kernel.
nrows = len(datasets)
ncols = len(Kernels) + 1
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 16))  # 4 rows x 5 columns here

for ds_cnt, (data_x, data_y) in enumerate(datasets):
    # Column 0: the input data without any model.
    ax = axes[ds_cnt, 0]
    if ds_cnt == 0:
        ax.set_title('input data')
    ax.scatter(
        data_x[:, 0],
        data_x[:, 1],
        c=data_y,
        cmap=plt.cm.Paired,
        edgecolors='k',
    )
    ax.set_xticks([])
    ax.set_yticks([])

    # Columns 1..n: one fitted SVC per kernel.
    for est_idx, kernel in enumerate(Kernels):
        ax = axes[ds_cnt, est_idx + 1]

        clf = svm.SVC(kernel=kernel, gamma=2)
        clf.fit(data_x, data_y)
        # Accuracy on the training data itself (no hold-out split is made).
        score = clf.score(data_x, data_y)

        # Original data; zorder keeps the points above the colour mesh.
        ax.scatter(
            data_x[:, 0],
            data_x[:, 1],
            c=data_y,
            zorder=10,
            cmap=plt.cm.Paired,
            edgecolors='k',
        )
        # Support vectors, marked with a hollow black circle.
        ax.scatter(
            clf.support_vectors_[:, 0],
            clf.support_vectors_[:, 1],
            s=50,
            facecolor='none',
            zorder=10,
            edgecolors='k',
        )

        # Evaluate the decision function on a 200 x 200 grid that extends
        # 0.5 beyond the data in every direction.
        x_min, x_max = data_x[:, 0].min() - 0.5, data_x[:, 0].max() + 0.5
        y_min, y_max = data_x[:, 1].min() - 0.5, data_x[:, 1].max() + 0.5
        XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
        grid_points = np.c_[XX.ravel(), YY.ravel()]
        z = clf.decision_function(grid_points).reshape(XX.shape)

        # Background colour by the sign of the decision function, plus the
        # contour lines at -1 (dashed), 0 (solid) and 1 (dashed).
        ax.pcolormesh(XX, YY, z > 0, cmap=plt.cm.Paired)
        ax.contour(
            XX,
            YY,
            z,
            colors=['k', 'k', 'k'],
            linestyles=['--', '-', '--'],
            levels=[-1, 0, 1],
        )

        if ds_cnt == 0:
            ax.set_title(kernel)

        # Score in the lower-right corner, without the leading zero.
        ax.text(
            0.95,
            0.06,
            format(score, '.2f').lstrip('0'),
            size=15,
            bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),
            transform=ax.transAxes,
            horizontalalignment='right',
        )

plt.tight_layout()
plt.show()
ans:
3 对于不平衡数据的处理
note
对于不平衡数据,可以通过 SVC 的 **class_weight = {a:b}** 参数来处理:其中 a 是类别标签,b 是该类别的权重(例如 {1:10} 表示把类别 1 的权重设为 10)
图例添加
a.collections # 获取等高线对象 a 中的所有线条(注意:该属性自 Matplotlib 3.8 起已弃用,新版本中请改用 a.legend_elements() 获取图例句柄)
legend 参数如下
def __init__(self, parent, handles, labels,
loc=None,
numpoints=None, # the number of points in the legend line
markerscale=None, # the relative size of legend markers
# vs. original
markerfirst=True, # controls ordering (left-to-right) of
# legend marker and label
scatterpoints=None, # number of scatter points
scatteryoffsets=None,
prop=None, # properties for the legend texts
fontsize=None, # keyword to set font size directly
# spacing & pad defined as a fraction of the font-size
borderpad=None, # the whitespace inside the legend border
labelspacing=None, # the vertical space between the legend
# entries
handlelength=None, # the length of the legend handles
handleheight=None, # the height of the legend handles
handletextpad=None, # the pad between the legend handle
# and text
borderaxespad=None, # the pad between the axes and legend
# border
columnspacing=None, # spacing between columns
ncol=1, # number of columns
mode=None, # mode for horizontal distribution of columns.
# None, "expand"
fancybox=None, # True use a fancy box, false use a rounded
# box, none use rc
shadow=None,
title=None, # set a title for the legend
framealpha=None, # set frame alpha
edgecolor=None, # frame patch edgecolor
facecolor=None, # frame patch facecolor
bbox_to_anchor=None, # bbox that the legend will be anchored.
bbox_transform=None, # transform for the bbox
frameon=None, # draw frame
handler_map=None,
):
code
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 23 11:43:54 2020
@author: Haoqi
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs
# Sample counts of the two classes: 500 versus 50 points.
class_1 = 500
class_2 = 50

# Cluster centres and spreads, listed in the same order as the class sizes.
centers = [[0.0, 0.0], [2.0, 2.0]]
cluster_std = [1.5, 0.5]  # create unbalanced data

X, y = make_blobs(
    n_samples=[class_1, class_2],
    centers=centers,
    cluster_std=cluster_std,
    random_state=0,
    shuffle=False,
)
# First scatter: shows the data and fixes the axis limits that are read back
# below to size the evaluation grid.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow', s=10)

# Plain linear SVC without any class weighting.
clf = svm.SVC(kernel='linear')
clf.fit(X, y)

ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# Grid bounds: the current view extended by 0.5 on every side.
a, b = xlim[0] - 0.5, xlim[1] + 0.5
c, d = ylim[0] - 0.5, ylim[1] + 0.5
XX, YY = np.mgrid[a:b:200j, c:d:200j]
grid_points = np.c_[XX.ravel(), YY.ravel()]
z = clf.decision_function(grid_points).reshape(XX.shape)

# Background coloured by the sign of the decision function, with contour
# lines at -1 (dashed), 0 (solid) and 1 (dashed).
ax.pcolormesh(XX, YY, z > 0, cmap=plt.cm.Paired)
ax.contour(
    XX,
    YY,
    z,
    colors=['k', 'k', 'k'],
    linestyles=['--', '-', '--'],
    levels=[-1, 0, 1],
)

# Draw the points again so they sit on top of the colour mesh.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow', s=10)
plt.show()
print('----------------------------------------------------------------------')

# Fit two linear SVCs on the same data: one with default class weights ...
clf = svm.SVC(kernel='linear', C=1.0).fit(X, y)

# ... and one where class 1 is given a weight of 10.
wclf = svm.SVC(kernel='linear', class_weight={1: 10}).fit(X, y)

# Accuracy of both models on the training data.
for fitted_model in (clf, wclf):
    print(fitted_model.score(X, y))
# Compare the decision boundary (level 0 of the decision function) of the
# unweighted model `clf` with that of the class-weighted model `wclf`.
plt.subplots(figsize=(6, 5))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow', s=10)
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# Evaluation grid: the current view extended by 0.5 on every side. The
# bounds get their own names so that `a` and `b` below are not reused for
# two unrelated things.
x_lo, x_hi = xlim[0] - 0.5, xlim[1] + 0.5
y_lo, y_hi = ylim[0] - 0.5, ylim[1] + 0.5
XX, YY = np.mgrid[x_lo:x_hi:200j, y_lo:y_hi:200j]
grid_points = np.c_[XX.ravel(), YY.ravel()]

z_clf = clf.decision_function(grid_points).reshape(XX.shape)
a = ax.contour(XX, YY, z_clf, colors='black', levels=[0], alpha=0.5, linestyles=['-'])

z_wclf = wclf.decision_function(grid_points).reshape(XX.shape)
b = ax.contour(XX, YY, z_wclf, colors='red', levels=[0], alpha=0.5, linestyles=['-'])

# Legend: `ContourSet.collections` is deprecated since Matplotlib 3.8, so
# the handles are taken from `legend_elements()`, which returns
# (artists, labels) with one artist per contour level.
plain_handles, _ = a.legend_elements()
weighted_handles, _ = b.legend_elements()
plt.legend(
    [plain_handles[0], weighted_handles[0]],
    ['no weighted', 'weighted'],
    loc='upper right',
)
plt.show()
ANS:
本文地址:https://blog.csdn.net/baolang5032/article/details/109945898
上一篇: 企业网管必看:企业网站常见问题和解决方法
下一篇: Python实现数据分析