【SVM】SVM实现与可视化 using sklearn

程序员文章站 2022-07-09 23:23:23

# -*- coding: utf-8 -*-"""Created on Sat Nov 21 11:20:42 2020@author: Haoqi"""from sklearn.datasets import make_blobsfrom sklearn.svm import SVCimport matplotlib.pyplot as pltimport numpy as npX,y = make_blobs(n_samples = 50,centers =2,random_st....

文章目录

1 基本操作

note

【SVM】SVM实现与可视化 using sklearn

code

# -*- coding: utf-8 -*-
"""
Created on Sat Nov 21 11:20:42 2020
@author: Haoqi
"""
from sklearn.datasets import make_blobs
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# Draw 50 samples from two blobs (fixed seed so the figure is reproducible)
# and show them coloured by class label.
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.6,
    random_state=0,
)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow')
# To hide the axis ticks, call plt.xticks([]) and plt.yticks([]) here.
def plot_svc_decision_function(model,ax = None):
    """Draw the decision boundary and the +/-1 margins of a fitted classifier.

    The decision function is evaluated on a 30 x 30 grid spanning the current
    view limits of ``ax`` and contoured at the levels -1, 0 and 1 (dashed,
    solid, dashed).

    Parameters
    ----------
    model : object
        Anything exposing ``decision_function(points)`` that accepts an
        ``(n_points, 2)`` array and returns one value per point.
    ax : axes object, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    """
    if ax is None:
        ax = plt.gca()

    # Remember the current view so it can be restored after contouring.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    grid_x = np.linspace(xlim[0], xlim[1], 30)
    grid_y = np.linspace(ylim[0], ylim[1], 30)

    # "ij" indexing: the first axis follows x and the second follows y.
    X, Y = np.meshgrid(grid_x, grid_y, indexing='ij')
    points = np.column_stack((X.ravel(), Y.ravel()))
    p = model.decision_function(points).reshape(X.shape)

    # The transparency keyword is ``alpha``; the former ``alphas`` spelling is
    # not a contour option, so the lines were drawn fully opaque.
    ax.contour(X, Y, p,
               colors='k',
               levels=[-1, 0, 1],
               alpha=0.5,
               linestyles=['--', '-', '--'])
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Fit an RBF-kernel SVM (other kernels such as "linear" can be swapped in),
# redraw the samples with larger markers and overlay the decision function.
clf = SVC(kernel="rbf")
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
plot_svc_decision_function(clf)

ans:
【SVM】SVM实现与可视化 using sklearn

2 多种核函数测试比较

note

【SVM】SVM实现与可视化 using sklearn

code

# -*- coding: utf-8 -*-
"""
Created on Sun Nov 22 14:15:53 2020

@author: Haoqi
"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import svm
from sklearn.datasets import make_circles,make_moons,make_blobs,make_classification

# Number of points generated for every toy data set.
n_samples = 100

# Four two-class toy problems: moons, concentric circles, blobs and a generic
# two-feature classification task. Seeds are fixed for reproducibility.
datasets = [
    make_moons(n_samples=n_samples, noise=0.2, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),
    make_blobs(n_samples=n_samples, centers=2, random_state=5),
    make_classification(
        n_samples=n_samples,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        random_state=5,
    ),
]

# SVC kernels compared against each other, in column order.
Kernels = ["linear", "poly", "rbf", "sigmoid"]
# Quick look at every raw data set, each in its own small figure.
for points, labels in datasets:
    plt.figure(figsize=(5, 4))
    plt.scatter(points[:, 0], points[:, 1], s=50, c=labels, cmap='rainbow')

# Comparison grid: one row per data set, one column for the raw data plus
# one column per kernel.
nrows, ncols = len(datasets), len(Kernels) + 1
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 16))

for row, (features, labels) in enumerate(datasets):
    # Column 0 shows the untouched input data.
    panel = axes[row, 0]
    if row == 0:
        panel.set_title('input data')
    panel.scatter(features[:, 0], features[:, 1],
                  c=labels, cmap=plt.cm.Paired, edgecolors='k')
    panel.set_xticks([])
    panel.set_yticks([])

    for col, kernel in enumerate(Kernels, start=1):
        panel = axes[row, col]

        # Train on the full data set and report the training accuracy.
        model = svm.SVC(kernel=kernel, gamma=2)
        model.fit(features, labels)
        accuracy = model.score(features, labels)

        # Original samples, kept above the background via zorder.
        panel.scatter(features[:, 0], features[:, 1],
                      c=labels,
                      zorder=10,
                      cmap=plt.cm.Paired,
                      edgecolors='k')

        # Support vectors, marked with hollow circles.
        support = model.support_vectors_
        panel.scatter(support[:, 0], support[:, 1],
                      s=50,
                      facecolor='none',
                      zorder=10,
                      edgecolors='k')

        # Evaluation grid: the data range padded by 0.5, 200 steps per axis.
        x_min = features[:, 0].min() - 0.5
        x_max = features[:, 0].max() + 0.5
        y_min = features[:, 1].min() - 0.5
        y_max = features[:, 1].max() + 0.5
        XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]

        grid_points = np.c_[XX.ravel(), YY.ravel()]
        z = model.decision_function(grid_points).reshape(XX.shape)

        # Background coloured by predicted side; contours at the boundary
        # (solid) and at the +/-1 margins (dashed).
        panel.pcolormesh(XX, YY, z > 0, cmap=plt.cm.Paired)
        panel.contour(XX, YY, z,
                      colors=['k', 'k', 'k'],
                      linestyles=['--', '-', '--'],
                      levels=[-1, 0, 1])

        if row == 0:
            panel.set_title(kernel)

        # Accuracy label in the lower-right corner, without a leading zero.
        score_label = '{:.2f}'.format(accuracy).lstrip('0')
        panel.text(0.95, 0.06, score_label,
                   size=15,
                   bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),
                   transform=panel.transAxes,
                   horizontalalignment='right')

plt.tight_layout()
plt.show()

ans:
【SVM】SVM实现与可视化 using sklearn

3 对于不平衡数据的处理

note

对于不平衡数据，可以使用 **class_weight = {a: b}** 参数来处理：其中 a 为类别标签，b 为该类别的权重（例如 class_weight = {1: 10} 表示把类别 1 的权重设为 10）。

图例添加

a.collections # 取得等高线对象 a 中的所有线条（注意：该属性自 Matplotlib 3.8 起已弃用，新版本可改用 a.legend_elements()）
legend 的参数如下：

def __init__(self, parent, handles, labels,
                 loc=None,
                 numpoints=None,    # the number of points in the legend line
                 markerscale=None,  # the relative size of legend markers
                                    # vs. original
                 markerfirst=True,  # controls ordering (left-to-right) of
                                    # legend marker and label
                 scatterpoints=None,    # number of scatter points
                 scatteryoffsets=None,
                 prop=None,          # properties for the legend texts
                 fontsize=None,        # keyword to set font size directly

                 # spacing & pad defined as a fraction of the font-size
                 borderpad=None,      # the whitespace inside the legend border
                 labelspacing=None,   # the vertical space between the legend
                                      # entries
                 handlelength=None,   # the length of the legend handles
                 handleheight=None,   # the height of the legend handles
                 handletextpad=None,  # the pad between the legend handle
                                      # and text
                 borderaxespad=None,  # the pad between the axes and legend
                                      # border
                 columnspacing=None,  # spacing between columns

                 ncol=1,     # number of columns
                 mode=None,  # mode for horizontal distribution of columns.
                             # None, "expand"

                 fancybox=None,  # True use a fancy box, false use a rounded
                                 # box, none use rc
                 shadow=None,
                 title=None,  # set a title for the legend

                 framealpha=None,  # set frame alpha
                 edgecolor=None,  # frame patch edgecolor
                 facecolor=None,  # frame patch facecolor

                 bbox_to_anchor=None,  # bbox that the legend will be anchored.
                 bbox_transform=None,  # transform for the bbox
                 frameon=None,  # draw frame
                 handler_map=None,
                 ):

code

# -*- coding: utf-8 -*-
"""
Created on Mon Nov 23 11:43:54 2020

@author: Haoqi
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs

# Imbalanced two-class problem: 500 widely spread samples around the origin
# versus 50 tightly clustered samples around (2, 2).
class_1 = 500
class_2 = 50
centers = [[0.0, 0.0], [2.0, 2.0]]
cluster_std = [1.5, 0.5]

X, y = make_blobs(
    n_samples=[class_1, class_2],
    centers=centers,
    cluster_std=cluster_std,
    random_state=0,
    shuffle=False,
)
plt.scatter(X[:, 0], X[:, 1], s=10, c=y, cmap='rainbow')


# Baseline: a linear SVM without class weights, drawn over the visible region.
clf = svm.SVC(kernel='linear')
clf.fit(X, y)

axis = plt.gca()
x_lo, x_hi = axis.get_xlim()
y_lo, y_hi = axis.get_ylim()

# 200 x 200 evaluation grid covering the current view padded by 0.5.
XX, YY = np.mgrid[x_lo - 0.5:x_hi + 0.5:200j, y_lo - 0.5:y_hi + 0.5:200j]
grid_points = np.column_stack((XX.ravel(), YY.ravel()))
z = clf.decision_function(grid_points).reshape(XX.shape)

# Background coloured by predicted side, plus boundary and +/-1 margins.
axis.pcolormesh(XX, YY, z > 0, cmap=plt.cm.Paired)
axis.contour(XX, YY, z,
             colors=['k', 'k', 'k'],
             linestyles=['--', '-', '--'],
             levels=[-1, 0, 1])

# Redraw the samples so they sit on top of the coloured background.
plt.scatter(X[:, 0], X[:, 1], s=10, c=y, cmap='rainbow')
plt.show()
print('----------------------------------------------------------------------')
# Train two linear SVMs on the same data: one plain, one with class weights.
clf = svm.SVC(kernel='linear', C=1.0).fit(X, y)

# Give class 1 (the minority class) ten times the weight.
wclf = svm.SVC(kernel='linear', class_weight={1: 10}).fit(X, y)

# Training accuracy of the unweighted model, then of the weighted model.
for fitted in (clf, wclf):
    print(fitted.score(X, y))

# Compare the decision boundaries of the unweighted and the weighted model.
fig, ax = plt.subplots(figsize=(6, 5))
ax.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow', s=10)

# 200 x 200 evaluation grid covering the current view padded by 0.5.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
XX, YY = np.mgrid[xlim[0] - 0.5:xlim[1] + 0.5:200j,
                  ylim[0] - 0.5:ylim[1] + 0.5:200j]
grid_points = np.c_[XX.ravel(), YY.ravel()]

# Level 0 of each decision function is that model's separating line.
z_clf = clf.decision_function(grid_points).reshape(XX.shape)
plain_contour = ax.contour(XX, YY, z_clf,
                           colors='black', levels=[0], alpha=0.5,
                           linestyles=['-'])
z_wclf = wclf.decision_function(grid_points).reshape(XX.shape)
weighted_contour = ax.contour(XX, YY, z_wclf,
                              colors='red', levels=[0], alpha=0.5,
                              linestyles=['-'])

# Legend: ContourSet.collections is deprecated since Matplotlib 3.8, so the
# handles come from legend_elements(), which returns (artists, labels).
plain_handles, _ = plain_contour.legend_elements()
weighted_handles, _ = weighted_contour.legend_elements()
ax.legend([plain_handles[0], weighted_handles[0]],
          ['no weighted', 'weighted'],
          loc='upper right')
plt.show()