Python SVM(支持向量机)实现方法完整示例

程序员文章站 2022-06-01 17:42:14

本文实例讲述了 Python SVM(支持向量机)实现方法。分享给大家供大家参考，具体如下：运行环境 python3 numpy(科学计算包) ma...

本文实例讲述了python svm(支持向量机)实现方法。分享给大家供大家参考，具体如下：

运行环境

python3
numpy(科学计算包)
matplotlib(画图所需，不画图可不必)

计算过程

st=>start: 开始
e=>end: 结束
op1=>operation: 读入数据
op2=>operation: 格式化数据
cond=>condition: 是否达到迭代次数
op3=>operation: 寻找超平面分割最小间隔
ccond=>condition: 数据是否改变
op4=>operation: 输出结果
st->op1->op2->cond
cond(yes)->op4->e
cond(no)->op3

啊，这markdown flow好难用，我决定就画到这吧=。=

输入样例

/* testset.txt */
3.542485 1.977398 -1
3.018896 2.556416 -1
7.551510 -1.580030 1
2.114999 -0.004466 -1
8.127113 1.274372 1
7.108772 -0.986906 1
8.610639 2.046708 1
2.326297 0.265213 -1
3.634009 1.730537 -1
0.341367 -0.894998 -1
3.125951 0.293251 -1
2.123252 -0.783563 -1
0.887835 -2.797792 -1
7.139979 -2.329896 1
1.696414 -1.212496 -1
8.117032 0.623493 1
8.497162 -0.266649 1
4.658191 3.507396 -1
8.197181 1.545132 1
1.208047 0.213100 -1
1.928486 -0.321870 -1
2.175808 -0.014527 -1
7.886608 0.461755 1
3.223038 -0.552392 -1
3.628502 2.190585 -1
7.407860 -0.121961 1
7.286357 0.251077 1
2.301095 -0.533988 -1
-0.232542 -0.547690 -1
3.457096 -0.082216 -1
3.023938 -0.057392 -1
8.015003 0.885325 1
8.991748 0.923154 1
7.916831 -1.781735 1
7.616862 -0.217958 1
2.450939 0.744967 -1
7.270337 -2.507834 1
1.749721 -0.961902 -1
1.803111 -0.176349 -1
8.804461 3.044301 1
1.231257 -0.568573 -1
2.074915 1.410550 -1
-0.743036 -1.736103 -1
3.536555 3.964960 -1
8.410143 0.025606 1
7.382988 -0.478764 1
6.960661 -0.245353 1
8.234460 0.701868 1
8.168618 -0.903835 1
1.534187 -0.622492 -1
9.229518 2.066088 1
7.886242 0.191813 1
2.893743 -1.643468 -1
1.870457 -1.040420 -1
5.286862 -2.358286 1
6.080573 0.418886 1
2.544314 1.714165 -1
6.016004 -3.753712 1
0.926310 -0.564359 -1
0.870296 -0.109952 -1
2.369345 1.375695 -1
1.363782 -0.254082 -1
7.279460 -0.189572 1
1.896005 0.515080 -1
8.102154 -0.603875 1
2.529893 0.662657 -1
1.963874 -0.365233 -1
8.132048 0.785914 1
8.245938 0.372366 1
6.543888 0.433164 1
-0.236713 -5.766721 -1
8.112593 0.295839 1
9.803425 1.495167 1
1.497407 -0.552916 -1
1.336267 -1.632889 -1
9.205805 -0.586480 1
1.966279 -1.840439 -1
8.398012 1.584918 1
7.239953 -1.764292 1
7.556201 0.241185 1
9.015509 0.345019 1
8.266085 -0.230977 1
8.545620 2.788799 1
9.295969 1.346332 1
2.404234 0.570278 -1
2.037772 0.021919 -1
1.727631 -0.453143 -1
1.979395 -0.050773 -1
8.092288 -1.372433 1
1.667645 0.239204 -1
9.854303 1.365116 1
7.921057 -1.327587 1
8.500757 1.492372 1
1.339746 -0.291183 -1
3.107511 0.758367 -1
2.609525 0.902979 -1
3.263585 1.367898 -1
2.912122 -0.202359 -1
1.731786 0.589096 -1
2.387003 1.573131 -1

代码实现

# -*- coding:utf-8 -*-
#!python3
__author__ = 'wsine'
from numpy import *
import matplotlib.pyplot as plt
import operator
import time
def loaddataset(filename):
  """Load a two-feature, labelled data set from a whitespace-delimited file.

  Every non-blank line must hold at least three numeric fields: the two
  feature values followed by the class label. Fields may be separated by
  tabs or by any run of spaces.

  Args:
    filename: Path of the text file to read.

  Returns:
    A ``(datamat, labelmat)`` tuple. ``datamat`` is a list of ``[x1, x2]``
    float pairs and ``labelmat`` is the list of float labels, both in file
    order.

  Raises:
    OSError: If the file cannot be opened.
    ValueError: If one of the first three fields is not a number.
    IndexError: If a non-blank line has fewer than three fields.
  """
  datamat = []
  labelmat = []
  with open(filename) as fr:
    for line in fr:
      # split() without an argument accepts tabs and spaces alike, so the
      # loader works whether or not the delimiters survived copy/paste.
      linearr = line.split()
      if not linearr:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue
      datamat.append([float(linearr[0]), float(linearr[1])])
      labelmat.append(float(linearr[2]))
  return datamat, labelmat
def selectjrand(i, m):
  """Pick a random integer index that differs from ``i``.

  Candidates are drawn as ``int(random.uniform(0, m))`` (``random`` is the
  name brought in by the file's ``from numpy import *``) and redrawn until
  the result is not ``i``.

  Args:
    i: Index that must not be returned.
    m: Upper bound (exclusive) of the sampling range, i.e. the sample count.

  Returns:
    An ``int`` ``j`` with ``j != i``.

  Raises:
    ValueError: If ``i`` is 0 and ``abs(m) <= 1``. Every draw then
      truncates to 0, so the retry loop could never terminate.
  """
  if i == 0 and abs(m) <= 1:
    raise ValueError(
      "cannot pick an index different from %r out of %r candidate(s)" % (i, m))
  j = i
  while j == i:
    j = int(random.uniform(0, m))
  return j
def clipalpha(aj, h, l):
  """Clamp ``aj`` to the upper bound ``h`` and then to the lower bound ``l``.

  The upper bound is applied first, so when ``l > h`` the result is ``l``.

  Args:
    aj: Value to clamp.
    h: Upper bound.
    l: Lower bound.

  Returns:
    ``h`` if ``aj > h``, otherwise ``aj``; that value is then replaced by
    ``l`` if ``l`` is greater than it.
  """
  capped = h if aj > h else aj
  return l if l > capped else capped
class optstruct:
  """Mutable state shared by the SMO routines in this file."""

  def __init__(self, datamatin, classlabels, c, toler):
    """Store the training data and initialise the optimiser state.

    Args:
      datamatin: Training samples, one per row. ``smop`` passes a numpy
        matrix here and the sibling routines rely on matrix products.
      classlabels: Class labels; ``smop`` passes a column matrix with one
        row per sample.
      c: Upper bound applied to every alpha by the update step.
      toler: Tolerance used when checking whether a sample needs updating.
    """
    self.x = datamatin
    self.labelmat = classlabels
    self.c = c
    self.tol = toler
    # Number of training samples (rows of the data matrix).
    self.m = shape(datamatin)[0]
    # One multiplier per sample, all starting at zero.
    # asmatrix is used instead of mat: the `mat` alias was removed from the
    # NumPy 2.0 namespace, while asmatrix exists in both 1.x and 2.x.
    self.alphas = asmatrix(zeros((self.m, 1)))
    # Bias term; starts as the integer 0.
    self.b = 0
    # Error cache: column 0 is a "valid" flag, column 1 the cached error.
    self.ecache = asmatrix(zeros((self.m, 2)))
def calcek(os, k):
  """Return the prediction error of sample ``k`` under the current model.

  The prediction is ``sum_i(alpha_i * y_i * <x_i, x_k>) + b`` (linear
  kernel); the error is that prediction minus the true label of ``k``.

  Args:
    os: Optimiser state providing ``alphas``, ``labelmat``, ``x`` and ``b``.
      ``x``, ``alphas`` and ``labelmat`` must be numpy matrices.
    k: Row index of the sample.

  Returns:
    The error as a plain ``float``.
  """
  # Inner products of every training sample with sample k, shape (m, 1).
  kernel_col = os.x * os.x[k, :].T
  # .item() extracts the scalar from the 1x1 product without relying on
  # float(matrix), which NumPy deprecates for arrays with ndim > 0.
  fxk = (multiply(os.alphas, os.labelmat).T * kernel_col).item()
  # b starts as the int 0 and later becomes a 1x1 matrix; normalise both.
  fxk += asarray(os.b).item()
  return float(fxk - os.labelmat[k, 0])
def selectj(i, os, ei):
  """Choose the second sample ``j`` to optimise together with ``i``.

  The error of ``i`` is first stored in the cache. If other samples already
  have a valid cache entry, the one whose freshly computed error differs
  most from ``ei`` is chosen; otherwise ``j`` is picked at random.

  Args:
    i: Index of the first sample.
    os: Optimiser state; its ``ecache`` row ``i`` is overwritten.
    ei: Current error of sample ``i``.

  Returns:
    A ``(j, ej)`` tuple: the chosen index and its error.
  """
  maxk = -1
  maxdeltae = 0
  ej = 0
  os.ecache[i] = [1, ei]
  # Row indices whose "valid" flag (column 0) is non-zero.
  validecachelist = nonzero(os.ecache[:, 0].A)[0]
  if len(validecachelist) > 1:
    for k in validecachelist:
      if k == i:
        continue
      ek = calcek(os, k)
      deltae = abs(ei - ek)
      if deltae > maxdeltae:
        maxk = k
        maxdeltae = deltae
        ej = ek
    if maxk != -1:
      return maxk, ej
    # Every cached error equals ei, so no candidate was selected. Fall
    # through to the random choice instead of returning index -1, which
    # would silently address the last sample.
  j = selectjrand(i, os.m)
  ej = calcek(os, j)
  return j, ej
def updateek(os, k):
  """Recompute the error of sample ``k`` and store it in the error cache.

  Args:
    os: Optimiser state whose ``ecache`` row ``k`` is overwritten.
    k: Row index of the sample.
  """
  # Column 0 = valid flag, column 1 = freshly computed error.
  os.ecache[k] = [1, calcek(os, k)]
def innerl(i, os):
  """Try to optimise the alpha pair ``(i, j)`` for a given first sample ``i``.

  Args:
    i: Index of the first sample.
    os: Optimiser state; ``alphas``, ``b`` and ``ecache`` are updated in
      place when a step is taken.

  Returns:
    1 if both alphas were updated, 0 if no (complete) step was taken.
  """
  ei = calcek(os, i)
  # Only samples that break the tolerance check and whose alpha can still
  # move in the required direction are worth optimising.
  below_margin = (os.labelmat[i] * ei < -os.tol) and (os.alphas[i] < os.c)
  above_margin = (os.labelmat[i] * ei > os.tol) and (os.alphas[i] > 0)
  if not (below_margin or above_margin):
    return 0

  j, ej = selectj(i, os, ei)
  alphaiold = os.alphas[i].copy()
  alphajold = os.alphas[j].copy()

  # Feasible interval [l, h] for the new alpha_j.
  if os.labelmat[i] != os.labelmat[j]:
    l = max(0, os.alphas[j] - os.alphas[i])
    h = min(os.c, os.c + os.alphas[j] - os.alphas[i])
  else:
    l = max(0, os.alphas[j] + os.alphas[i] - os.c)
    h = min(os.c, os.alphas[j] + os.alphas[i])
  if l == h:
    return 0

  # Linear-kernel inner products (1x1 matrices), computed once and reused.
  kii = os.x[i, :] * os.x[i, :].T
  kij = os.x[i, :] * os.x[j, :].T
  kjj = os.x[j, :] * os.x[j, :].T
  eta = 2.0 * kij - kii - kjj
  if eta >= 0:
    return 0

  os.alphas[j] -= os.labelmat[j] * (ei - ej) / eta
  os.alphas[j] = clipalpha(os.alphas[j], h, l)
  updateek(os, j)
  if abs(os.alphas[j] - alphajold) < 0.00001:
    # alpha_j barely moved; skip the matching alpha_i update.
    return 0

  # Move alpha_i by the same amount in the direction that keeps
  # y_i * alpha_i + y_j * alpha_j unchanged.
  os.alphas[i] += os.labelmat[j] * os.labelmat[i] * (alphajold - os.alphas[j])
  updateek(os, i)

  deltai = os.labelmat[i] * (os.alphas[i] - alphaiold)
  deltaj = os.labelmat[j] * (os.alphas[j] - alphajold)
  b1 = os.b - ei - deltai * kii - deltaj * kij
  # b2 is the threshold that zeroes the error of sample j, so it must start
  # from ej (the original used ei here, making b2 inconsistent with b1).
  b2 = os.b - ej - deltai * kij - deltaj * kjj
  if (0 < os.alphas[i]) and (os.c > os.alphas[i]):
    os.b = b1
  elif (0 < os.alphas[j]) and (os.c > os.alphas[j]):
    os.b = b2
  else:
    os.b = (b1 + b2) / 2.0
  return 1
def smop(datamatin, classlabels, c, toler, maxiter, ktup=('lin', 0)):
  """Train a linear SVM with the SMO procedure implemented in this file.

  Passes alternate between sweeping every sample and sweeping only the
  samples whose alpha lies strictly between 0 and ``c``. Training stops
  after ``maxiter`` passes, or when a full sweep changes no alpha pair.

  Args:
    datamatin: Training samples, one row per sample.
    classlabels: Class label of each sample.
    c: Upper bound for every alpha.
    toler: Tolerance used when deciding whether a sample needs updating.
    maxiter: Maximum number of passes over the data.
    ktup: Accepted for interface compatibility; not used by this function.

  Returns:
    A ``(b, alphas)`` tuple: the bias term and the ``(m, 1)`` matrix of
    alphas. ``b`` is the integer 0 if no update ever happened, otherwise
    a 1x1 matrix.
  """
  # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
  os = optstruct(asmatrix(datamatin), asmatrix(classlabels).transpose(), c, toler)
  iterr = 0
  entireset = True
  alphapairschanged = 0
  while (iterr < maxiter) and ((alphapairschanged > 0) or entireset):
    alphapairschanged = 0
    if entireset:
      candidates = range(os.m)
    else:
      # Indices of the alphas strictly inside (0, c).
      candidates = nonzero((os.alphas.A > 0) * (os.alphas.A < c))[0]
    for i in candidates:
      alphapairschanged += innerl(i, os)
    iterr += 1
    if entireset:
      # After a full sweep, concentrate on the non-bound samples.
      entireset = False
    elif alphapairschanged == 0:
      # The non-bound sweep made no progress: go back to a full sweep.
      entireset = True
  return os.b, os.alphas
def calcws(alphas, dataarr, classlabels):
  """Compute the weight vector ``w = sum_i(alpha_i * y_i * x_i)``.

  Args:
    alphas: One multiplier per sample (e.g. the ``(m, 1)`` matrix returned
      by ``smop``).
    dataarr: Training samples, one row per sample.
    classlabels: Class label of each sample.

  Returns:
    The weights as an ``(n, 1)`` float ndarray, ``n`` being the number of
    features.
  """
  x = asarray(dataarr, dtype=float)
  # Column vectors, so the element-wise product pairs alpha_i with y_i.
  coeffs = (asarray(alphas, dtype=float).reshape(-1, 1)
            * asarray(classlabels, dtype=float).reshape(-1, 1))
  # A single matrix product replaces the per-sample accumulation loop.
  return dot(x.T, coeffs)
def plotfeature(datamat, labelmat, weights, b):
  """Scatter-plot both classes and draw the separating line ``w . x + b = 0``.

  Samples whose label truncates to 1 are drawn as red squares, all others
  as green dots. The window is shown with ``plt.show()``.

  Args:
    datamat: Samples, one ``[x1, x2]`` row per sample.
    labelmat: Class label of each sample.
    weights: Weight vector with (at least) two components, e.g. the
      ``(2, 1)`` array returned by ``calcws``.
    b: Bias term; a scalar or a 1x1 matrix.
  """
  dataarr = asarray(datamat, dtype=float)
  # astype(int) truncates like int(), matching the per-item int(label) test.
  positive = asarray(labelmat, dtype=float).ravel().astype(int) == 1
  fig = plt.figure()
  ax = fig.add_subplot(111)
  ax.scatter(dataarr[positive, 0], dataarr[positive, 1], s=30, c='red', marker='s')
  ax.scatter(dataarr[~positive, 0], dataarr[~positive, 1], s=30, c='green')

  w1, w2 = asarray(weights, dtype=float).ravel()[:2]
  # ravel()[0] copes with both a plain scalar and a 1x1 matrix.
  bias = asarray(b, dtype=float).ravel()[0]
  x = arange(2, 7.0, 0.1)
  if w2 != 0:
    # Solve w1*x1 + w2*x2 + b = 0 for x2 to draw the decision boundary.
    ax.plot(x, -(w1 * x + bias) / w2)
  elif w1 != 0:
    # Boundary does not depend on x2: it is the vertical line x1 = -b/w1.
    ax.axvline(-bias / w1)
  plt.xlabel('x1'); plt.ylabel('x2')
  plt.show()
def main():
  """Train on ``testset.txt``, print the learned model and plot it.

  Training uses ``c=0.6``, a tolerance of ``0.0001`` and at most 40 passes.
  """
  samples, labels = loaddataset('testset.txt')
  bias, multipliers = smop(samples, labels, 0.6, 0.0001, 40)
  weights = calcws(multipliers, samples, labels)
  print("ws = \n", weights)
  print("b = \n", bias)
  plotfeature(samples, labels, weights, bias)
if __name__ == '__main__':
  # Time the whole run. time.clock() was removed in Python 3.8;
  # time.perf_counter() is the replacement for measuring intervals.
  start = time.perf_counter()
  main()
  end = time.perf_counter()
  print('finish all in %s' % str(end - start))

输出样例