
[Course] Advanced Computer Programming, Homework, week 15, scikit-learn


scikit-learn example


import numpy as np
from sklearn import metrics
from sklearn import datasets
from sklearn.model_selection import cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
# Generate a random binary classification problem: 2000 samples, 10 features.
# make_classification returns a tuple (X, y).
data = datasets.make_classification(n_samples=2000, n_features=10)
data[0][:5]    # first five rows of the feature matrix X
array([[ 1.94224588, -0.90004129, -1.29373233, -2.68480168,  0.33210151,
         0.12402016, -0.85576276,  1.17830754,  0.49223751,  1.96238429],
       [ 0.12985395,  0.54406644, -0.35623636,  0.42980539, -0.36361682,
         0.43473325, -0.66051895, -0.45035764, -1.12449465, -0.82254233],
       [-0.38586157,  0.82526339, -1.11620907, -0.78525796, -0.40823213,
         0.13700943,  0.24143533, -1.02235791,  1.03706833, -1.50767359],
       [-0.48923486,  0.13611755,  0.70740195, -1.74092075, -1.15887133,
         0.96963557, -0.94061012, -2.04747774, -1.61738509, -0.41771673],
       [-0.54016059,  0.13232637,  0.62608625, -0.81078144, -0.31259478,
        -0.10926979,  0.78459312, -1.09810794,  1.88645398, -0.11686013]])
data[1][:5]    # first five class labels y
array([0, 0, 0, 0, 1])
# Classifiers to compare: Gaussian naive Bayes, an RBF-kernel SVM, and a random forest
clfs = [GaussianNB(),
        SVC(C=0.1, kernel='rbf', gamma=0.1),
        RandomForestClassifier(n_estimators=100)]

# Evaluate each classifier with 10-fold cross-validation, scoring both micro- and macro-F1
scoring = ['f1_micro', 'f1_macro']
for clf in clfs:
    scores = cross_validate(clf, data[0], data[1], scoring=scoring, cv=10)
    print('--------------------')
    print(str(clf))
    print()
    print('micro: ')
    print(scores['test_f1_micro'])
    print('macro: ')
    print(scores['test_f1_macro'])
    print('ave: ', np.mean(scores['test_f1_micro']), np.mean(scores['test_f1_macro']))
--------------------
GaussianNB(priors=None)

micro: 
[0.90547264 0.91       0.905      0.96       0.925      0.93
 0.94       0.905      0.945      0.94974874]
macro: 
[0.90546328 0.90985577 0.90499762 0.95998399 0.92498312 0.92997199
 0.93994595 0.90497862 0.94498762 0.949717  ]
ave:  0.9275221380534514 0.9274884968705537
--------------------
SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

micro: 
[0.91044776 0.945      0.93       0.985      0.935      0.965
 0.96       0.945      0.965      0.9798995 ]
macro: 
[0.91026786 0.94498762 0.92993694 0.98499962 0.93498537 0.96497811
 0.959996   0.9449656  0.96499912 0.97989899]
ave:  0.9520347258681466 0.9520015248604876
--------------------
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

micro: 
[0.92039801 0.945      0.935      0.96       0.935      0.965
 0.945      0.95       0.965      0.97487437]
macro: 
[0.9202381  0.94499862 0.93492028 0.95998399 0.93499837 0.96499212
 0.94499862 0.94995496 0.96499912 0.97487437]
ave:  0.9495272381809545 0.9494958570594563

The classifiers are evaluated with the micro-F1 and macro-F1 metrics under 10-fold cross-validation. On this randomly generated binary classification problem, the SVM gives the best test performance of the three models.
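For reference, micro-F1 pools the decisions over all classes before computing precision and recall, while macro-F1 computes F1 per class and averages with equal weight, so it is more sensitive to minority-class errors. A minimal sketch of the difference (not part of the assignment; the toy labels below are made up for illustration):

# Illustration only: compare micro vs. macro averaging on an imbalanced toy label set
from sklearn.metrics import f1_score

y_true = [0, 0, 0, 0, 0, 0, 1, 1]
y_pred = [0, 0, 0, 0, 0, 0, 0, 1]

# micro-F1: pool all predictions, then compute precision/recall/F1 globally
print(f1_score(y_true, y_pred, average='micro'))   # 0.875, dominated by the majority class
# macro-F1: compute F1 per class, then take the unweighted mean
print(f1_score(y_true, y_pred, average='macro'))   # ~0.795, penalizes the missed minority sample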
