欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

sklearn习题

程序员文章站 2022-03-22 17:38:40
...

sklearn习题

Step1:

from sklearn import datasets
# Build a synthetic two-class problem: 1000 samples with 10 features, of
# which 2 are informative, 2 are redundant and none are repeated.
# make_classification returns an (X, y) tuple, kept here as one object.
dataset = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
)
print(dataset)

Step2:

from sklearn.model_selection import KFold
# 10-fold splitter; dataset[0] holds the feature matrix and dataset[1] the
# labels. After the loop the four variables hold the last fold's split.
kf = KFold(n_splits=10)
for train_index, test_index in kf.split(dataset[0]):
    X_train = dataset[0][train_index]
    X_test = dataset[0][test_index]
    y_train = dataset[1][train_index]
    y_test = dataset[1][test_index]

Step3和Step4:

from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
def test_GaussianNB(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive Bayes model and print its test-set metrics.

    Trains ``GaussianNB`` on ``(X_train, y_train)`` and prints accuracy, F1
    and ROC AUC for ``(X_test, y_test)``. Nothing is returned.

    Accuracy and F1 are computed from the predicted labels. ROC AUC is
    computed from the predicted probability of the positive class: feeding
    hard 0/1 labels to ``roc_auc_score`` collapses the ROC curve to a single
    threshold, which yields balanced accuracy rather than a ranking score.
    """
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Column 1 is the probability of clf.classes_[1] (the greater label),
    # which is the score roc_auc_score expects for binary targets.
    scores = clf.predict_proba(X_test)[:, 1]
    print("GaussianNB: ")
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    auc = metrics.roc_auc_score(y_test, scores)
    print('auc: ', auc)


from sklearn.svm import SVC
def test_SVC(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVC and print its test-set metrics.

    Trains ``SVC(C=0.1, kernel='rbf', gamma=0.1)`` on ``(X_train, y_train)``
    and prints accuracy, F1 and ROC AUC for ``(X_test, y_test)``. Nothing is
    returned.

    Accuracy and F1 are computed from the predicted labels. ROC AUC is
    computed from the signed distance to the decision boundary: feeding hard
    0/1 labels to ``roc_auc_score`` collapses the ROC curve to a single
    threshold, which yields balanced accuracy rather than a ranking score.
    """
    clf = SVC(C=1e-01, kernel='rbf', gamma=0.1)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # SVC has no predict_proba unless probability=True; decision_function
    # gives a continuous score (larger means the greater label) that
    # roc_auc_score accepts directly for binary targets.
    scores = clf.decision_function(X_test)
    print("SVC: ")
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    auc = metrics.roc_auc_score(y_test, scores)
    print('auc: ', auc)


from sklearn.ensemble import RandomForestClassifier
def test_nRandomForest(X_train, y_train, X_test, y_test):
    """Fit a 6-tree random forest and print its test-set metrics.

    Trains ``RandomForestClassifier(n_estimators=6)`` on
    ``(X_train, y_train)`` and prints accuracy, F1 and ROC AUC for
    ``(X_test, y_test)``. Nothing is returned.

    Accuracy and F1 are computed from the predicted labels. ROC AUC is
    computed from the predicted probability of the positive class: feeding
    hard 0/1 labels to ``roc_auc_score`` collapses the ROC curve to a single
    threshold, which yields balanced accuracy rather than a ranking score.
    """
    clf = RandomForestClassifier(n_estimators=6)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Column 1 is the probability of clf.classes_[1] (the greater label),
    # which is the score roc_auc_score expects for binary targets.
    scores = clf.predict_proba(X_test)[:, 1]
    print("RandomForest: ")
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    auc = metrics.roc_auc_score(y_test, scores)
    print('auc: ', auc)

测试:

# Run all three classifiers on every fold; k is the 1-based fold number
# shown in the printed header.
k = 1
for train_index, test_index in kf.split(dataset[0]):
    X_train = dataset[0][train_index]
    X_test = dataset[0][test_index]
    y_train = dataset[1][train_index]
    y_test = dataset[1][test_index]
    print('test ', k, ': ')
    test_GaussianNB(X_train, y_train, X_test, y_test)
    test_SVC(X_train, y_train, X_test, y_test)
    test_nRandomForest(X_train, y_train, X_test, y_test)
    print('\n')
    k = k + 1

结果:

test  1 : 
GaussianNB: 
acc:  0.93
f1:  0.9391304347826087
auc:  0.9242424242424243
SVC: 
acc:  0.93
f1:  0.9380530973451328
auc:  0.9262626262626262
RandomForest: 
acc:  0.93
f1:  0.9369369369369368
auc:  0.9282828282828283


test  2 : 
GaussianNB: 
acc:  0.91
f1:  0.9158878504672897
auc:  0.9089635854341738
SVC: 
acc:  0.94
f1:  0.9423076923076923
auc:  0.9395758303321329
RandomForest: 
acc:  0.95
f1:  0.9514563106796117
auc:  0.9497799119647861


test  3 : 
GaussianNB: 
acc:  0.97
f1:  0.9696969696969697
auc:  0.9701880752300921
SVC: 
acc:  0.91
f1:  0.9052631578947369
auc:  0.9093637454981992
RandomForest: 
acc:  0.93
f1:  0.9263157894736843
auc:  0.9293717486994799


test  4 : 
GaussianNB: 
acc:  0.89
f1:  0.8865979381443299
auc:  0.8914090726615816
SVC: 
acc:  0.91
f1:  0.9072164948453608
auc:  0.9114813327980731
RandomForest: 
acc:  0.93
f1:  0.9263157894736843
auc:  0.930349257326375


test  5 : 
GaussianNB: 
acc:  0.91
f1:  0.9203539823008849
auc:  0.9053945249597423
SVC: 
acc:  0.97
f1:  0.9724770642201834
auc:  0.9690016103059581
RandomForest: 
acc:  0.95
f1:  0.9514563106796117
auc:  0.9537037037037037


test  6 : 
GaussianNB: 
acc:  0.9
f1:  0.8936170212765957
auc:  0.9009661835748792
SVC: 
acc:  0.93
f1:  0.9230769230769231
auc:  0.928743961352657
RandomForest: 
acc:  0.95
f1:  0.945054945054945
auc:  0.9488727858293077


test  7 : 
GaussianNB: 
acc:  0.96
f1:  0.9545454545454546
auc:  0.9594155844155845
SVC: 
acc:  0.96
f1:  0.9545454545454546
auc:  0.9594155844155845
RandomForest: 
acc:  0.98
f1:  0.9767441860465117
auc:  0.9772727272727273


test  8 : 
GaussianNB: 
acc:  0.93
f1:  0.9278350515463919
auc:  0.9335748792270531
SVC: 
acc:  0.93
f1:  0.9278350515463919
auc:  0.9335748792270531
RandomForest: 
acc:  0.96
f1:  0.9574468085106383
auc:  0.9613526570048309


test  9 : 
GaussianNB: 
acc:  0.94
f1:  0.9400000000000001
auc:  0.9403761504601841
SVC: 
acc:  0.95
f1:  0.9494949494949495
auc:  0.9501800720288115
RandomForest: 
acc:  0.95
f1:  0.9494949494949495
auc:  0.9501800720288115


test  10 : 
GaussianNB: 
acc:  0.95
f1:  0.9557522123893805
auc:  0.9480519480519481
SVC: 
acc:  0.94
f1:  0.9464285714285714
auc:  0.9391233766233766
RandomForest: 
acc:  0.96
f1:  0.9636363636363636
auc:  0.9618506493506493

Step5:

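通过观察这十折测试的结果(平均准确率:RandomForest 约 0.949,SVC 约 0.937,GaussianNB 约 0.929),可以看出在本次实验中 RandomForest 是三个算法中表现最好的,SVC 次之,朴素贝叶斯最差。需要注意的是,数据集是随机生成的,各算法之间的差距较小,不同的运行结果可能略有不同。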


相关标签: sklearn