sklearn习题
程序员文章站
2022-03-22 17:38:40
...
Step1:
from sklearn import datasets
# Step 1: synthesize a two-class dataset of 1000 samples with 10 features,
# of which 2 are informative, 2 are redundant and none are repeated.
# No random_state is given, so each run produces different data.
dataset = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
)
# Dump the generated object so the raw data can be inspected.
print(dataset)
Step2:
from sklearn.model_selection import KFold
# Step 2: build a 10-fold splitter and walk over its folds once.
# dataset[0] holds the feature rows and dataset[1] the labels; each pass
# overwrites the train/test variables, so only the last fold's split
# remains bound after the loop.
kf = KFold(n_splits=10)
for train_index, test_index in kf.split(dataset[0]):
    X_train = dataset[0][train_index]
    X_test = dataset[0][test_index]
    y_train = dataset[1][train_index]
    y_test = dataset[1][test_index]
Step3和Step4:
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
def test_GaussianNB(X_train, y_train, X_test, y_test):
    """Fit Gaussian naive Bayes on one split and print accuracy, F1 and ROC AUC.

    Args:
        X_train: Training feature rows.
        y_train: Training labels (binary; F1 uses the default positive
            label 1).
        X_test: Held-out feature rows to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        None. The three metrics are written to stdout.
    """
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # ROC AUC measures ranking quality, so it needs a continuous score.
    # Hard 0/1 predictions collapse the curve to a single operating point;
    # use the probability of the positive class (second column) instead.
    pos_score = clf.predict_proba(X_test)[:, 1]
    print("GaussianNB: ")
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    auc = metrics.roc_auc_score(y_test, pos_score)
    print('auc: ', auc)
from sklearn.svm import SVC
def test_SVC(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVC on one split and print accuracy, F1 and ROC AUC.

    The classifier uses fixed hyper-parameters ``C=0.1`` and ``gamma=0.1``.

    Args:
        X_train: Training feature rows.
        y_train: Training labels (binary; F1 uses the default positive
            label 1).
        X_test: Held-out feature rows to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        None. The three metrics are written to stdout.
    """
    clf = SVC(C=1e-01, kernel='rbf', gamma=0.1)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # ROC AUC measures ranking quality, so it needs a continuous score.
    # SVC is not fitted with probability=True here, so use the signed
    # distance to the separating surface rather than the hard 0/1 labels.
    margin = clf.decision_function(X_test)
    print("SVC: ")
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    auc = metrics.roc_auc_score(y_test, margin)
    print('auc: ', auc)
from sklearn.ensemble import RandomForestClassifier
def test_nRandomForest(X_train, y_train, X_test, y_test):
    """Fit a 6-tree random forest on one split and print accuracy, F1 and ROC AUC.

    Args:
        X_train: Training feature rows.
        y_train: Training labels (binary; F1 uses the default positive
            label 1).
        X_test: Held-out feature rows to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        None. The three metrics are written to stdout.
    """
    clf = RandomForestClassifier(n_estimators=6)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # ROC AUC measures ranking quality, so it needs a continuous score.
    # Hard 0/1 predictions collapse the curve to a single operating point;
    # use the forest's probability for the positive class (second column).
    pos_score = clf.predict_proba(X_test)[:, 1]
    print("RandomForest: ")
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    auc = metrics.roc_auc_score(y_test, pos_score)
    print('auc: ', auc)
测试:
# Run all three classifiers on every fold of the K-fold split.
# enumerate(..., start=1) supplies the 1-based fold number, replacing a
# hand-maintained counter that had to be incremented at the end of the body.
for k, (train_index, test_index) in enumerate(kf.split(dataset[0]), start=1):
    X_train, X_test = dataset[0][train_index], dataset[0][test_index]
    y_train, y_test = dataset[1][train_index], dataset[1][test_index]
    print('test ', k, ': ')
    test_GaussianNB(X_train, y_train, X_test, y_test)
    test_SVC(X_train, y_train, X_test, y_test)
    test_nRandomForest(X_train, y_train, X_test, y_test)
    # Blank separator between folds in the console output.
    print('\n')
结果:
test 1 :
GaussianNB:
acc: 0.93
f1: 0.9391304347826087
auc: 0.9242424242424243
SVC:
acc: 0.93
f1: 0.9380530973451328
auc: 0.9262626262626262
RandomForest:
acc: 0.93
f1: 0.9369369369369368
auc: 0.9282828282828283
test 2 :
GaussianNB:
acc: 0.91
f1: 0.9158878504672897
auc: 0.9089635854341738
SVC:
acc: 0.94
f1: 0.9423076923076923
auc: 0.9395758303321329
RandomForest:
acc: 0.95
f1: 0.9514563106796117
auc: 0.9497799119647861
test 3 :
GaussianNB:
acc: 0.97
f1: 0.9696969696969697
auc: 0.9701880752300921
SVC:
acc: 0.91
f1: 0.9052631578947369
auc: 0.9093637454981992
RandomForest:
acc: 0.93
f1: 0.9263157894736843
auc: 0.9293717486994799
test 4 :
GaussianNB:
acc: 0.89
f1: 0.8865979381443299
auc: 0.8914090726615816
SVC:
acc: 0.91
f1: 0.9072164948453608
auc: 0.9114813327980731
RandomForest:
acc: 0.93
f1: 0.9263157894736843
auc: 0.930349257326375
test 5 :
GaussianNB:
acc: 0.91
f1: 0.9203539823008849
auc: 0.9053945249597423
SVC:
acc: 0.97
f1: 0.9724770642201834
auc: 0.9690016103059581
RandomForest:
acc: 0.95
f1: 0.9514563106796117
auc: 0.9537037037037037
test 6 :
GaussianNB:
acc: 0.9
f1: 0.8936170212765957
auc: 0.9009661835748792
SVC:
acc: 0.93
f1: 0.9230769230769231
auc: 0.928743961352657
RandomForest:
acc: 0.95
f1: 0.945054945054945
auc: 0.9488727858293077
test 7 :
GaussianNB:
acc: 0.96
f1: 0.9545454545454546
auc: 0.9594155844155845
SVC:
acc: 0.96
f1: 0.9545454545454546
auc: 0.9594155844155845
RandomForest:
acc: 0.98
f1: 0.9767441860465117
auc: 0.9772727272727273
test 8 :
GaussianNB:
acc: 0.93
f1: 0.9278350515463919
auc: 0.9335748792270531
SVC:
acc: 0.93
f1: 0.9278350515463919
auc: 0.9335748792270531
RandomForest:
acc: 0.96
f1: 0.9574468085106383
auc: 0.9613526570048309
test 9 :
GaussianNB:
acc: 0.94
f1: 0.9400000000000001
auc: 0.9403761504601841
SVC:
acc: 0.95
f1: 0.9494949494949495
auc: 0.9501800720288115
RandomForest:
acc: 0.95
f1: 0.9494949494949495
auc: 0.9501800720288115
test 10 :
GaussianNB:
acc: 0.95
f1: 0.9557522123893805
auc: 0.9480519480519481
SVC:
acc: 0.94
f1: 0.9464285714285714
auc: 0.9391233766233766
RandomForest:
acc: 0.96
f1: 0.9636363636363636
auc: 0.9618506493506493
Step5:
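通过观察这十折交叉验证的结果(十次测试的平均准确率:RandomForest 约 0.949,SVC 约 0.937,GaussianNB 约 0.929),可以看出 RandomForest 是三个算法中表现最好的,SVC 次之,朴素贝叶斯最差。不过三者差距并不大,而且数据集是随机生成的,每次运行的具体数值会略有不同。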
上一篇: Atom的安装,配置
下一篇: Nginx学习笔记-认识和安装Nginx