Python:基于二分类分解的序分类方法
程序员文章站
2022-05-26 19:13:56
...
参考文献:[1] Frank E, Hall M. A simple approach to ordinal classification[C]//European Conference on Machine Learning. Springer, Berlin, Heidelberg, 2001: 145-156.
没有进行类封装,仅是做了函数复现~
分类精度比单独使用多分类“OVR”策略高约 1~2 个百分点。
import numpy as np
from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,f1_score
from sklearn.model_selection import StratifiedKFold,KFold
from sklearn import datasets
from collections import OrderedDict
def BDOC_train(X, y):
    """Fit the K-1 binary classifiers of an ordinal decomposition.

    For every class label except the largest one, a binary problem
    "is y greater than this label?" is built and a logistic regression is
    fitted on it.

    Args:
        X: Training feature matrix, passed unchanged to
            ``LogisticRegression.fit``.
        y: Training labels. They must be orderable and support ``+ 1``
            (i.e. numeric); they do not need to start at 0 or be contiguous.

    Returns:
        An ``OrderedDict`` mapping the threshold position ``i`` (0..K-2, in
        ascending label order) to the fitted model for the i-th smallest
        label. Each model is trained on targets ``label`` (y <= label) and
        ``label + 1`` (y > label). The dict is empty when ``y`` holds fewer
        than two distinct labels.
    """
    y = np.asarray(y)
    # np.unique returns the distinct labels sorted ascending, so dropping the
    # last entry removes the largest label regardless of its value.  (Using
    # np.delete(labels, max(labels)) would treat the label value as an index.)
    thresholds = np.unique(y)[:-1]

    modelDict = OrderedDict()
    for i, ele in enumerate(thresholds):
        # Binary target for this threshold: `ele` for y <= ele, `ele + 1` otherwise.
        target = np.where(y <= ele, ele, ele + 1)
        model = LogisticRegression(solver='newton-cg', penalty='l2')
        model.fit(X, target)
        modelDict[i] = model
    return modelDict
def BDOC_prob(modelDict, test_X):
    """Collect ``predict_proba`` output of every model for ``test_X``.

    Args:
        modelDict: Mapping of key -> fitted model exposing ``predict_proba``.
        test_X: Samples handed to each model's ``predict_proba``.

    Returns:
        An ``OrderedDict`` with the same keys, in the same order, whose
        values are the corresponding ``predict_proba(test_X)`` results.
    """
    return OrderedDict(
        (key, clf.predict_proba(test_X)) for key, clf in modelDict.items()
    )
def BDOC_pred(probDict, len_test, labelNum, labels):
    """Turn the K-1 binary probability tables into ordinal class predictions.

    With ``labels`` sorted ascending as l_0 < ... < l_{K-1} and
    ``probDict[k][:, 1]`` read as P(y > l_k), the class probabilities are

        P(l_0)     = probDict[0][:, 0]
        P(l_k)     = probDict[k-1][:, 1] - probDict[k][:, 1]   (0 < k < K-1)
        P(l_{K-1}) = probDict[K-2][:, 1]

    and the predicted label is the one with the largest probability (the
    first one on ties).

    Args:
        probDict: Mapping from threshold position ``0..K-2`` to an array-like
            of shape (n_samples, 2), as produced by ``BDOC_prob``.
        len_test: Number of leading samples to predict.
        labelNum: Number of classes. Kept for interface compatibility; the
            class count is taken from ``labels``.
        labels: The distinct class labels (numeric). They are looked up by
            position, so they need not be 0..K-1.

    Returns:
        A float ``numpy`` array of length ``len_test`` holding the predicted
        label for each sample.

    Raises:
        ValueError: If ``labels`` is empty.
    """
    ordered = np.sort(np.asarray(labels))
    n_classes = len(ordered)
    if n_classes == 0:
        raise ValueError("labels must contain at least one class")

    y_pred = np.zeros(len_test)
    if n_classes == 1:
        # No binary model exists for a single class; it is the only answer.
        y_pred[:] = ordered[0]
        return y_pred

    # greater[k] holds P(y > ordered[k]) for every sample; shape (K-1, len_test).
    greater = np.stack(
        [np.asarray(probDict[k])[:len_test, 1] for k in range(n_classes - 1)]
    )
    prob = np.empty((n_classes, len_test))
    prob[0] = np.asarray(probDict[0])[:len_test, 0]
    prob[1:-1] = greater[:-1] - greater[1:]
    prob[-1] = greater[-1]

    # argmax yields a position in `ordered`; map it back to the label value.
    y_pred[:] = ordered[np.argmax(prob, axis=0)]
    return y_pred
if __name__ == '__main__':
    # Demo: stratified 10-fold cross-validation on the iris data set,
    # reporting the mean accuracy of the binary-decomposition classifier.
    X, y = datasets.load_iris(return_X_y=True)
    labels = np.unique(y)
    labelNum = len(set(y))

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    AccList = []
    for train_idx, test_idx in splitter.split(X, y):
        train_X, test_X = X[train_idx, :], X[test_idx, :]
        train_y, test_y = y[train_idx], y[test_idx]

        # Train the K-1 binary models on this fold, then score the held-out part.
        probDict = BDOC_prob(BDOC_train(train_X, train_y), test_X)
        y_pred = BDOC_pred(probDict, len(test_y), labelNum, labels)
        AccList.append(accuracy_score(y_true=test_y, y_pred=y_pred))

    print("Mean Acc =", np.mean(AccList))
不懂就问嘛~
想要类封装的版本,读者有需求再写~