
Python: Ordinal Classification Based on Binary Decomposition


Reference: [1] Frank E., Hall M. A simple approach to ordinal classification. In: European Conference on Machine Learning. Springer, Berlin, Heidelberg, 2001: 145-156.

No class encapsulation here; this is just a function-level reproduction of the paper~

In my tests, the classification accuracy is about 1-2 percentage points higher than using a plain multiclass 'OVR' (one-vs-rest) classifier on its own.
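For context, the construction from [1] that the code below implements: for K ordered classes V_1 < V_2 < ... < V_K, train K-1 binary classifiers, where the k-th classifier estimates P(y > V_k). The class probabilities are then recovered as

P(y = V_1) = 1 - P(y > V_1)
P(y = V_k) = P(y > V_{k-1}) - P(y > V_k),   1 < k < K
P(y = V_K) = P(y > V_{K-1})

and the predicted class is the one with the largest recovered probability.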

import numpy as np
from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn import datasets
from collections import OrderedDict



def BDOC_train(X,y):
    rowNum = len(y)
    labels = np.unique(y)   # assumes ordered integer labels 0,1,...,K-1
    ###---------split the labels into K-1 binary problems-------------###
    labels_ = labels[:-1]   # the K-1 thresholds (drop the largest label)
    target = OrderedDict()
    for i,ele in enumerate(labels_):
        # binary target for threshold ele: samples with y <= ele keep class ele,
        # the rest become ele+1, so predict_proba[:,1] estimates P(y > ele)
        target[i] = deepcopy(y)
        for j in range(rowNum):
            if target[i][j] <= ele:
                target[i][j] = ele
            else:
                target[i][j] = ele + 1
    modelDict = OrderedDict()
    ###----------------initialize the K-1 binary base learners---------###
    for i,ele in enumerate(labels_):
        modelDict[i] = LogisticRegression(solver='newton-cg',penalty='l2')
    ###----------------fit the K-1 binary base learners----------------###
    for i,model in modelDict.items():
        model.fit(X,target[i])
    return modelDict

def BDOC_prob(modelDict,test_X):
    # collect the predicted probabilities of every binary model on the test set
    probDict = OrderedDict()
    for i,model in modelDict.items():
        probDict[i] = model.predict_proba(test_X)
    return probDict

def BDOC_pred(probDict,len_test,labelNum,labels):
    # reconstruct the K class probabilities from the K-1 binary models
    # (Frank & Hall [1]); assumes integer labels 0,1,...,K-1
    y_pred = np.zeros(len_test)
    for i in range(len_test):
        prob = np.zeros(labelNum)
        for ele in labels:
            if ele == min(labels):
                # P(y = 0) = 1 - P(y > 0)
                prob[ele] = probDict[ele][i][0]
            elif ele == max(labels):
                # P(y = K-1) = P(y > K-2)
                prob[ele] = probDict[ele-1][i][1]
            else:
                # P(y = k) = P(y > k-1) - P(y > k)
                prob[ele] = probDict[ele-1][i][1] - probDict[ele][i][1]
        y_pred[i] = np.argmax(prob)
    return y_pred


if __name__ == '__main__':
    X,y = datasets.load_iris(return_X_y=True)
    labelNum = len(set(y))
    labels = np.unique(y)
    AccList = []
    SKF = StratifiedKFold(n_splits=10,shuffle=True,random_state=0)
    for train_idx, test_idx in SKF.split(X, y):
        train_X, train_y = X[train_idx, :], y[train_idx]
        test_X, test_y = X[test_idx, :], y[test_idx]
        len_test = len(test_y)
        modelDict = BDOC_train(train_X,train_y)
        probDict = BDOC_prob(modelDict,test_X)
        # print(probDict[0])
        # print(probDict[0][1])
        # print(probDict[2][1][0])
        # print(probDict[2][1][1])
        y_pred = BDOC_pred(probDict, len_test, labelNum, labels)
        acc = accuracy_score(y_true=test_y,y_pred=y_pred)
        AccList.append(acc)
    print("Mean Acc =",np.mean(AccList))

 

If anything is unclear, just ask~

If readers want a class-encapsulated version, I'll write one when there's demand~
