欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

决策树之实现

程序员文章站 2022-05-21 23:29:25
...
import  pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

##http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt

# Train and evaluate a decision-tree classifier on a passenger table, then
# export the fitted tree in Graphviz format.
#
# Input : 'aaa.txt', a CSV file providing at least the columns
#         'pclass', 'age', 'sex' and 'survived'.
# Output: the encoded feature names, the predictions, the true labels and
#         the test accuracy on stdout; the fitted tree in 'tree.dot'.

# 1. Load the data.
data = pd.read_csv('aaa.txt')

# 2. Select features and target.
# .copy() gives an independent frame, so the fill below is guaranteed to
# land in it instead of in a temporary derived from `data`.
x = data[['pclass', 'age', 'sex']].copy()
y = data['survived']

# 3. Data cleaning: replace missing ages with the mean age.
# The result is assigned back rather than using a chained
# `x['age'].fillna(..., inplace=True)`: the chained form raises
# SettingWithCopyWarning and is a silent no-op under pandas Copy-on-Write.
x['age'] = x['age'].fillna(x['age'].mean())

# 4. One-hot encoding: convert each row to a dict and let DictVectorizer
# turn the records into a dense numeric array.
records = x.to_dict(orient='records')
vectorizer = DictVectorizer(sparse=False)
x = vectorizer.fit_transform(records)

# `feature_names_` is the fitted list of output column names. It is used
# instead of get_feature_names(), which was removed in scikit-learn 1.2.
feature_names = list(vectorizer.feature_names_)
print(feature_names)

# 5. Split into training (70%) and test (30%) sets.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7)

# 6. Fit the decision tree (entropy criterion, depth limited to 5).
# The fitted estimator is kept in `dc` only; `y` keeps holding the labels.
dc = DecisionTreeClassifier(criterion='entropy', max_depth=5)
dc.fit(x_train, y_train)

y_pred = dc.predict(x_test)
print('测试结果:\n', y_pred)
print('真实结果:\n', y_test)

# 7. Mean accuracy on the held-out test set.
print(dc.score(x_test, y_test))

from sklearn.tree import export_graphviz

# 8. Export the tree. The names come from the fitted vectorizer so they
# always match the number and order of the encoded columns; a hard-coded
# list breaks as soon as a categorical column has a different set of values.
export_graphviz(dc, out_file='tree.dot', feature_names=feature_names)
相关标签: 决策树