欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

python备忘录

程序员文章站 2024-03-23 23:11:58
...

数值型数据处理_preprocessing


缩放特征数组

import numpy as np
from sklearn import preprocessing

# Rescale a single feature column to the [0, 1] range.
feature = np.array([[-100], [100]])
minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))

# fit_transform learns the column minimum/maximum and applies the scaling.
scale_feature = minmax_scale.fit_transform(feature)

# Scale using the median and the interquartile range instead of min/max.
robust_scaler = preprocessing.RobustScaler()

# Use the `feature` array from the scaling example; `x` is not defined
# until the standardization section further down.
robust_scaler.fit_transform(feature)

特征标准化

# Standardize the feature: fit the scaler first, then transform the same data.
scaler = preprocessing.StandardScaler()
x = np.array([[-100], [100]])
standardized = scaler.fit(x).transform(x)

归一化

from sklearn.preprocessing import Normalizer

# Sample observations; Normalizer rescales each row (observation) separately.
features = np.array([[0.5, 0.5],
                     [1.1, 3.4],
                     [1.5, 20.2]])

# Rescale every row to unit L2 (Euclidean) norm.
normalizer = Normalizer(norm='l2')
normalizer.transform(features)

# Same operation as a one-liner, keeping the result.
features_l2_norm = Normalizer(norm='l2').transform(features)
# With norm='l1', the absolute values in each row sum to 1.

创建多项式特征
当特征和目标值之间存在非线性关系时,可以创建多项式特征

from sklearn.preprocessing import PolynomialFeatures

features = np.array([[-100], [100]])

# degree is the highest polynomial degree to generate; include_bias=False
# omits the constant column of ones.
polynomial_interaction = PolynomialFeatures(degree=2, include_bias=False)
# interaction_only=True keeps only products of distinct features.
interaction = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)

polynomial_interaction.fit_transform(features)
interaction.fit_transform(features)

特征转换

from sklearn.preprocessing import FunctionTransformer
# Two observations of a single feature, stored as a (2, 1) column.
features = np.array([-100, 100]).reshape(-1, 1)

def add_ten(x):
    """Return ``x`` increased by 10 (computed with the ``+`` operator)."""
    shifted = x + 10
    return shifted

# Wrap the plain function so it can be used through the transformer API.
ten_transformer = FunctionTransformer(func=add_ten)

# Apply the wrapped function to the feature array.
ten_transformer.transform(features)

特征离散化

from sklearn.preprocessing import Binarizer

# Binarize: values greater than the threshold become 1, all others become 0.
age = np.array([[6], [12], [18], [36], [64]])
# threshold is a keyword-only argument in current scikit-learn releases,
# so it must be passed by name.
binarizer = Binarizer(threshold=18)
binarizer.fit_transform(age)

# Discretize: assign every value the index of the bin it falls into.
bin_edges = [20, 30, 60]
np.digitize(age, bins=bin_edges)
# By default each edge is the inclusive left boundary of its bin
# (bins[i-1] <= x < bins[i]).
# Pass right=True to make the right edge inclusive instead
# (bins[i-1] < x <= bins[i]).

缺失值填充

#KNN预测缺失值
from fancyimpute import KNN
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs

# Build a synthetic feature matrix: 1000 samples with 2 features each.
features, _ = make_blobs(n_samples=1000, n_features=2, random_state=1)

# Standardize the features before running the nearest-neighbour imputation.
scaler = StandardScaler()
standardized_features = scaler.fit_transform(features)

# Blank out one value, keeping the true value so it can be compared later.
true_value = standardized_features[0, 0]
standardized_features[0, 0] = np.nan

# Predict the missing value from the 5 nearest neighbours.
# Current fancyimpute releases expose fit_transform(); older ones used complete().
features_knn_imputed = KNN(k=5, verbose=0).fit_transform(standardized_features)


# Fill missing values with the column mean, median, or most frequent value.
# sklearn.preprocessing.Imputer has been removed from scikit-learn;
# SimpleImputer replaces it and always imputes column-wise (the old axis=0).
from sklearn.impute import SimpleImputer

mean_imputer = SimpleImputer(strategy='mean')
features_mean_imputed = mean_imputer.fit_transform(features)

相关标签: python