原始因子处理之手写标准化函数
程序员文章站
2022-07-13 15:18:29
...
一、标准化
代码实现(z_score方法标准化)
def standardlize(data, inf2nan=True, axis=1):
    """Standardize factor data with the z-score method: (x - mean) / std.

    The mean and the sample standard deviation (``ddof=1``) are always
    computed from the finite, non-NaN observations only, so ``np.inf``,
    ``-np.inf`` and ``np.nan`` never distort the statistics. The input
    object is never modified; a new object is returned.

    Parameters
    ----------
    data : pd.Series, pd.DataFrame or np.ndarray (1-D or 2-D)
        The values to standardize. A 1-D array is treated like a Series
        and a 2-D array like a DataFrame.
    inf2nan : bool, default True
        If true, ``np.inf`` and ``-np.inf`` are replaced by ``np.nan`` in
        the result. If false, infinite entries are excluded from the
        statistics but go through the same arithmetic as every other
        entry (so they normally stay infinite).
    axis : {0, 1}, default 1
        Only used for 2-D input. ``0`` standardizes every column,
        ``1`` standardizes every row.

    Returns
    -------
    pd.Series, pd.DataFrame or np.ndarray
        Standardized data of the same kind and shape as ``data``.
        For 2-D input with an ``axis`` other than 0 or 1 the string
        ``'axis值有误'`` is returned, and for unsupported input types a
        message is printed and ``None`` is returned; both are legacy
        behaviours kept so existing callers are not broken.
    """
    if isinstance(data, np.ndarray) and data.ndim in (1, 2):
        # Route arrays through the pandas paths so all input kinds share
        # exactly the same NaN/inf semantics, then unwrap the result.
        wrapped = pd.Series(data) if data.ndim == 1 else pd.DataFrame(data)
        result = standardlize(wrapped, inf2nan=inf2nan, axis=axis)
        if isinstance(result, (pd.Series, pd.DataFrame)):
            return result.to_numpy()
        return result  # legacy error string for an invalid axis

    if isinstance(data, pd.DataFrame):
        if axis == 1:
            # Statistics per row; broadcast them back along the index.
            stat_axis, align_axis = 1, 0
        elif axis == 0:
            # Statistics per column; broadcast them back along the columns.
            stat_axis, align_axis = 0, 1
        else:
            return 'axis值有误'

        # mask() returns a new frame, so the caller's data is left intact.
        finite = data.mask(np.isinf(data))
        mean = finite.mean(axis=stat_axis)
        std = finite.std(axis=stat_axis, ddof=1)
        # With inf2nan disabled the raw values (infinities included) are
        # transformed, but the statistics still come from `finite`.
        base = finite if inf2nan else data
        return base.sub(mean, axis=align_axis).div(std, axis=align_axis)

    if isinstance(data, pd.Series):
        finite = data.mask(np.isinf(data))
        base = finite if inf2nan else data
        return (base - finite.mean()) / finite.std(ddof=1)

    print('data不是pd.Series和pd.DataFrame类型')
    return None