python实现RFM模型分析
程序员文章站
2024-01-21 11:12:46
...
python实现RFM模型分析
import numpy as np
import pandas as pd
import pyreadstat
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # 空间三维画图
import time
import datetime as dt
# ------------------------------------------------
# Input / output locations.
path = 'E:/data/'  # directory that holds the data file
file_name = 'eledata.sav'  # name of the data set file
output_file_name = 'rfm_result'  # base name of the output file
# ------------------------------------------------
# NOTE: score different user populations separately where possible, e.g.
# residential, commercial and industrial electricity users.

# Observation window in months (3, 6 or 12). The payment count and payment
# total inside this window determine the F and M scores.
time_window = 12

# Recency thresholds: days between the most recent payment and "today".
# Example for [15, 30, 60, 90]: <=15 days scores 5, 15-30 scores 4,
# 30-60 scores 3, 60-90 scores 2, and more than 90 scores 1.
param_r = [15, 30, 60, 90]
'''自定义函数:'''
#1.计算Recency分值
def R_score(param_r, x):
    """Return the Recency score for ``x`` days since the last payment.

    Args:
        param_r: Ascending, inclusive upper bounds of the score bands.
            With ``k`` bounds the best score is ``k + 1`` (``x`` not above
            the first bound) and the worst is ``1`` (``x`` above every
            bound). ``[15, 30, 60, 90]`` therefore yields scores 5..1.
        x: The value to score.

    Returns:
        The integer score, or ``None`` when ``x`` is NaN.
    """
    # NaN is the only value that differs from itself; it belongs to no band.
    if x != x:
        return None
    # Every bound that x exceeds pushes it one band further down.
    exceeded = sum(1 for bound in param_r if x > bound)
    return len(param_r) + 1 - exceeded
#2.计算f,m得分
def RFM_score(series, q, score_labels):
    """Bin ``series`` into ``len(score_labels)`` labelled classes.

    Args:
        series: pandas Series of values to score.
        q: Quantile levels evaluated on ``series``. They are used only to
            decide which binning method is applied; the binning itself
            always uses ``len(score_labels)`` bins.
        score_labels: Labels for the bins, lowest bin first.

    Returns:
        The result of ``pd.qcut`` (equal-frequency bins) when the quantile
        values contain more distinct values than there are labels,
        otherwise the result of ``pd.cut`` (equal-width bins).
    """
    quantile_values = series.quantile(q=q)
    n_bins = len(score_labels)
    # Too few distinct quantile values means equal-frequency edges would
    # collide, so fall back to equal-width bins in that case.
    if len(set(quantile_values)) > n_bins:
        return pd.qcut(series, q=n_bins, labels=score_labels)
    return pd.cut(series, bins=n_bins, labels=score_labels)
# 3. Assign each user to an RFM customer segment
def RFM_group(df, idx, x, r, f, m):
    """Return the customer segment label for one user.

    Each of the user's three scores is compared with the mean of the same
    column over all of ``df``; the three at-or-above / below results
    select one of eight segment labels.

    Args:
        df: DataFrame holding one row per user.
        idx: Column name of the user index.
        x: User index value to look up.
        r: Column name of the Recency score.
        f: Column name of the Frequency score.
        m: Column name of the Monetary score.

    Returns:
        The segment label, or ``None`` when the user is not present or a
        comparison is undefined (NaN score or NaN mean).

    Raises:
        ValueError: If more than one row matches ``x``.
    """
    rows = df[df[idx] == x]
    if rows.empty:
        return None
    if len(rows) > 1:
        raise ValueError(
            'RFM_group expects one row per user, got %d rows for %r'
            % (len(rows), x))

    flags = []
    for col in (r, f, m):
        value = rows[col].iloc[0]
        mean = df[col].mean()
        if value >= mean:
            flags.append(True)
        elif value < mean:
            flags.append(False)
        else:
            # Neither comparison holds (NaN involved): no segment applies.
            return None

    # Keys are (recency high, frequency high, monetary high).
    segments = {
        (True, True, True): '重要价值客户',
        (True, False, True): '重要发展客户',
        (False, True, True): '重要保持客户',
        (False, False, True): '重要挽留客户',
        (True, True, False): '一般价值客户',
        (True, False, False): '一般发展客户',
        (False, True, False): '一般保持客户',
        (False, False, False): '一般挽留客户',
    }
    return segments[tuple(flags)]
# Read the SPSS file; read_sav returns the data frame and its metadata.
df, meta = pyreadstat.read_sav(path + file_name)
print(meta.file_encoding)

# Reference date for all "days ago" computations: either today or a fixed
# end-of-2019 date.
# now = pd.to_datetime('today')
now = dt.datetime(2019, 12, 31)

# Rename the columns. rename() ignores keys that are not present, so the
# optional '合计' column needs no separate branch. '等级' must become
# 'voltage_lvl' because that is the name selected below and used later.
df = df.rename(columns={
    '单位': 'unit',
    '交费ID': 'payfee_id',
    '交费方式': 'pay_chnl',
    '用户号': 'acct_id',
    '区编号': 'tq_id',
    '区名称': 'tq_name',
    '交费金额': 'pay_fee',
    '收费日期': 'pay_date',
    '等级': 'voltage_lvl',
    '用户分类': 'user_cat',
    '合计': 'total_use',
})

# Keep only the fields that are used; copy so the assignments below write
# to an independent frame rather than to a view of the original.
df = df[['acct_id', 'tq_id', 'voltage_lvl', 'user_cat', 'pay_chnl',
         'pay_fee', 'pay_date']].copy()

# Normalise the id columns: integer account ids, string tq ids.
df['acct_id'] = df['acct_id'].astype('int64')
df['tq_id'] = df['tq_id'].astype('str')
# Output: UTF-8
# I. Build the per-user feature table.
starttime = dt.datetime.now()

# Look-back windows in days for the fee totals and payment-day counts.
windows = {'3m': 90, '6m': 180, '12m': 365}

records = []
# A single groupby pass replaces re-filtering the whole frame once per user;
# sort=False keeps users in order of first appearance.
for acct_id, payments in df.groupby('acct_id', sort=False):
    payments = payments.sort_values(by='pay_date', ascending=True)
    payments = payments.assign(
        time_span=payments['pay_date'].map(lambda x: (now - x).days))

    record = {
        # 1. Basic information, taken from the earliest payment row.
        'acct_id': acct_id,
        'tq_id': payments['tq_id'].iloc[0],
        'voltage_lvl': payments['voltage_lvl'].iloc[0],
        'user_cat': payments['user_cat'].iloc[0],
        # 2. Number of distinct payment channels.
        'pay_chnl_cnt': payments['pay_chnl'].unique().shape[0],
        # 3. Most recent payment date and its distance from `now` in days.
        'last_pay_date': payments['pay_date'].iloc[-1],
        'last_pay_span': payments['time_span'].iloc[-1],
    }

    # 4. Total paid per day, so several payments on one day count once.
    daily_fee = payments.groupby('time_span')['pay_fee'].sum()

    # 5. Fee total and number of payment days inside each window.
    for label, days in windows.items():
        in_window = daily_fee[daily_fee.index <= days]
        record['last_' + label + '_fee_sum'] = in_window.sum()
        record['last_' + label + '_fee_cnt'] = in_window.count()

    # 6. Keep the finished row.
    records.append(record)

# Build the frame once; DataFrame.append was removed in pandas 2.0.
df_fin = pd.DataFrame(records)

endtime = dt.datetime.now()
# total_seconds() also counts whole days, which .seconds drops.
print('用时', (endtime - starttime).total_seconds() / 60, '分钟')
# Output: 用时 0.0 分钟
# III. Build the RFM model and split the users into groups.
# 1. Compute the R, F and M scores.
df_fin['Recency'] = df_fin['last_pay_span'].apply(
    lambda days: R_score(param_r, days))
window_prefix = 'last_' + str(time_window) + 'm_fee_'
# Frequency comes from the payment count, Monetary from the payment total.
for score_col, stat in (('Frequency', 'cnt'), ('Monetary', 'sum')):
    binned = RFM_score(df_fin[window_prefix + stat],
                       [0, 0.2, 0.4, 0.6, 0.8, 1], [1, 2, 3, 4, 5])
    df_fin[score_col] = binned.astype('int')

# 2. Combine the three scores into one number: R is the hundreds digit,
#    F the tens digit and M the units digit.
df_fin['RFM_score'] = (100 * df_fin['Recency']
                       + 10 * df_fin['Frequency']
                       + df_fin['Monetary'])

# 3. Segment the users by binning the combined score into eight labelled
#    groups. (RFM_group offers a mean-based alternative; it is not used.)
rfm_q = [0, 1/8, 2/8, 3/8, 4/8, 5/8, 6/8, 7/8, 1]
rfm_cat = ['一般挽留客户', '一般保持客户', '一般发展客户', '一般价值客户',
           '重要挽留客户', '重要保持客户', '重要发展客户', '重要价值客户']
df_fin['user_group'] = RFM_score(df_fin['RFM_score'], rfm_q, rfm_cat)

# Write the result to an Excel file.
df_fin.to_excel(path + output_file_name + '.xlsx')
# Draw a 3-D scatter plot of the three scores, coloured by user group.
fig = plt.figure()
# Create the axes through the figure so they are actually attached to it;
# a bare Axes3D(fig) is no longer added automatically in current Matplotlib.
ax = fig.add_subplot(projection='3d')
color_dict = {'一般挽留客户': 'k', '一般保持客户': 'darkgreen',
              '一般发展客户': 'tan', '一般价值客户': 'midnightblue',
              '重要挽留客户': 'pink', '重要保持客户': 'yellow',
              '重要发展客户': 'cyan', '重要价值客户': 'r'}
point_colors = [color_dict[group] for group in df_fin['user_group']]
# No zdir override: Frequency stays on y and Monetary on z, so the data
# agree with the axis labels set below.
ax.scatter(df_fin['Recency'], df_fin['Frequency'], df_fin['Monetary'],
           c=point_colors)
# Axis labels, in x, y, z order.
ax.set_xlabel('Recency', fontdict={'size': 15, 'color': 'k'})
ax.set_ylabel('Frequency', fontdict={'size': 15, 'color': 'k'})
ax.set_zlabel('Monetary', fontdict={'size': 15, 'color': 'k'})
# Save before showing so the file is written while the figure is intact.
fig.savefig(path + 'rfm.png')
plt.show()
# A bare expression only displays in a notebook; print works in a script too.
print(df_fin['user_group'])
# Output:
# 0 一般挽留客户
# 1 重要挽留客户
# 2 重要价值客户
# 3 重要价值客户
# 4 一般价值客户
# 5 重要价值客户
# 6 重要保持客户
# 7 重要价值客户
# 8 重要保持客户
# 9 重要价值客户
# 10 一般价值客户
# Name: user_group, dtype: category
# Categories (8, object): [一般挽留客户 < 一般保持客户 < 一般发展客户 < 一般价值客户 < 重要挽留客户 < 重要保持客户 < 重要发展客户 < 重要价值客户]
上一篇: 秒杀系统设计参考
下一篇: 织梦5.7登陆注册实现