欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识!
您现在的位置是: 首页

python实现RFM模型分析

程序员文章站 2024-01-21 11:12:46
...

python实现RFM模型分析

import numpy as np
import pandas as pd
import pyreadstat
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # 空间三维画图
import time
import datetime as dt


# ------------------------------------------------
# Input / output locations. `path` is joined to file names by plain string
# concatenation, so it must keep its trailing slash.
path = "E:/data/"                 # directory holding the data file
file_name = "eledata.sav"         # name of the data set file
output_file_name = "rfm_result"   # base name of the output file
# ------------------------------------------------
# NOTE: when using this model, preferably score different user groups
# separately, e.g. residential, commercial and industrial electricity users.

# Observation window in months (3, 6 or 12): the number of payments and the
# total amount paid within this window determine the F and M scores.
time_window = 12

# Recency cut points: days between the most recent payment and "today".
# With [15, 30, 60, 90]: <=15 days scores 5, 15-30 scores 4, 30-60 scores 3,
# 60-90 scores 2 and more than 90 days scores 1.
param_r = [15, 30, 60, 90]

'''自定义函数:'''
#1.计算Recency分值
def R_score(param_r,x):
    """Turn a recency value into a score from 5 (most recent) down to 1.

    param_r: sequence of cut points; only the first four entries are read.
    x:       the value to score.

    Returns 5 when x <= param_r[0], then 4, 3 and 2 for each following
    interval, and 1 when x > param_r[3]. If x compares false against every
    cut point (for example NaN) the result is None.
    """
    score = 5
    # Walk the cut points in order; the first one that x does not exceed
    # decides the score.
    for upper in param_r[:4]:
        if x <= upper:
            return score
        score -= 1
    if x > param_r[3]:
        return 1
    return None

#2.计算f,m得分
def RFM_score(series,q,score_labels):
    """Bin a numeric series into len(score_labels) labelled classes.

    series:       pandas Series of values to score.
    q:            quantile levels; used only to test whether the quantile
                  edges of `series` are distinct enough for quantile binning.
    score_labels: labels for the bins, lowest bin first.

    If the quantiles at `q` contain more distinct values than there are
    labels, the series is split into equal-frequency bins with pd.qcut;
    otherwise it is split into equal-width bins with pd.cut. Either way the
    number of bins is len(score_labels).
    """
    n_bins = len(score_labels)
    distinct_edges = set(series.quantile(q=q))
    # Too many tied quantile edges: fall back to equal-width bins.
    if len(distinct_edges) <= n_bins:
        return pd.cut(series, bins=n_bins, labels=score_labels)
    return pd.qcut(series, q=n_bins, labels=score_labels)
    
#2.计算rfm用户分群
def RFM_group(df,idx,x,r,f,m):
    """Classify one user into an RFM segment by comparing scores to the means.

    df:  DataFrame holding one row per user.
    idx: column name of the user index.
    x:   the user index value to classify.
    r:   column name of the Recency score.
    f:   column name of the Frequency score.
    m:   column name of the Monetary score.

    Each of the user's three scores is flagged "high" when it is >= the
    column mean over `df` and "low" when it is below it; the combination of
    flags selects one of eight segment names.

    Returns the segment name, or None when `x` is not present in `df` or any
    score/mean cannot be compared (e.g. NaN).
    Raises ValueError when more than one row matches `x`.
    """
    segments = {
        (True,  True,  True):  '重要价值客户',
        (True,  False, True):  '重要发展客户',
        (False, True,  True):  '重要保持客户',
        (False, False, True):  '重要挽留客户',
        (True,  True,  False): '一般价值客户',
        (True,  False, False): '一般发展客户',
        (False, True,  False): '一般保持客户',
        (False, False, False): '一般挽留客户',
    }

    rows = df[df[idx] == x]
    if len(rows) == 0:
        return None
    if len(rows) > 1:
        # Comparing a multi-element array in a boolean context is ambiguous;
        # fail with a message that names the actual problem.
        raise ValueError(
            'RFM_group expects exactly one row for %r in column %r, found %d'
            % (x, idx, len(rows)))

    flags = []
    for col in (r, f, m):
        value = rows[col].iloc[0]
        mean = df[col].mean()
        if value >= mean:
            flags.append(True)
        elif value < mean:
            flags.append(False)
        else:
            # Neither comparison holds (NaN score or NaN mean): no segment.
            return None
    return segments[tuple(flags)]


# Load the SPSS .sav data set; `meta` is the metadata object returned
# alongside the data, from which the file encoding is printed.
df,meta = pyreadstat.read_sav(path+file_name)
print(meta.file_encoding)

# Reference date for all "days since payment" calculations. Fixed to the end
# of 2019 here; use pd.to_datetime('today') to score against the current date.
now = dt.datetime(2019,12,31)

# Give the columns English names. rename() ignores keys that are not present,
# so the optional '合计' column needs no separate branch. '等级' is mapped to
# 'voltage_lvl' because that is the name every later step selects.
df = df.rename(columns={
    '单位':'unit',
    '交费ID':'payfee_id',
    '交费方式':'pay_chnl',
    '用户号':'acct_id',
    '区编号':'tq_id',
    '区名称':'tq_name',
    '交费金额':'pay_fee',
    '收费日期':'pay_date',
    '等级':'voltage_lvl',
    '用户分类':'user_cat',
    '合计':'total_use',
})

# Keep only the fields used downstream. copy() makes the result an
# independent frame so the column assignments below do not write into a view.
df = df[['acct_id', 'tq_id', 'voltage_lvl', 'user_cat', 'pay_chnl',
         'pay_fee', 'pay_date']].copy()

# Normalise the id columns: account id as integer, tq_id as string.
df['acct_id'] = df['acct_id'].astype('int64')
df['tq_id'] = df['tq_id'].astype('str')
# Output: UTF-8
# Part 1: build the per-account feature table `df_fin` (one row per acct_id).
starttime = dt.datetime.now()

# Look-back windows as (months used in the column name, length in days).
fee_windows = [(3, 90), (6, 180), (12, 365)]

user_frames = []
# groupby(sort=False) visits accounts in order of first appearance and avoids
# re-scanning the whole frame once per account.
for acct_id, payments in df.groupby('acct_id', sort=False):
    payments = payments.sort_values(by='pay_date', ascending=True)

    # Days between each payment and the reference date `now`.
    payments['time_span'] = payments['pay_date'].map(lambda d: (now - d).days)

    # 1. Basic information, taken from the account's earliest payment row.
    #    copy() so the assignments below write to an independent frame.
    features = payments[['acct_id','tq_id','voltage_lvl','user_cat']].iloc[:1].copy()

    # 2. Number of distinct payment channels used.
    features['pay_chnl_cnt'] = len(payments['pay_chnl'].unique())

    # 3. Most recent payment: its date and its distance from `now` in days.
    features['last_pay_date'] = payments['pay_date'].values[-1:]
    features['last_pay_span'] = payments['time_span'].values[-1:]

    # 4. Total amount paid per day, so several payments on one day count once.
    daily_fee = (payments.groupby('time_span')['pay_fee'].sum()
                 .sort_index(ascending=False))

    # 5. Total amount and number of payment days inside each window.
    for months, days in fee_windows:
        in_window = daily_fee[daily_fee.index <= days]
        features['last_%dm_fee_sum' % months] = in_window.sum()
        features['last_%dm_fee_cnt' % months] = in_window.count()

    # 6. Collect the finished row; everything is concatenated once at the end
    #    (DataFrame.append was removed in pandas 2.0 and copied on every call).
    user_frames.append(features)

df_fin = pd.concat(user_frames, ignore_index=True) if user_frames else pd.DataFrame()
endtime = dt.datetime.now()
# total_seconds() includes whole days, unlike the .seconds attribute.
print('用时',(endtime - starttime).total_seconds()/60,'分钟')
# Output: 用时 0.0 分钟
# Part 3: build the RFM model and split the users into groups.

# 1. R, F and M scores. F and M are taken from the payment count / payment
#    total of the configured observation window and scored 1..5.
quintiles = [0,0.2,0.4,0.6,0.8,1]
score_labels = [1,2,3,4,5]
window_prefix = 'last_'+str(time_window)+'m_fee_'
df_fin['Recency'] = df_fin['last_pay_span'].map(lambda x:R_score(param_r,x))
df_fin['Frequency'] = RFM_score(df_fin[window_prefix+'cnt'],quintiles,score_labels).astype('int')
df_fin['Monetary'] = RFM_score(df_fin[window_prefix+'sum'],quintiles,score_labels).astype('int')

# 2. Combined score: R is the hundreds digit, F the tens digit, M the units.
df_fin['RFM_score'] = df_fin['Recency']*100+df_fin['Frequency']*10+df_fin['Monetary']*1

# 3. User groups: the combined score is binned into eight classes, lowest
#    score first. NOTE: the labels are therefore ordinal tiers of RFM_score,
#    not the per-dimension above/below-mean classification that RFM_group
#    implements.
rfm_q = [0,1/8,2/8,3/8,4/8,5/8,6/8,7/8,1]
rfm_cat = ['一般挽留客户','一般保持客户','一般发展客户','一般价值客户','重要挽留客户','重要保持客户','重要发展客户','重要价值客户']
df_fin['user_group'] = RFM_score(df_fin['RFM_score'],rfm_q,rfm_cat)

# Write the result table to an Excel file.
df_fin.to_excel(path+output_file_name+'.xlsx')

# 3D scatter plot of the three scores, coloured by user group.
fig = plt.figure()
# add_subplot attaches the 3D axes to the figure; constructing Axes3D(fig)
# directly no longer does so in current matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
color_dict = {'一般挽留客户':'k','一般保持客户':'darkgreen','一般发展客户':'tan','一般价值客户':'midnightblue',
              '重要挽留客户':'pink','重要保持客户':'yellow','重要发展客户':'cyan','重要价值客户':'r'}
point_colors = [color_dict[group] for group in df_fin['user_group']]
# Default zdir keeps Frequency on the y axis and Monetary on the z axis, so
# the data matches the axis labels set below.
ax.scatter(df_fin['Recency'], df_fin['Frequency'], df_fin['Monetary'], c=point_colors)

# Axis labels (order is x, y, z).
ax.set_xlabel('Recency', fontdict={'size': 15, 'color': 'k'})
ax.set_ylabel('Frequency', fontdict={'size': 15, 'color': 'k'})
ax.set_zlabel('Monetary', fontdict={'size': 15, 'color': 'k'})
# Save before showing: once an interactive window has been closed the figure
# may no longer hold the drawing.
fig.savefig(path+'rfm.png')
plt.show()

python实现RFM模型分析

df_fin['user_group']
0     一般挽留客户
1     重要挽留客户
2     重要价值客户
3     重要价值客户
4     一般价值客户
5     重要价值客户
6     重要保持客户
7     重要价值客户
8     重要保持客户
9     重要价值客户
10    一般价值客户
Name: user_group, dtype: category
Categories (8, object): [一般挽留客户 < 一般保持客户 < 一般发展客户 < 一般价值客户 < 重要挽留客户 < 重要保持客户 < 重要发展客户 < 重要价值客户]