python做APP/小程序会员数据分析及可视化
最近学习了python的matplotlib模块做数据可视化,正好用小程序的会员数据练习下matplotlib模块的各种画图方法。数据来自公司数据库,数据提取和清洗的代码就不显示了~
使用思维导图做出大体逻辑
首先用思维导图做了个大体逻辑及需要分析的选项,我是从会员结构、会员属性、会员标签3个方面下手:
会员结构
将目前已有的会员分为以下三种(其实可以分得更细,比如首单会员,复购1次会员,复购2次会员等):
1.活跃会员:180天内有购买记录的会员;
2.流失/沉睡会员:180天内无购买记录的会员;
3.未动销会员:注册后无购买记录的会员。
针对这3种类型的会员做目前人数占比及历史增长曲线。这里使用了matplotlib的饼图和单元素条形图。
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.font_manager import FontProperties
# Font handle for the SimHei TTF file; the path is specific to the author's
# Anaconda installation.
font = FontProperties(fname=r"/usr/local/anaconda3/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/SimHei.ttf", size=14)
# Let matplotlib render Chinese text. The SimHei font has to be installed
# into matplotlib's font directory beforehand.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(figsize=(15, 5), facecolor='w', edgecolor='b', dpi=80)

# Left panel: pie chart of the three member groups.
activepies = ['活跃会员数', '流失会员数', '未动销会员数']
# Skip the first value of row 0 -- presumably the member total, judging by
# the four labels used for the bar chart below; confirm against `df`.
data = df.loc[0].values[1:].tolist()
color = ['c', 'm', 'r']
ax1 = plt.subplot(121)
plt.pie(data, colors=color, shadow=True, explode=(0.05, 0.05, 0.05), autopct='%1.1f%%',
        textprops={'fontsize': 20, 'color': 'b'}, labeldistance=1.1, pctdistance=0.6)
plt.title('会员占比总览', fontsize=15)
plt.legend(labels=activepies)
plt.axis('equal')

# Right panel: bar chart of all four values in row 0.
ax2 = plt.subplot(122)
activepies1 = ['会员总数', '活跃会员数', '流失会员数', '未动销会员数']
data1 = df.loc[0].values.tolist()
rect_1 = plt.bar(activepies1, data1, color='b', width=0.4)
plt.ylabel('人数(千万)')
plt.title('会员人数总览', fontsize=15)
# Write the exact head count slightly above each bar.
for rect_1s in rect_1:
    height = rect_1s.get_height()
    plt.text(rect_1s.get_x() + rect_1s.get_width() / 2, 1.03 * height, '%s' % int(height), ha='center')
# Tick labels are in units of ten million, matching the y-axis label.
plt.yticks([10000000, 20000000, 30000000, 40000000], [1, 2, 3, 4], color='k')
# Hide the top and right borders of the bar chart.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
当前会员结构如下,目前活跃的人数占比才27.2%,未动销的会员占比25%,沉睡会员占比47.9%。
历史会员增长曲线
大体了解了目前的会员结构后,可以继续深入了解下今年1-6月份各结构会员的数量及增长趋势。数量可以继续使用条形图体现,趋势可以使用折线图体现,代码如下(这里的代码写得有些啰嗦,没有继续去优化~~):
plt.figure(figsize=(10, 6), facecolor='w', edgecolor='k', dpi=100)

# First subplot: grouped bars of member counts for months 1-6.
ax1 = plt.subplot(211)
x = (1, 2, 3, 4, 5, 6)
# (row label in `data`, leftward x offset, colour, legend label), listed in
# the order the series are drawn so the legend order is stable.
# NOTE(review): rows 0, 1, 2 and then 4 are read (row 3 is skipped) --
# confirm this matches the layout of `data`.
bar_specs = (
    (0, 0, 'b', '会员总数'),
    (1, 0.4, 'c', '活跃人数'),
    (2, 0.2, 'y', '沉睡人数'),
    (4, 0.6, 'g', '未动销人数'),
)
for row, offset, colour, label in bar_specs:
    bars = plt.bar(x=[i - offset for i in x], height=data.loc[row].values[1:].tolist(),
                   color=colour, width=0.2, label=label)
    for bar in bars:
        height = bar.get_height()
        # Label each bar with its value in units of 10,000 (万).
        plt.text(bar.get_x() + bar.get_width() / 2, height + 2, str(int(height / 10000)),
                 ha="center", va="bottom")
plt.ylim((0, 40000000))
# Tick labels are in units of 10,000 to agree with the y-axis label and the
# bar annotations: 5,000,000 people is 500 (万).
plt.yticks(range(5000000, 40000000, 5000000), range(500, 4000, 500))
# Centre each month tick under its group of four bars.
plt.xticks([index - 0.3 for index in x], x)
plt.ylabel('会员人数(万)')
plt.xlabel('月份')
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax1.set_title('小程序会员总览')
plt.legend(ncol=4, fontsize=8, frameon=False, handletextpad=0.2, labelspacing=0,
           handleheight=0.3, columnspacing=0.2)

# Second subplot: month-over-month growth-rate lines for months 2-6.
ax2 = plt.subplot(212)
x = [2, 3, 4, 5, 6]
# (row label in `new_data`, colour, legend label)
line_specs = (
    (0, 'b', '会员总数增长率'),
    (1, 'c', '活跃人数增长率'),
    (2, 'y', '沉睡人数增长率'),
    (3, 'g', '新增人数增长率'),
)
for row, colour, label in line_specs:
    rates = new_data.loc[row].values[1:].tolist()
    plt.plot(x, rates, linewidth=1.0, color=colour, label=label)
    plt.plot(x, rates, 'o', color=colour)
    # Pair each rate with its own month; looking the month up via
    # list.index() would misplace labels when two months share a value.
    for month, rate in zip(x, rates):
        # The annotation text is passed positionally because the keyword was
        # renamed from `s` to `text` in matplotlib 3.3.
        plt.annotate(round(rate, 2), xy=(month, rate), xytext=(month, rate))
plt.legend(ncol=4, fontsize=8, frameon=False, handletextpad=0.2, labelspacing=0,
           handleheight=0.3, columnspacing=0.2)
plt.ylabel('增长率(%)')
plt.xlabel('月份')
plt.xticks([2, 3, 4, 5, 6], ['2月', '3月', '4月', '5月', '6月'])
ax2.set_title('会员增长曲线')
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
plt.tight_layout()
结果如下,虽然会员总数每月增长平稳,但活跃人数有所下跌,沉睡人数每月上涨。个人觉得应从用户体验感、产品质量、售后服务等方面下手,提高用户粘性,防止用户流失。
小程序新客分析
从整体和历史数据了解了会员结构变化后,还可以继续了解下小程序的新客来源渠道及新客的及时动销率,这里使用了twinx()方法构造了一个YXY(双Y轴)坐标轴,将条形图和折线图做在了一张图上:
plt.figure(figsize=(20, 6), facecolor='w', edgecolor='k', dpi=100)
x = range(1, 7, 1)
colors = ['g', 'b', 'y', 'm']

# One bar series per acquisition channel; series n is shifted 0.2 * (n - 1)
# to the left so the four bars of a month sit side by side.
for n, field in enumerate(['未知', 'SIEBEL', 'WEB', 'ZNPOS'], start=1):
    data = df[df.channel == field]['siebel_num'].values.tolist()
    # The legend shows a numbered alias ('渠道1' ...) instead of the channel name.
    bar = plt.bar(x=[i - 0.2 * (n - 1) for i in x], height=data, width=0.2,
                  color=colors[n - 1], label='渠道%s' % n)
    # Print the raw count just above each bar.
    for b in bar:
        height = b.get_height()
        plt.text(x=b.get_x() + b.get_width() / 2, y=height + 2, s=height, ha='center', va='bottom')

# Centre each month tick under its group of four bars.
plt.xticks([index - 0.3 for index in x], x)
# Tick labels are in units of 10,000 (万), matching the y-axis label.
plt.yticks(list(range(0, 1200000, 200000)), [0, 20, 40, 60, 80, 100])
plt.xlabel('月份')
plt.ylabel('新增人数(万)')
plt.title('1-6月各渠道新增人数即及时动销率', fontsize=20, fontweight='bold')
plt.legend(ncol=4, loc='upper center')

# Grab the current axes object so a second y axis can share its x axis.
ax = plt.gca()
ax.spines['top'].set_visible(False)
# x positions of the rate points, aligned with the tick positions (month - 0.3).
x1 = [0.7, 1.7, 2.7, 3.7, 4.7, 5.7]
ax2 = ax.twinx()
ax2.plot(x1, d2['rate'].values.tolist(), linewidth=1.0, color='r', linestyle='-',
         label='新客及时动销率', marker='.', markersize=10)
ax2.set_ylim(0.5, 0.85)
ax2.spines['top'].set_visible(False)
# NOTE(review): this legend presumably attaches to the twin axes (the current
# axes after twinx()) and therefore lists only the rate line -- confirm.
plt.legend()
结果如下,小程序的新客来源渠道比较集中,基本集中在渠道3上,新客的及时动销率还算稳定,基本在65%~80%之间。
会员属性分析
从整体了解会员结构后,我们可以从会员的各种基本属性下手,分析各类属性的会员的占比及客贡献、动销率(以性别为例)。这里使用了matplotlib的图与表相结合。
# Pie chart of member counts by gender, with a small table of per-gender
# metrics drawn just below the pie.
fig = plt.figure(figsize=(15, 5), facecolor='w', edgecolor='k', dpi=100)
plt.subplot(131)

gender_labels = ('未知', '女', '男')
pie = plt.pie(
    d3['siebel_total_num'],
    explode=(0.01, 0.01, 0.01),
    labels=gender_labels,
    colors=['g', 'b', 'y'],
    autopct='%1.2f%%',
    startangle=90,
    radius=1,
    textprops={'fontsize': 10, 'color': 'k'},
)

# Table contents: columns of d3 from position 4 onward, transposed so that
# each metric becomes a row and each gender a column.
rowLabels = ['客贡献', '动销率']
colLabels = ['未知', '女', '男']
cellText = d3.iloc[:, 4:].T.values.tolist()

# Five evenly spaced colormap positions in [0, 0.5], reversed so the header
# colours run from darker to lighter.
shade_steps = np.linspace(0, 0.5, 5)
row_shades = plt.cm.BuPu(shade_steps)[::-1]
col_shades = plt.cm.Reds(shade_steps)[::-1]

table = plt.table(
    cellText=cellText,
    rowLabels=rowLabels,
    colLabels=colLabels,
    rowColours=row_shades,
    colColours=col_shades,
    cellLoc='center',
    rowLoc='center',
    loc='top',
    bbox=[0, -0.15, 1, 0.2],
)
plt.axis('equal')
plt.title('会员性别占比')
结果如下,未知数据是基础数据维护不到位造成。我们通过男女数据对比,可以看到女性的动销率和客贡献会比男性高。这是大部分app/小程序的正常现象,现在最好赚的钱是女人的钱,其次是孩子,然后是老人,接着是宠物,最后才是男人~
会员标签
根据会员的基本属性及会员的行为为会员贴标签和根据各类指标对会员进行会员画像,这个比较复杂。以后有空再写~~哈哈
本文地址:https://blog.csdn.net/King_liyueqiao/article/details/107357158
下一篇: AutoLayout2