欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

Python数据分析——以电商打折套路为例

程序员文章站 2024-03-26 09:01:17
...

如今大数据行业十分火热,本人认为Python是比较强大的数据分析工具,因此在网易云课堂上学习了Python数据分析,并完成了这个案例,在此记录代码、分析过程以及分析结论。
以下是"电商打折套路"的Python数据分析项目。

# -*- coding: utf-8 -*-
"""
Created on Wed Jan  9 15:31:45 2019

@author: Administrator
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from datetime import datetime

from bokeh.transform import jitter
warnings.filterwarnings('ignore')
from bokeh.plotting import figure ,show,output_file
from bokeh.models import ColumnDataSource
#导入数据
import os
# Work from the project folder so the relative workbook path below resolves.
os.chdir('C:\\Users\\Administrator\\Desktop\\python项目\\2电商打折')

# Load the first worksheet.  The keyword is ``sheet_name``: the older
# ``sheetname`` spelling was deprecated in pandas 0.21 and is rejected by
# current releases.
df=pd.read_excel('双十一数据.xlsx',sheet_name=0)
df.fillna(0,inplace=True)
# Index the rows by their timestamp and keep the day of month as a column.
# NOTE(review): ``.day`` needs 'update-time' to be parsed as datetimes -- confirm.
df.index=df['update-time']
df['date']=df.index.day

# --- Share of products that were on sale on Double 11 (day 11) ---
data1=df[["id","title","店名","date"]]

# Per product id: the last ("max") and first ("min") day on which it appears.
d1=data1[["id","date"]].groupby(by="id").agg(["max","min"])["date"]

# Ids with at least one record on day 11.  De-duplicated so that a product
# with several day-11 rows is counted once and does not multiply rows in the
# merge below.
id_11=data1[data1["date"]==11]["id"].drop_duplicates()

d2=pd.DataFrame({"id":id_11,"双十一是否售卖":True})

# Left-join the flag onto the per-id date range; ids without a day-11 record
# come back as NaN and are turned into False.
id_data=pd.merge(d1,d2,left_index=True,right_on="id",how="left")
id_data.fillna(False,inplace=True)

# Number of products on sale on day 11 and their share of all products.
m=len(d1)
m_11=len(id_11)
m_pre=m_11/m
print("双十一当天参与活动的商品个数是%i个,比例是%.2f%%"%(m_11,m_pre*100))

结论:双十一当天参与活动的商品个数是405个,比例是74.18%

#------------------------------------------------------------------------
# Classify every product by its first ("min") and last ("max") day on sale
# relative to day 11:
#   A: first day < 11 and last day > 11
#   B: first day < 11 and last day == 11
#   C: first day == 11 and last day > 11
#   D: first day == 11 and last day == 11
#   F: no record on day 11 (and not E or G)
#   E: last day < 11
#   G: first day > 11
# Assignments go through ``.loc[mask, "type"]`` so they write into id_data
# itself; the chained form ``id_data["type"][mask] = ...`` may write to a
# temporary copy and does nothing under pandas Copy-on-Write.
# Order matters: F is written first for every product missing on day 11,
# then E and G overwrite the subsets that were only sold before / after it.
id_data["type"]="待分类"
id_data.loc[(id_data["min"]<11)&(id_data["max"]>11),"type"]="A"
id_data.loc[(id_data["min"]<11)&(id_data["max"]==11),"type"]="B"
id_data.loc[(id_data["min"]==11)&(id_data["max"]>11),"type"]="C"
id_data.loc[(id_data["min"]==11)&(id_data["max"]==11),"type"]="D"
id_data.loc[id_data["双十一是否售卖"]==False,"type"]="F"
id_data.loc[id_data["max"]<11,"type"]="E"
id_data.loc[id_data["min"]>11,"type"]="G"
result1=id_data["type"].value_counts()
# reindex keeps the fixed A..G order and reports 0 for a class that has no
# products, where .loc with a list would raise KeyError.
result1=result1.reindex(["A","B","C","D","E","F","G"],fill_value=0)
# Pie chart of the share of each class, using a 7-colour yellow-green palette.
from bokeh.palettes import brewer
colori=brewer["YlGn"][7]
plt.axis("equal")
plt.pie(result1,labels=result1.index,autopct="%.2f%%",colors=colori,
        startangle=90,radius=1.5,counterclock=True)
#------------------------------------------------------------------------


# What happened to the products that were not on sale on Double 11?

# Products without a day-11 record, together with their class label.
id_not11=id_data[id_data["双十一是否售卖"]==False]
df_not11=id_not11[["id","type"]]

# id_data lacks the title column needed for the grouping below, so join the
# raw rows back in by product id.
data_not11=pd.merge(df_not11,df,on="id",how="left")

# (1) Temporarily delisted: class F products.
id_con1=id_data["id"][id_data["type"]=="F"].values

# (2) Relisted: ids that appear under more than one distinct title.
data_con2=data_not11[["id","title","date"]].groupby(by=["id","title"]).count()
title_count=data_con2.reset_index()["id"].value_counts()

id_con2=title_count[title_count>1].index

# (3) Pre-sale: the title contains "预售".  Titles that were blank in the
# workbook were filled with the number 0 earlier in the script; for such
# non-string values .str.contains yields NaN, which cannot be used as a
# boolean mask, so na=False treats them as "no match".
data_con3=data_not11[data_not11["title"].str.contains("预售",na=False)]
id_con3=data_con3["id"].value_counts().index

print("未参与双十一当天活动的商品里,%i个为暂时下架商品,%i个为重新上架商品,%i个为预售商品"%
      (len(id_con1),len(id_con2),len(id_con3))
      )

结论:未参与双十一当天活动的商品里,95个为暂时下架商品,155个为重新上架商品,69个为预售商品

#------------------------------------------------------------------------
# Re-run the product classification (same rules as earlier in the script):
# classes A-D describe products with a day-11 record by where their first
# ("min") and last ("max") day fall; F, E and G cover products without one.
# ``.loc[mask, "type"]`` writes into id_data directly; chained assignment
# ``id_data["type"][mask] = ...`` may hit a temporary copy and is a no-op
# under pandas Copy-on-Write.
# F must be written before E and G, which overwrite part of it.
id_data["type"]="待分类"
id_data.loc[(id_data["min"]<11)&(id_data["max"]>11),"type"]="A"
id_data.loc[(id_data["min"]<11)&(id_data["max"]==11),"type"]="B"
id_data.loc[(id_data["min"]==11)&(id_data["max"]>11),"type"]="C"
id_data.loc[(id_data["min"]==11)&(id_data["max"]==11),"type"]="D"
id_data.loc[id_data["双十一是否售卖"]==False,"type"]="F"
id_data.loc[id_data["max"]<11,"type"]="E"
id_data.loc[id_data["min"]>11,"type"]="G"
result1=id_data["type"].value_counts()
# reindex fixes the A..G order and yields 0 (not KeyError) for empty classes.
result1=result1.reindex(["A","B","C","D","E","F","G"],fill_value=0)

python数据分析--------电商打折套路为例

# Pie chart: share of products in each class.
from bokeh.palettes import brewer
colori=brewer["YlGn"][7]  # 7-colour yellow-green palette, one colour per class
plt.axis("equal")         # equal axis scaling so the pie is drawn as a circle
pie_options=dict(
    labels=result1.index,
    autopct="%.2f%%",
    colors=colori,
    startangle=90,
    radius=1.5,
    counterclock=True,
)
plt.pie(result1,**pie_options)
#------------------------------------------------------------------------


# What happened to the products that were not on sale on Double 11?

# Products without a day-11 record, together with their class label.
id_not11=id_data[id_data["双十一是否售卖"]==False]
df_not11=id_not11[["id","type"]]

# Join the raw rows back in by id: the title column used for grouping below
# only exists in df.
data_not11=pd.merge(df_not11,df,on="id",how="left")

# (1) Temporarily delisted: class F products.
id_con1=id_data["id"][id_data["type"]=="F"].values

# (2) Relisted: ids that appear under more than one distinct title.
data_con2=data_not11[["id","title","date"]].groupby(by=["id","title"]).count()
title_count=data_con2.reset_index()["id"].value_counts()

id_con2=title_count[title_count>1].index

# (3) Pre-sale: the title contains "预售".  na=False makes non-string titles
# (blank cells were filled with 0 earlier in the script) count as "no match"
# instead of producing NaN, which is not a valid boolean mask.
data_con3=data_not11[data_not11["title"].str.contains("预售",na=False)]
id_con3=data_con3["id"].value_counts().index

print("未参与双十一当天活动的商品里,%i个为暂时下架商品,%i个为重新上架商品,%i个为预售商品"%
      (len(id_con1),len(id_con2),len(id_con3))
      )
#------------------------------------------------------------------------


# Ids treated as taking part in Double 11: on sale on day 11 plus pre-sale.
data_11sale=id_11
data_11sale_final=np.hstack((data_11sale,id_con3))
result2_i=pd.DataFrame({"id":data_11sale_final})

# Products per shop among those on sale on day 11.  df can hold several rows
# for the same product (one per recorded date), so distinct ids are counted
# rather than raw rows.
x1=pd.DataFrame({"id":id_11})
x1_df=pd.merge(x1,df,on="id",how="left")
brand_11sale=x1_df.groupby(by="店名")["id"].nunique()

# Products per shop among the pre-sale ids, counted the same way.
x2=pd.DataFrame({"id":id_con3})
x2_df=pd.merge(x2,df,on="id",how="left")
brand_ys=x2_df.groupby(by="店名")["id"].nunique()


# Align both counts on shop name.  A shop missing from one of the two series
# would get NaN there (and a NaN total), so treat a missing count as 0.
result2_data=pd.DataFrame({"当天参与活动的商品数量":brand_11sale,
                           "预售商品数量":brand_ys}).fillna(0).astype(int)

result2_data["总量"]=result2_data["当天参与活动的商品数量"]+result2_data["预售商品数量"]

# sort_values returns a new frame; keep it so the bars are ordered by total.
result2_data=result2_data.sort_values(by="总量",ascending=False)


from bokeh.models import HoverTool
from bokeh.core.properties import value

lst_brand=result2_data.index.tolist()
# Names of the first two columns (the two per-shop counts).
lst_type=result2_data.columns.tolist()[:2]

# lst_type and color are not used by the single-series bar chart below.
color=["red","green"]
result2_data.index.name="brand"

# ASCII column names so they can be referenced as @fields in the tooltips.
result2_data.columns=["sale_on_11","presell","sum"]

source1=ColumnDataSource(result2_data)

hover=HoverTool(
        tooltips=[
                ("品牌","@brand"),
                ("双十一当天参与活动商品数量","@sale_on_11"),
                ("预售商品数量","@presell"),
                ("商品总数","@sum")
                ])


output_file("project08.html")

# Categorical x axis with one slot per shop, in the sorted order above.
p=figure(x_range=lst_brand,plot_width=900,plot_height=350,
         title="各个品牌参与双十一活动的情况",
  tools=[hover,"box_select,pan,reset,wheel_zoom,crosshair"]
         )

# One bar per shop; bar height is the total number of participating products.
p.vbar(top="sum",x="brand",source=source1,width=0.9,
       muted_color="black", muted_alpha=0.2
       )
show(p)
# Number of participating products per shop
#------------------------------------------------------------------------

python数据分析--------电商打折套路为例

上一篇: 逻辑回归实战--R/python代码

下一篇: