Matplotlib _ 04 柱形图与盒图
程序员文章站
2022-07-14 10:06:43
...
import pandas as pd
import matplotlib.pyplot as plt
# Load the movie-score data set from the local CSV file.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)

# Columns kept for the analysis. 'Metacritic_user_nom' (sic) is the spelling
# that appears in the printed column header, so it must stay as written.
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Preview the first five rows of the selected columns.
print(norm_reviews.head(5))
FILM RT_user_norm Metacritic_user_nom \
0 Avengers: Age of Ultron (2015) 4.3 3.55
1 Cinderella (2015) 4.0 3.75
2 Ant-Man (2015) 4.5 4.05
3 Do You Believe? (2015) 4.2 2.35
4 Hot Tub Time Machine 2 (2015) 1.4 1.70
IMDB_norm Fandango_Ratingvalue Fandango_Stars
0 3.90 4.5 5.0
1 3.55 4.5 5.0
2 3.90 4.5 5.0
3 2.70 4.5 5.0
4 2.55 3.0 3.5
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Count how often each distinct rating occurs, then order the result by the
# rating value (the index) instead of by frequency.
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

print(fandango_distribution)
print(imd_distribution)
2.7 2
2.8 2
2.9 5
3.0 4
3.1 3
3.2 5
3.3 4
3.4 9
3.5 9
3.6 8
3.7 9
3.8 5
3.9 12
4.0 7
4.1 16
4.2 12
4.3 11
4.4 7
4.5 9
4.6 4
4.8 3
Name: Fandango_Ratingvalue, dtype: int64
2.00 1
2.10 1
2.15 1
2.20 1
2.30 2
2.45 2
2.50 1
2.55 1
2.60 2
2.70 4
2.75 5
2.80 2
2.85 1
2.90 1
2.95 3
3.00 2
3.05 4
3.10 1
3.15 9
3.20 6
3.25 4
3.30 9
3.35 7
3.40 1
3.45 7
3.50 4
3.55 7
3.60 10
3.65 5
3.70 8
3.75 6
3.80 3
3.85 4
3.90 9
3.95 2
4.00 1
4.05 1
4.10 4
4.15 1
4.20 2
4.30 1
Name: IMDB_norm, dtype: int64
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Per-rating frequency tables sorted by rating value. They are not referenced
# again in this snippet; the histogram below bins the raw column itself.
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

# Histogram of the Fandango ratings restricted to the interval [4, 5],
# split into 20 bins.
fandango_ratings = norm_reviews['Fandango_Ratingvalue']
fig, ax = plt.subplots()
ax.hist(fandango_ratings, range=(4, 5), bins=20)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Per-rating frequency tables sorted by rating value (not referenced again in
# this snippet).
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

fandango_ratings = norm_reviews['Fandango_Ratingvalue']
fig, ax = plt.subplots()
# Two histograms are drawn on the same Axes: the first call passes no `bins`
# argument, the second asks for 20 bins.
ax.hist(fandango_ratings)
ax.hist(fandango_ratings, bins=20)
# Alternative (disabled): restrict the histogram to the interval [4, 5].
# ax.hist(fandango_ratings, range=(4, 5), bins=20)
plt.show()
不指定 bins 参数时,使用 Matplotlib 的默认分箱数(默认 bins=10):
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Per-rating frequency tables sorted by rating value (not referenced again in
# this snippet).
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

fandango_ratings = norm_reviews['Fandango_Ratingvalue']
fig, ax = plt.subplots()
# Variant shown here: no `bins` or `range` argument is passed.
ax.hist(fandango_ratings)
# Other variants (disabled):
# ax.hist(fandango_ratings, bins=20)
# ax.hist(fandango_ratings, range=(4, 5), bins=20)
plt.show()
指定 bins=20,将数据范围等分为 20 个区间:
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Per-rating frequency tables sorted by rating value (not referenced again in
# this snippet).
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

fandango_ratings = norm_reviews['Fandango_Ratingvalue']
fig, ax = plt.subplots()
# Variant shown here: the data range is split into 20 bins.
ax.hist(fandango_ratings, bins=20)
# Other variants (disabled):
# ax.hist(fandango_ratings)
# ax.hist(fandango_ratings, range=(4, 5), bins=20)
plt.show()
指定 range=(4,5) 后,只统计 4–5 区间内的数据,小于 4 的评分不再显示:
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Per-rating frequency tables sorted by rating value (not referenced again in
# this snippet).
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

fandango_ratings = norm_reviews['Fandango_Ratingvalue']
fig, ax = plt.subplots()
# Variant shown here: only the interval [4, 5] is binned, using 20 bins.
ax.hist(fandango_ratings, range=(4, 5), bins=20)
# Other variants (disabled):
# ax.hist(fandango_ratings)
# ax.hist(fandango_ratings, bins=20)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# (column, subplot title) pairs in top-to-bottom plotting order.
panels = [
    ('Fandango_Ratingvalue', 'Distribution of Fandango Rating'),
    ('RT_user_norm', 'Distribution of Rotten Tomatoes Rating'),
    ('Metacritic_user_nom', 'Distribution of Metacritic Rating'),
    ('IMDB_norm', 'Distribution of IMDB Rating'),
]

# One tall figure holding four vertically stacked subplots (4 rows, 1 column).
fig = plt.figure(figsize=(5, 20))
ax1, ax2, ax3, ax4 = [fig.add_subplot(4, 1, row) for row in range(1, 5)]

# Every panel uses the same bins (20 over [0, 5]) and the same y-range
# (0 to 50), so the four distributions can be compared directly.
for axis, (column, title) in zip((ax1, ax2, ax3, ax4), panels):
    axis.hist(norm_reviews[column], bins=20, range=(0, 5))
    axis.set_title(title)
    axis.set_ylim(0, 50)

plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the columns used in this walkthrough.
reviews = pd.read_csv("C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv")
num_cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[num_cols]

# Box plot of the Rotten Tomatoes user ratings.
fig, ax = plt.subplots()
ax.boxplot(norm_reviews['RT_user_norm'])
ax.set_xticklabels(['Rotten Tomatoes'])
# Show the 0-5 rating scale on the y-axis. The previous call,
# set_ylim(0.5), passed a single positional argument and therefore only moved
# the lower limit to 0.5 instead of setting the (bottom, top) pair.
ax.set_ylim(0, 5)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set and keep only the four numeric rating columns.
reviews = pd.read_csv("C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv")
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[num_cols]

# One box per rating column, drawn side by side on a single Axes.
fig, ax = plt.subplots()
# norm_reviews already contains exactly num_cols, so no re-selection is needed;
# .values hands boxplot a 2-D array with one column per box.
ax.boxplot(norm_reviews.values)
# Label each box with its column name; rotate so the long names do not overlap.
ax.set_xticklabels(num_cols, rotation=90)
# Show the 0-5 rating scale on the y-axis. The previous call,
# set_ylim(0.5), passed a single positional argument and therefore only moved
# the lower limit to 0.5 instead of setting the (bottom, top) pair.
ax.set_ylim(0, 5)
plt.show()
上一篇: Qt编写数据可视化大屏界面电子看板11-自定义控件
下一篇: Seaborn--多变量分析(四)
推荐阅读
-
python matplotlib画盒图、子图解决坐标轴标签重叠的问题
-
Matplotlib _ 04 柱形图与盒图
-
D3.js(v3)+react 实现带坐标与比例尺的柱形图 (V3版本)
-
D3.js(v3)+react 制作 一个带坐标与比例尺的柱形图 (V3版本)
-
Python matplotlib 绘制3D柱形图, 并修改颜色
-
用matplotlib画双柱形图,并画出横纵轴的箭头
-
matplotlib入门-盒图
-
D3.js(v3)+react 制作 一个带坐标与比例尺的柱形图 (V3版本)
-
matplotlib线型与坐标轴与四图(六)
-
C数据结构与算法-基础整理-图-04:深度优先搜索和广度优先