欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

Matplotlib _ 04 柱形图与盒图

程序员文章站 2022-07-14 10:06:43
...
import pandas as pd
import matplotlib.pyplot as plt

# Load the Fandango score data set from its local CSV file.
csv_path = "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
reviews = pd.read_csv(csv_path)

# Columns to keep: the film title plus five score columns.
num_cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews.loc[:, num_cols]

# Show the first five rows of the selection.
print(norm_reviews.head(5))
                             FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55   
1               Cinderella (2015)           4.0                 3.75   
2                  Ant-Man (2015)           4.5                 4.05   
3          Do You Believe? (2015)           4.2                 2.35   
4   Hot Tub Time Machine 2 (2015)           1.4                 1.70   

   IMDB_norm  Fandango_Ratingvalue  Fandango_Stars  
0       3.90                   4.5             5.0  
1       3.55                   4.5             5.0  
2       3.90                   4.5             5.0  
3       2.70                   4.5             5.0  
4       2.55                   3.0             3.5  
import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Frequency table of each distinct value, ordered by the value itself
# (the index) instead of by how often it occurs.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

# Print the Fandango table first, then the IMDB one.
for distribution in (fandango_distribution, imd_distribution):
    print(distribution)
2.7     2
2.8     2
2.9     5
3.0     4
3.1     3
3.2     5
3.3     4
3.4     9
3.5     9
3.6     8
3.7     9
3.8     5
3.9    12
4.0     7
4.1    16
4.2    12
4.3    11
4.4     7
4.5     9
4.6     4
4.8     3
Name: Fandango_Ratingvalue, dtype: int64
2.00     1
2.10     1
2.15     1
2.20     1
2.30     2
2.45     2
2.50     1
2.55     1
2.60     2
2.70     4
2.75     5
2.80     2
2.85     1
2.90     1
2.95     3
3.00     2
3.05     4
3.10     1
3.15     9
3.20     6
3.25     4
3.30     9
3.35     7
3.40     1
3.45     7
3.50     4
3.55     7
3.60    10
3.65     5
3.70     8
3.75     6
3.80     3
3.85     4
3.90     9
3.95     2
4.00     1
4.05     1
4.10     4
4.15     1
4.20     2
4.30     1
Name: IMDB_norm, dtype: int64
import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
csv_path = "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
reviews = pd.read_csv(csv_path)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Frequency tables sorted by value; computed but not printed in this script.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

# Histogram of the Fandango values limited to the interval [4, 5],
# split into 20 equal-width bins.
fig, ax = plt.subplots()
fandango_ratings = norm_reviews['Fandango_Ratingvalue']
ax.hist(fandango_ratings, bins=20, range=(4, 5))

plt.show()

Matplotlib _ 04 柱形图与盒图

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Frequency tables sorted by value; computed but not printed in this script.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

fig, ax = plt.subplots()
fandango_ratings = norm_reviews['Fandango_Ratingvalue']

# Two histograms are drawn on the same Axes: first with the default
# binning, then again with 20 bins. The range=(4, 5) variant is not drawn here.
for hist_options in ({}, {'bins': 20}):
    ax.hist(fandango_ratings, **hist_options)

plt.show()

Matplotlib _ 04 柱形图与盒图

Matplotlib _ 04 柱形图与盒图

Matplotlib _ 04 柱形图与盒图

不指定 bins(使用默认的 10 个区间)

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
csv_path = "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
reviews = pd.read_csv(csv_path)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Frequency tables sorted by value; computed but not printed in this script.
fandango_counts = norm_reviews['Fandango_Ratingvalue'].value_counts()
fandango_distribution = fandango_counts.sort_index()
imd_counts = norm_reviews['IMDB_norm'].value_counts()
imd_distribution = imd_counts.sort_index()

# Histogram with no bins/range arguments, so matplotlib's defaults apply.
# (The bins=20 and range=(4, 5) variants are shown in separate scripts.)
fig, ax = plt.subplots()
ax.hist(norm_reviews.loc[:, 'Fandango_Ratingvalue'])

plt.show()

Matplotlib _ 04 柱形图与盒图

指定bins=20

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Frequency tables sorted by value; computed but not printed in this script.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

# Histogram over the full data range, explicitly split into 20 bins.
# (The default-bins and range=(4, 5) variants are shown in separate scripts.)
bin_count = 20
fig, ax = plt.subplots()
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=bin_count)

plt.show()

Matplotlib _ 04 柱形图与盒图

指定 range=(4,5),只统计 4~5 区间内的数据,小于 4 的评分不再显示

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
csv_path = "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
reviews = pd.read_csv(csv_path)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# Frequency tables sorted by value; computed but not printed in this script.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imd_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

# Histogram restricted to the interval [4, 5] with 20 bins; values outside
# that interval are ignored by hist().
# (The default-bins and bins=20 variants are shown in separate scripts.)
hist_options = {'range': (4, 5), 'bins': 20}
fig, ax = plt.subplots()
ax.hist(norm_reviews['Fandango_Ratingvalue'], **hist_options)

plt.show()

Matplotlib _ 04 柱形图与盒图

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
reviews = pd.read_csv(
    "C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv"
)
num_cols = [
    'FILM', 'RT_user_norm', 'Metacritic_user_nom',
    'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars',
]
norm_reviews = reviews[num_cols]

# One tall figure holding four panels stacked vertically (4 rows, 1 column).
fig = plt.figure(figsize=(5, 20))
ax1, ax2, ax3, ax4 = [fig.add_subplot(4, 1, row) for row in range(1, 5)]

# (column, title) for each panel, top to bottom.
panels = [
    ('Fandango_Ratingvalue', 'Distribution of Fandango Rating'),
    ('RT_user_norm', 'Distribution of Rotten Tomatoes Rating'),
    ('Metacritic_user_nom', 'Distribution of Metacritic Rating'),
    ('IMDB_norm', 'Distribution of IMDB Rating'),
]

# Every panel uses the same binning (20 bins over [0, 5]) and the same
# y-axis limits (0 to 50), so the four distributions can be compared directly.
for axes, (column, title) in zip((ax1, ax2, ax3, ax4), panels):
    axes.hist(norm_reviews[column], bins=20, range=(0, 5))
    axes.set_title(title)
    axes.set_ylim(0, 50)

plt.show()

Matplotlib _ 04 柱形图与盒图

Matplotlib _ 04 柱形图与盒图

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the columns listed in num_cols.
reviews = pd.read_csv("C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv")
num_cols = ['FILM','RT_user_norm','Metacritic_user_nom','IMDB_norm','Fandango_Ratingvalue','Fandango_Stars']
norm_reviews = reviews[num_cols]

# Box plot of the single 'RT_user_norm' column.
fig,ax = plt.subplots()
ax.boxplot(norm_reviews['RT_user_norm'])
ax.set_xticklabels(['Rotten Tomatoes'])
# Show the y-axis from 0 to 5. The previous call, set_ylim(0.5), passed a
# single positional argument, which only moved the lower limit to 0.5 and
# left the upper limit automatic.
ax.set_ylim(0, 5)

plt.show()

Matplotlib _ 04 柱形图与盒图

import pandas as pd
import matplotlib.pyplot as plt

# Load the data set and keep only the four score columns to compare.
reviews = pd.read_csv("C:/Users/Amber/Documents/唐宇迪-机器学习课程资料/Python库代码(4个)/3-可视化库matpltlib/fandango_scores.csv")
num_cols = ['RT_user_norm','Metacritic_user_nom','IMDB_norm','Fandango_Ratingvalue']
norm_reviews = reviews[num_cols]

# One box per column: boxplot() draws a box for each column of a 2-D array.
# norm_reviews already contains exactly num_cols, so no re-selection is needed.
fig,ax = plt.subplots()
ax.boxplot(norm_reviews.values)
# Label each box with its column name, rotated so the long names do not overlap.
ax.set_xticklabels(num_cols,rotation=90)
# Show the y-axis from 0 to 5. The previous call, set_ylim(0.5), passed a
# single positional argument, which only moved the lower limit to 0.5 and
# left the upper limit automatic.
ax.set_ylim(0, 5)

plt.show()

Matplotlib _ 04 柱形图与盒图

相关标签: Python