欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

pandas例子, 解决中文乱码问题, 繪圖

程序员文章站 2022-06-05 20:17:18
...

解决中文乱码问题

在 IDE(如 PyCharm)中双击 Shift, 搜索并打开 matplotlibrc, 去掉下面两行行首的注释符号 # 并按如下内容修改:
第 194 行: font.family         : sans-serif
第 206 行: font.sans-serif      : SimHei, Microsoft YaHei (使用系统自带的中文字体)

繪圖

from pandas import read_csv
import matplotlib.pyplot as plt

# Load the Gapminder data set: a tab-separated file read from the current
# working directory.
df = read_csv('./gapminder.tsv', sep='\t')

# Per-year means over all rows: life expectancy and GDP per capita.
global_year1 = df.groupby('year')['lifeExp'].mean()
global_year2 = df.groupby('year')['gdpPercap'].mean()

# One figure with two side-by-side axes, one per series.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))

ax1.plot(global_year1)
ax2.plot(global_year2)

# lifeExp is life expectancy (寿命), not age (年龄), so the left panel is
# labelled accordingly.
ax1.set_title('全球按年平均寿命')
ax2.set_title('全球按年平均GDP')

ax1.legend(['平均寿命'])
ax2.legend(['平均GDP'])

plt.show()

pandas例子, 解决中文乱码问题, 繪圖

import pandas
import matplotlib.pyplot as plot

df = pandas.read_csv("./gapminder.tsv", sep="\t")

# Global mean life expectancy, one value per year.
rows_by_year = df.groupby("year")
global_yearly_life_expectancy = rows_by_year["lifeExp"].mean()

print(global_yearly_life_expectancy)
# Output:
# year
# 1952    49.057620
# 1957    51.507401
# ................
# 2007    67.007423
# Name: lifeExp, dtype: float64


# Visualise the one-dimensional result (a Series) with matplotlib.
global_yearly_life_expectancy.plot()

# Chart title.
plot.title("全球年平均寿命")
# Legend with an explicit label. Per the original note, calling
# plot.legend() without a label list falls back to the default "lifeExp".
plot.legend(["平均寿命"])
# Display the figure.
plot.show()

pandas例子, 解决中文乱码问题, 繪圖

pandas例子, 解决中文乱码问题, 繪圖

df = pandas.read_csv('./gapminder.tsv', sep='\t')

# Pair every column name with its dtype once; the pairs are reused below.
column_dtype_pairs = list(zip(df.columns, df.dtypes))

# Print one (column name, dtype) tuple per column.
for pair in column_dtype_pairs:
    print(pair)
# Output:
# ('country', dtype('O'))
# ('continent', dtype('O'))
# ('year', dtype('int64'))
# ('lifeExp', dtype('float64'))
# ('pop', dtype('int64'))
# ('gdpPercap', dtype('float64'))

# The same pairs as a mapping from column name to dtype.
res = dict(column_dtype_pairs)
print(res)
# Output:
# {'country': dtype('O'), 'continent': dtype('O'),
#  'year': dtype('int64'), 'lifeExp': dtype('float64'),
#  'pop': dtype('int64'), 'gdpPercap': dtype('float64')}

# Look up the dtype of a single column by name.
country = res.get('country')
print(country)
# Output:
# object

平均值,出現次數?

# Mean life expectancy for each year.
life_exp_by_year = df.groupby('year')['lifeExp']
global_year_lifeExp = life_exp_by_year.mean()
# print(global_year_lifeExp)


# Group by (year, continent) and average lifeExp and gdpPercap, i.e. the mean
# of both columns for every continent within every year.
averaged_columns = ['lifeExp', 'gdpPercap']
rows_by_year_and_continent = df.groupby(['year', 'continent'])
num_global_year_lifeExp = rows_by_year_and_continent[averaged_columns].mean()
# print(num_global_year_lifeExp)


# print(num_global_year_lifeExp.reset_index())  restores the flat layout in
# which year and continent are ordinary columns again.


# Number of distinct countries within each continent.
countries_per_continent = df.groupby('continent')['country'].nunique()
print(countries_per_continent)

一,二维数据


# loc selects by label and includes both slice endpoints: rows 0..3 -> 4 rows.
print(df.loc[0:3, ['year', 'gdpPercap']])
# iloc selects by position and excludes the stop: rows 0..2 -> 3 rows.
print(df.iloc[0:3, [2, 5]])
# --------------------------------------

# Contiguous ranges on both axes, by position: rows 0-5, columns 3-5.
print(df.iloc[0:6, 3:6])

# Non-contiguous columns chosen by label, all rows.
print(df.loc[:, ['country', 'year']])

# Non-contiguous columns chosen by position. The frame has 6 columns
# (positions 0-5), so the previous position 6 raised IndexError; position 5
# (gdpPercap) is the last valid one.
print(df.iloc[:, [2, 5]].head())

# A scalar row label combined with a list of column labels yields a Series.
print(df.loc[0, ['year']])
print(type(df.loc[0, ['year']]))  # <class 'pandas.core.series.Series'>

# Selecting a single column by name also yields a Series.
haha = df['country']
print(haha)
# ----many-------    Name: country, Length: 1704, dtype: object


# Dimensions of the frame as (rows, columns).
frame_shape = df.shape
print(frame_shape)
#  (1704, 6)


# Trailing rows: the last 3, then the last 100.
for row_count in (3, 100):
    print(df.tail(row_count))

# Leading rows: the default 5, then the first 18.
for row_count in (5, 18):
    print(df.head(row_count))

# A single row selected by index label comes back as a Series.
first_row = df.loc[0]
print(first_row)
# country      Afghanistan
# continent           Asia
# year                1952
# lifeExp           28.801
# pop              8425333
# gdpPercap        779.445
# Name: 0, dtype: object


fourth_row = df.loc[3]
print(fourth_row)
# country      Afghanistan
# continent           Asia
# year                1967
# lifeExp            34.02
# pop             11537966
# gdpPercap        836.197
# Name: 3, dtype: object


# Last row by negative position.
# Label-based alternative: last = df.shape[0] - 1; print(df.loc[last])
final_row = df.iloc[-1]
print(final_row)
# country      Zimbabwe
# continent      Africa
# year             2007
# lifeExp        43.487
# pop          12311143
# gdpPercap     469.709
# Name: 1703, dtype: object


# First row plus the last two rows, by position.
# Without negative indices: last = df.shape[0] - 1; df.iloc[[0, last - 1, last]]
wanted_positions = [0, -2, -1]
print(df.iloc[wanted_positions])
#           country continent  year  lifeExp       pop   gdpPercap
# 0     Afghanistan      Asia  1952   28.801   8425333  779.445314
# 1702     Zimbabwe    Africa  2002   39.989  11926563  672.038623
# 1703     Zimbabwe    Africa  2007   43.487  12311143  469.709298

# The year and pop columns for row labels 0 through 3.
year_and_pop = ["year", "pop"]
subset = df.loc[0:3, year_and_pop]
# subset = df.loc[:, ["year", "pop"]]   # same columns, every row

# Columns at positions 2 and 4 plus the last column, all rows.
subset2 = df.iloc[:, [2, 4, -1]]
# print(subset2)

# Columns at positions 3 through 5 (the slice stop, 6, is excluded).
column_window = slice(3, 6)
subset3 = df.iloc[:, column_window]
print(subset3.head())
#    lifeExp       pop   gdpPercap
# 0   28.801   8425333  779.445314
# 1   30.332   9240934  820.853030
# 2   31.997  10267083  853.100710
# 3   34.020  11537966  836.197138
# 4   36.088  13079460  739.981106
相关标签: pandas