python dataframe两列分组计数并取最大
程序员文章站
2024-01-06 10:15:28
...
Python DataFrame:按两列分组计数,并取每组中计数最大的一行
import pandas as pd
import numpy as np
# Demo data: each row pairs a job with the source it was recorded under.
df = pd.DataFrame(
    {
        'job': ['sale', 'sale', 'sale', 'market', 'market', 'market', 'market', 'market'],
        'source': ['A', 'A', 'C', 'D', 'E', 'C', 'B', 'C'],
    }
)
print(df)
print('1' * 60)

# Approach 1: group on both columns and count the rows in each
# (job, source) pair; reset_index(name='count') turns the grouped result
# back into a flat frame whose counts live in a 'count' column.
# (Original author's note: set_index could not be chained on here.)
df_groupby = (
    df.groupby(['job', 'source'])['source']
    .count()
    .reset_index(name='count')
)
print(df_groupby)
print('2' * 60)

# Approach 2: group the 'source' column by 'job' and let value_counts
# tally each source within its job.
df_groupby = (
    df['source']
    .groupby(df['job'])
    .value_counts()
    .reset_index(name='count')
)
print(df_groupby)
print('3' * 60)

# Sort descending on job and count so the most frequent source leads
# each job, then keep the first row per job. Change the argument of
# head() to keep the top n rows per group instead.
haha = (
    df_groupby
    .sort_values(['job', 'count'], ascending=False)
    .groupby('job')
    .head(1)
)
print(haha)
结果如下:
job source
0 sale A
1 sale A
2 sale C
3 market D
4 market E
5 market C
6 market B
7 market C
111111111111111111111111111111111111111111111111111111111111
job source count
0 market B 1
1 market C 2
2 market D 1
3 market E 1
4 sale A 2
5 sale C 1
222222222222222222222222222222222222222222222222222222222222
job source count
0 market C 2
1 market B 1
2 market D 1
3 market E 1
4 sale A 2
5 sale C 1
333333333333333333333333333333333333333333333333333333333333
job source count
4 sale A 2
0 market C 2
Process finished with exit code 0