df 列操作,行操作 :增,删,改,查,计算,列过滤
程序员文章站
2022-06-13 21:16:55
1 ,列操作,查 :data["Age"] 代码 :if __name__ == '__main__': # 全列显示 : pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") df_age = data["Age"] print(df_age) =======================================...
1 ,列操作,查 :data["Age"]
- 代码 ( 以下所有示例均需先执行 import pandas as pd ) :
if __name__ == '__main__':
# 全列显示 :
pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
df_age = data["Age"]
print(df_age)
==============================================
0 22.0
1 38.0
2 26.0
2 ,列操作,计算 :res = df_age * 2
- 目的 : 字段 * 2
- 代码 :
if __name__ == '__main__':
# 全列显示 :
pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
df_age = data["Age"]
res = df_age * 2
print(df_age)
print(res)
==============================================
0 22.0
1 38.0
2 26.0
....
==================
0 44.0
1 76.0
2 52.0
...
3 ,列操作 : 增 data["double_age"] = res
- 目的 : 将 double_age 列新增到原数据中
- 代码 :
if __name__ == '__main__':
# 全列显示 :
pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
df_age = data["Age"]
res = df_age * 2
data["double_age"] = res
print(data.head(3))
=========================================
Age double_age ....
22.0 44.0
38.0 76.0
26.0 52.0
....
4 ,列操作,删除列 :data.drop(["PassengerId"],axis=1)
- 代码 :
if __name__ == '__main__':
# 全列显示 :
# pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
print(data.head(5))
res = data.drop(["PassengerId","Survived"],axis=1)
print(res.head(5))
==================================================
PassengerId Survived Pclass ... Fare Cabin Embarked
0 1 0 3 ... 7.2500 NaN S
1 2 1 1 ... 71.2833 C85 C
2 3 1 3 ... 7.9250 NaN S
3 4 1 1 ... 53.1000 C123 S
4 5 0 3 ... 8.0500 NaN S
[5 rows x 12 columns]
Pclass Name ... Cabin Embarked
0 3 Braund, Mr. Owen Harris ... NaN S
1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... ... C85 C
2 3 Heikkinen, Miss. Laina ... NaN S
3 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) ... C123 S
4 3 Allen, Mr. William Henry ... NaN S
[5 rows x 10 columns]
5 ,列操作,改列名 :data.rename(…)
- 精华代码 :
data.rename(columns={"PassengerId":"PassengerIdOMG"},inplace=True)
- 目的 :
1 ,将 PassengerId 列名修改为 PassengerIdOMG
- 代码 :
if __name__ == '__main__':
# 全列显示 :
# pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
print(data.head(5))
data.rename(columns={"PassengerId":"PassengerIdOMG"},inplace=True)
print(data.head(5))
===========================================
PassengerId Survived Pclass ... Fare Cabin Embarked
0 1 0 3 ... 7.2500 NaN S
1 2 1 1 ... 71.2833 C85 C
2 3 1 3 ... 7.9250 NaN S
3 4 1 1 ... 53.1000 C123 S
4 5 0 3 ... 8.0500 NaN S
[5 rows x 12 columns]
PassengerIdOMG Survived Pclass ... Fare Cabin Embarked
0 1 0 3 ... 7.2500 NaN S
1 2 1 1 ... 71.2833 C85 C
2 3 1 3 ... 7.9250 NaN S
3 4 1 1 ... 53.1000 C123 S
4 5 0 3 ... 8.0500 NaN S
[5 rows x 12 columns]
6 ,行操作,查 1 行 : data.loc[0]
- 代码 :
if __name__ == '__main__':
# 全列显示 :
# pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
res = data.loc[0]
print(data.head(3))
print(res)
========================================================================
PassengerId Survived Pclass ... Fare Cabin Embarked
0 1 0 3 ... 7.2500 NaN S
1 2 1 1 ... 71.2833 C85 C
2 3 1 3 ... 7.9250 NaN S
[3 rows x 12 columns]
==========================================
PassengerId 1
Survived 0
Pclass 3
Name Braund, Mr. Owen Harris
Sex male
Age 22
SibSp 1
Parch 0
Ticket A/5 21171
Fare 7.25
Cabin NaN
Embarked S
Name: 0, dtype: object
7 ,行操作,计算 : res01 = res * 2
- 代码 : 乘 2 ( 注意 :该行的 dtype 为 object,数值字段会翻倍,而字符串字段会被重复拼接,例如 male 变成 malemale )
if __name__ == '__main__':
# 全列显示 :
# pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
res = data.loc[0]
res01 = res * 2
print(res)
print(res01)
================================
PassengerId 1
Survived 0
Pclass 3
Name Braund, Mr. Owen Harris
Sex male
Age 22
SibSp 1
Parch 0
Ticket A/5 21171
Fare 7.25
Cabin NaN
Embarked S
Name: 0, dtype: object
==================================================================
PassengerId 2
Survived 0
Pclass 6
Name Braund, Mr. Owen HarrisBraund, Mr. Owen Harris
Sex malemale
Age 44
SibSp 2
Parch 0
Ticket A/5 21171A/5 21171
Fare 14.5
Cabin NaN
Embarked SS
Name: 0, dtype: object
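8 ,行操作,增 : data.append(res01, ignore_index=True)
- 注意 : DataFrame.append 自 pandas 1.4 起已弃用,并在 pandas 2.0 中被移除;新版本请改用 data = pd.concat([data, res01.to_frame().T], ignore_index=True)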
- 目的 :
1 ,将最后一行 * 2
2 ,再添加回去,成为新的最后一行
- 代码 :
if __name__ == '__main__':
# 全列显示 :
# pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
res = data.loc[890]
# 将数据 * 2
res01 = res * 2
# 将数据加入到 data 中
data = data.append(res01, ignore_index=True)
print(data.tail(3))
==============================================
PassengerId Survived Pclass ... Fare Cabin Embarked
889 890 1 1 ... 30.00 C148 C
890 891 0 3 ... 7.75 NaN Q
891 1782 0 6 ... 15.50 NaN QQ
9 ,行操作,删 : res02 = res01.drop(2)
- 目的 : 利用索引删除指定行
- 代码 :
if __name__ == '__main__':
# 全列显示 :
# pd.set_option('display.max_columns', None)
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
res = data.loc[890]
# 将数据 * 2
res01 = res * 2
# 将数据加入到 data 中
data = data.append(res01, ignore_index=True)
res01 = data.tail(3)
print(res01)
res01.reset_index(inplace=True,drop=True)
print(res01)
# 删除第三行 ( 索引为 2 的那行 )
res02 = res01.drop(2)
print(res02)
===================================================
PassengerId Survived Pclass ... Fare Cabin Embarked
889 890 1 1 ... 30.00 C148 C
890 891 0 3 ... 7.75 NaN Q
891 1782 0 6 ... 15.50 NaN QQ
[3 rows x 12 columns]
PassengerId Survived Pclass ... Fare Cabin Embarked
0 890 1 1 ... 30.00 C148 C
1 891 0 3 ... 7.75 NaN Q
2 1782 0 6 ... 15.50 NaN QQ
[3 rows x 12 columns]
PassengerId Survived Pclass ... Fare Cabin Embarked
0 890 1 1 ... 30.00 C148 C
1 891 0 3 ... 7.75 NaN Q
[2 rows x 12 columns]
10 ,过滤列 : res01[res01["Age"]%2==0]
- 目的 : 留下年龄为偶数的行
- 思想 : True 留下,False 剔除
- 代码 :
if __name__ == '__main__':
# 读文件 csv
data = pd.read_csv("titanic_train.csv")
# 年龄字段 :
df_age = data["Age"].to_frame()
# 清除空值
res01 = df_age.dropna()
# 留下偶数
res02 = res01[res01["Age"]%2==0]
print(res02)
==============================
Age
0 22.0
1 38.0
2 26.0
本文地址:https://blog.csdn.net/qq_34319644/article/details/107117338