数据分析-01
程序员文章站
2024-03-07 18:08:39
...
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Directory that holds the input CSV files (relative path).
INPUT_PATH = './'
# Optional cap on the number of rows read from each file (currently disabled).
#MAX_ROWS = 100000
# ---- Data preparation ----
# Cruising-taxi GPS records for 2019-06-03.
# Narrow numeric dtypes are requested explicitly for the columns listed below.
taxigps2019 = pd.read_csv(
    f"{INPUT_PATH}taxiGps20190603.csv",
    # nrows=MAX_ROWS,
    dtype={
        'DRIVING_DIRECTION': np.uint16,
        'OPERATING_STATUS': np.uint8,
        'LONGITUDE': np.float32,
        'LATITUDE': np.float32,
        'GPS_SPEED': np.float16,
    },
)
# Reverse the column order, order the rows by vehicle and GPS timestamp,
# and renumber the index from zero.
taxigps2019 = (
    taxigps2019[taxigps2019.columns[::-1]]
    .sort_values(by=['CARNO', 'GPS_TIME'])
    .reset_index(drop=True)
)
# Cruising-taxi order records for 2019-06-03.
# The vehicle column is renamed to CARNO, rows are ordered by vehicle and
# boarding time, and the index is renumbered from zero.
taxiorder2019 = (
    pd.read_csv(
        f"{INPUT_PATH}taxiOrder20190603.csv",
        # nrows=MAX_ROWS,
        dtype={
            'GETON_LONGITUDE': np.float32,
            'GETON_LATITUDE': np.float32,
            'GETOFF_LONGITUDE': np.float32,
            'GETOFF_LATITUDE': np.float32,
            'PASS_MILE': np.float16,
            'NOPASS_MILE': np.float16,
            'WAITING_TIME': np.float16,
        },
    )
    .rename(columns={'CAR_NO': 'CARNO'})
    .sort_values(by=['CARNO', 'GETON_DATE'])
    .reset_index(drop=True)
)
# Ride-hailing GPS records for 2019-06-03.
wycgps2019 = pd.read_csv(
    f"{INPUT_PATH}wycGps20190603.csv",
    # nrows=MAX_ROWS,
    dtype={
        'LONGITUDE': np.float32,
        'LATITUDE': np.float32,
        'SPEED': np.float16,
    },
).rename(columns={'CAR_NO': 'CARNO'})
# Reverse the column order and order the rows by vehicle and position time
# (the original index labels are kept).
wycgps2019 = wycgps2019[wycgps2019.columns[::-1]].sort_values(
    by=['CARNO', 'POSITION_TIME']
)
# Missing status flags become -1 so both columns can be stored as int8.
wycgps2019 = wycgps2019.assign(
    BIZ_STATUS=wycgps2019['BIZ_STATUS'].fillna(-1).astype(np.int8),
    ENCRYPT=wycgps2019['ENCRYPT'].fillna(-1).astype(np.int8),
)
# Ride-hailing order records for 2019-06-03.
# The vehicle column is renamed to CARNO and rows are ordered by vehicle and
# departure time (the original index labels are kept).
wycorder2019 = (
    pd.read_csv(
        f"{INPUT_PATH}wycOrder20190603.csv",
        # nrows=MAX_ROWS,
        dtype={
            'DEP_LONGITUDE': np.float32,
            'DEP_LATITUDE': np.float32,
            'DEST_LONGITUDE': np.float32,
            'DEST_LATITUDE': np.float32,
        },
    )
    .rename(columns={'CAR_NO': 'CARNO'})
    .sort_values(by=['CARNO', 'DEP_TIME'])
)
# Q1: number of distinct taxis in the cruising-taxi GPS data for 2019-06-03.
print("1.")
print(taxigps2019['CARNO'].nunique())
# Q2: number of distinct vehicles in the ride-hailing GPS data for 2019-06-03.
print("2.")
print(wycgps2019['CARNO'].nunique())
# Q3: maximum and minimum pick-up longitude/latitude in the taxi order data.
print("3.1")
print(taxiorder2019['GETON_LONGITUDE'].max())
print(taxiorder2019['GETON_LATITUDE'].max())
print("3.2")
# Only strictly positive coordinates take part in the minimum.
# Series.min() is used instead of the builtin min(): it is vectorised and
# returns the same scalar type as the .max() calls above, so the printed
# values are formatted consistently.
pickup_lon = taxiorder2019['GETON_LONGITUDE']
pickup_lat = taxiorder2019['GETON_LATITUDE']
print(pickup_lon[pickup_lon > 0].min())
print(pickup_lat[pickup_lat > 0].min())
# Q4: most common drop-off location in the ride-hailing order data for
# 2019-06-03.
print("4.")
# Round both coordinates to 3 decimal places and keep the result; calling the
# builtin round() on each item without storing its return value changes nothing.
position = wycorder2019[['DEST_LONGITUDE', 'DEST_LATITUDE']].round(3)
# Count (longitude, latitude) pairs, most frequent first. Concatenating the two
# columns end to end would count longitudes and latitudes as unrelated values
# instead of as locations.
# NOTE(review): placeholder coordinates such as (0, 0), if present in the data,
# are counted like any other location - confirm whether they should be excluded.
position_counts = (
    position.groupby(['DEST_LONGITUDE', 'DEST_LATITUDE'])
    .size()
    .sort_values(ascending=False)
)
print(position_counts)
第四题的结果仍需核对:应先把经纬度四舍五入并保存结果,再按(经度, 纬度)成对统计出现次数,而不是把两列首尾拼接后再统计。