欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

python读取多种格式文件(txt,csv,json,sqlite)

程序员文章站 2022-05-28 15:18:10
...

1、txt
①读取全部内容

# Read an entire UTF-8 text file into a single string and print it.
txt_filename = './files/python_baidu.txt'
# Open inside a `with` block so the handle is closed even if read() raises;
# the original open()/close() pair leaked the file on any exception.
with open(txt_filename, 'r', encoding='utf-8') as file_obj:
    # read() with no argument returns the whole remaining content as one str.
    all_content = file_obj.read()
print(all_content)

python读取多种格式文件(txt,csv,json,sqlite)
②按行读取

# Read the first two lines of a UTF-8 text file, one readline() call each.
txt_filename = './files/python_baidu.txt'
# `with` guarantees the file is closed even if a read or print fails.
with open(txt_filename, 'r', encoding='utf-8') as file_obj:
    # readline() returns one line, including its trailing newline.
    line1 = file_obj.readline()
    print(line1)
    # The file position has advanced, so the next call yields the second line.
    line2 = file_obj.readline()
    print(line2)

③读取返回列表

# Read every line of a UTF-8 text file into a list, then print each line
# prefixed with its zero-based index.
txt_filename = './files/python_baidu.txt'
# `with` closes the file as soon as the list has been built, and also when
# readlines() raises (the original leaked the handle in that case).
with open(txt_filename, 'r', encoding='utf-8') as file_obj:
    # readlines() returns a list of str, one element per line.
    lines = file_obj.readlines()
for i, line in enumerate(lines):
    print('{}: {}'.format(i, line))

python读取多种格式文件(txt,csv,json,sqlite)
④写操作

# Write a single string to a text file, replacing any existing content.
txt_filename = './files/test_write.txt'
# Mode 'w' truncates the file first. The `with` block flushes and closes the
# handle even if write() raises; the original open()/close() pair did not.
with open(txt_filename, 'w', encoding='utf-8') as file_obj:
    # Write the whole content in one call.
    file_obj.write("《Python数据分析》")

⑤按行写入

# Write a list of 100 lines to a text file, replacing any existing content.
txt_filename = './files/test_write.txt'
# Build the lines up front; writelines() does not add line separators,
# so each string carries its own '\n'.
lines = ['这是第%i行\n' % n for n in range(100)]
# `with` flushes and closes the file even if writelines() raises.
with open(txt_filename, 'w', encoding='utf-8') as file_obj:
    file_obj.writelines(lines)

⑥with(上下文管理器,自动关闭文件)

# Print the full content of a text file using a context manager.
txt_filename = './files/test_write.txt'
# No explicit close() is needed: the `with` statement closes the file when
# the block exits, including when an exception is raised inside it.
with open(txt_filename, mode='r', encoding='utf-8') as reader:
    content = reader.read()
    print(content)

2、CSV(以纯文本存储表格,逗号为分隔符)
①pandas读CSV

import pandas as pd
# Load a CSV file into a pandas DataFrame and preview it.
filename = './files/gender_country.csv'
# The file is decoded as UTF-16, as requested by the encoding argument.
df = pd.read_csv(filename, encoding='utf-16')
frame_type = type(df)
print(frame_type)
# head() with its default n=5 returns the first five rows.
preview = df.head(n=5)
print(preview)

python读取多种格式文件(txt,csv,json,sqlite)
②pandas写入CSV

# Save the DataFrame `df` (loaded in the previous snippet) as a UTF-8 CSV file.
filename = './files/pandas_output.csv'
# `index` is documented as a bool; `index=None` only worked because None is
# falsy. `index=False` states the intent (omit the row index) explicitly.
df.to_csv(filename, index=False, encoding='utf-8')

3、JSON {key1:val1,key2:val2}
①读取

import json
# Parse a JSON file into Python objects.
filename = './files/global_temperature.json'
# Pass the encoding explicitly: without it open() uses the platform default,
# which is not UTF-8 everywhere (e.g. on many Windows setups) and would make
# decoding fail or corrupt non-ASCII text. This also matches the other
# snippets, which all open files with encoding='utf-8'.
with open(filename, 'r', encoding='utf-8') as f_obj:
    json_data = json.load(f_obj)
# json.load returns a dict when the top-level JSON value is an object.
print(type(json_data))

②读取keys和values

# Show the values stored under the 'data' key of the parsed JSON.
data_section = json_data['data']
# To list the keys instead, print data_section.keys().
print(data_section.values())

③转换成CSV

# Convert the 'data' mapping of the parsed JSON into a two-column CSV file
# with columns 'year' and 'temperature'.
import pandas as pd

# JSON object keys are always read back as str, so convert the years to int.
year_str_lst = json_data['data'].keys()
year_lst = [int(year_str) for year_str in year_str_lst]
print(year_lst)

# Bug fix: the original used `temp_lst` without ever defining it, which
# raised NameError. Build it from the values of the same mapping so it lines
# up with year_lst (dicts iterate keys and values in the same order).
# NOTE(review): assumes every value is a number or a numeric string —
# confirm against the actual data file.
temp_lst = [float(temp_str) for temp_str in json_data['data'].values()]

# Build the DataFrame: one named Series (a single column) per field.
year_se = pd.Series(year_lst, name='year')
temp_se = pd.Series(temp_lst, name='temperature')
# axis=1 places the two Series side by side as columns;
# axis=0 would stack them end to end into one long column.
result_df = pd.concat([year_se, temp_se], axis=1)
print(result_df.head())

# Save as CSV. index=False (the documented bool form of the original
# index=None) leaves the row index out of the file.
result_df.to_csv('./files/json_to_csv.csv', index=False)

python读取多种格式文件(txt,csv,json,sqlite)
④写成json

# Serialize a list of book records to a UTF-8 JSON file.
book_dict = [
    {'书名':'无声告白', '作者':'伍绮诗'},
    {'书名':'我不是潘金莲', '作者':'刘震云'},
    {'书名':'沉默的大多数 (王小波集)', '作者':'王小波'},
]
filename = './files/json_output.json'
with open(filename, 'w', encoding='utf-8') as out_file:
    # ensure_ascii=False writes the Chinese text as-is instead of \uXXXX
    # escapes; json.dump streams the same text json.dumps would return.
    json.dump(book_dict, out_file, ensure_ascii=False)

4、sqlite

import sqlite3
# SQLite walkthrough: (re)create a `book` table, insert rows one at a time
# and in a batch, commit, then read everything back and print it.
db_path = './files/test.sqlite'
conn = sqlite3.connect(db_path)
# try/finally guarantees the connection is closed even when a statement
# fails; the original only reached conn.close() on the success path.
try:
    cur = conn.cursor()
    # Return TEXT values as str so the Chinese titles come back as text.
    conn.text_factory = str

    # Start from a clean table so the script can be re-run.
    cur.execute("DROP TABLE IF EXISTS book")
    cur.execute("CREATE TABLE book(id INT, name TEXT, price DOUBLE)")

    # Insert rows one statement at a time.
    cur.execute("INSERT INTO book VALUES(1,'肖秀荣考研书系列:肖秀荣(2017)考研政治命题人终极预测4套卷',14.40)")
    cur.execute("INSERT INTO book VALUES(2,'法医秦明作品集:幸存者+清道夫+尸语者+无声的证词+第十一根手指(套装共5册) (两种封面随机发货)',100.00)")
    cur.execute("INSERT INTO book VALUES(3,'活着本来单纯:丰子恺散文漫画精品集(收藏本)',30.90)")
    cur.execute("INSERT INTO book VALUES(4,'自在独行:贾平凹的独行世界',26.80)")
    cur.execute("INSERT INTO book VALUES(5,'当你的才华还撑不起你的梦想时',23.00)")
    cur.execute("INSERT INTO book VALUES(6,'巨人的陨落(套装共3册)',84.90)")
    cur.execute("INSERT INTO book VALUES(7,'孤独深处(收录雨果奖获奖作品《北京折叠》)',21.90)")
    cur.execute("INSERT INTO book VALUES(8,'世界知名企业员工指定培训教材:所谓情商高,就是会说话',22.00)")

    # Insert several rows in one call: a tuple of (id, name, price) tuples
    # bound to the ? placeholders by executemany().
    books = (
        (9, '人间草木', 30.00),
        (10,'你的善良必须有点锋芒', 20.50),
        (11, '这么慢,那么美', 24.80),
        (12, '考拉小巫的英语学习日记:写给为梦想而奋斗的人(全新修订版)', 23.90)
    )
    cur.executemany("INSERT INTO book VALUES(?, ?, ?)", books)

    # Commit so the inserted rows are persisted to the database file.
    conn.commit()

    # Query every row back.
    cur.execute('SELECT * FROM book')
    rows = cur.fetchall()
    # Each row is a tuple; access the columns by position.
    for row in rows:
        print('序号: {}, 书名: {}, 价格: {}'.format(row[0], row[1], row[2]))
finally:
    # Always release the database connection.
    conn.close()