荐 【MOOC嵩天Python语言程序设计】第七周 文件和数据格式化 7.1~7.6
第七周 文件和数据格式化 7.1~7.6
-7.1文件的使用
# Read the first line of f.txt in text mode using the platform's default
# encoding (no encoding argument is passed). The with-block closes the file
# even when decoding the line raises an exception.
with open("f.txt", "rt") as tf:
    print(tf.readline())
print(tf.readline())
UnicodeDecodeError: 'gbk' codec can't decode byte 0xad in position 3: illegal multibyte sequence
# Read the first line of f.txt, decoding it explicitly as UTF-8.
# The with-block closes the file automatically, including on errors.
with open("f.txt", "rt", encoding='utf-8') as tf:
    print(tf.readline())
“中国是个伟大的国家!”
# Read the first line of f.txt in binary mode: readline() returns the raw
# bytes, so no text decoding takes place.
# The with-block closes the file automatically, including on errors.
with open("f.txt", "rb") as tf:
    print(tf.readline())
b'"\xe4\xb8\xad\xe5\x9b\xbd\xe6\x98\xaf\xe4\xb8\xaa\xe4\xbc\x9f\xe5\xa4\xa7\xe7\x9a\x84\xe5\x9b\xbd\xe5\xae\xb6"'
>>> tf = open("C://Users//admin//Desktop//f.txt", "rt", encoding='utf-8')
>>> print(tf.read(2))
"中
>>> tf.readline()
'国是个伟大的国家!"'
>>> tf.read()
''
>>> tf.readline()
''
>>> tf.readlines()
[]
文本的全文操作
遍历全文本 一
fname = input("请输入要打开的文件名称:")
# Read the entire file into memory with a single read() call.
# The with-block closes the file automatically, including on errors.
with open(fname, "r", encoding='utf-8') as fo:
    txt = fo.read()
遍历全文本 二
fname = input("请输入要打开的文件名称:")
# Walk through the file two characters at a time; read() returns an empty
# string once the end of the file has been reached.
with open(fname, "r", encoding='utf-8') as fo:
    txt = fo.read(2)
    while txt != "":
        # Process the current chunk here, then fetch the next one.
        txt = fo.read(2)
文件的逐行操作
逐行遍历文件 一
fname = input("请输入要打开的文件名称:")
# readlines() loads every line into a list first, then the list is iterated.
with open(fname, "r", encoding='utf-8') as fo:
    for line in fo.readlines():
        # Each line keeps its trailing newline, so print() adds a second one.
        print(line)
逐行遍历文件 二
fname = input("请输入要打开的文件名称:")
# Iterate over the file object directly: lines are produced one at a time
# instead of being collected into a list first.
with open(fname, "r", encoding='utf-8') as fo:
    for line in fo:
        print(line)
请输入要打开的文件名称:f.txt
“中国是个伟大的国家!”
第二行!!
第三行@@
数据文件的写入
# Fragment illustrating the two write calls. `fo` is not opened in these
# lines -- presumably a file object already opened for writing; confirm
# against the complete examples that follow.
# write() is given a single string.
fo.write("你给我的giao giao!!")
ls = ["A", "B", "C"]
# writelines() is given a list of strings.
fo.writelines(ls)
# "w+" creates (or truncates) f.txt and also allows reading it back.
with open("f.txt", "w+", encoding='utf-8') as fo:
    fo.write("你给我的giao giao!!")
    # After write() the position is at the end of the file; rewind so the
    # following read starts from the beginning.
    fo.seek(0)
    for line in fo.readlines():
        print(line)
你给我的giao giao!!
# "w+" creates (or truncates) f.txt and also allows reading it back.
with open("f.txt", "w+", encoding='utf-8') as fo:
    # "A\n" ends with a real newline; "B\\n" contains a literal backslash
    # followed by the letter n, so "B\n" and "C" end up on the same line.
    ls = ["A\n", "B\\n", "C"]
    fo.writelines(ls)
    # Rewind to the start of the file before reading back.
    fo.seek(0)
    for line in fo.readlines():
        print(line)
A
B\nC
-7.2实例11:自动轨迹绘制
# AutoTraceDraw.py
import turtle as t

t.title('自动轨迹绘制')
t.setup(800, 600, 0, 0)
t.pencolor("red")
t.pensize(5)

# Read the data: every non-blank line of data.txt becomes one list of
# numbers (distance, turn-right flag, angle, red, green, blue).
datals = []
with open("data.txt") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines, e.g. an empty line at the end of the file.
            continue
        # NOTE(review): eval() executes arbitrary expressions; this is only
        # acceptable while data.txt is a trusted local file.
        datals.append(list(map(eval, line.split(","))))

# Draw automatically: set the pen colour, move forward, then turn right when
# the flag is truthy and left otherwise.
for distance, turn_right, angle, red, green, blue, *_ in datals:
    t.pencolor(red, green, blue)
    t.fd(distance)
    if turn_right:
        t.rt(angle)
    else:
        t.lt(angle)
t.done()
#data.txt
300,0,144,1,0,0
300,0,144,0,1,0
300,0,144,0,0,1
300,0,144,1,1,0
300,0,108,0,1,1
184,0,72,1,0,1
184,0,72,0,0,0
184,0,72,0,0,0
184,0,72,0,0,0
184,1,72,1,0,1
184,1,72,0,0,0
184,1,72,0,0,0
184,1,72,0,0,0
184,1,72,0,0,0
184,1,720,0,0,0
-7.3一维数据的格式化和处理
# Read the whole file, closing it as soon as the text has been loaded.
with open("1.txt") as fo:
    f = fo.read()
# split() without arguments separates on any run of whitespace,
# including newlines.
ls = f.split()
print(ls)
['1', '2', '3', '4$5$6', '7#8#9']
# Read the whole file, closing it as soon as the text has been loaded.
with open("1.txt") as fo:
    f = fo.read()
# Split only on "$"; spaces and newlines stay inside the resulting items.
ls = f.split("$")
print(ls)
['1 2 3\n4', '5', '6\n7#8#9']
ls = ['中国', '美国', '日本']
# Store the one-dimensional data as a single space-separated line, then
# rewind and print the file content back.
with open("1.txt", 'w+') as f:
    f.write(' '.join(ls))
    f.seek(0)
    for line in f:
        print(line)
中国 美国 日本
ls = ['中国', '美国', '日本']
# Store the one-dimensional data as a single line with "$$" between the
# items, then rewind and print the file content back.
with open("1.txt", 'w+') as f:
    f.write('$$'.join(ls))
    f.seek(0)
    for line in f:
        print(line)
中国$$美国$$日本
-7.4二维数据的格式化和处理
二维数据的读入
# Load comma-separated rows from 1.txt into a list of lists.
# The file is opened for reading ("r"): a write mode such as "w+" would
# truncate 1.txt before a single line could be read.
ls = []
with open("1.txt", "r", encoding='utf-8') as f:
    for line in f:
        # Drop the trailing newline, then split the row into its fields.
        line = line.replace("\n", "")
        ls.append(line.split(","))
二维数据的写入
ls = [[], [], []]
# Write one line per row of ls. The rows are empty here, so the characters
# of the placeholder string "#123# " are joined with commas instead of the
# row itself (`item` is not used).
with open("1.txt", "w", encoding='utf-8') as f:
    for item in ls:
        f.write(','.join("#123# ") + '\n')
#,1,2,3,#,
#,1,2,3,#,
#,1,2,3,#,
二维数据的逐一处理
ls = [[1, 2], [3, 4], [5, 6]]
# Visit every element row by row: the outer loop yields each row, the inner
# loop yields each value within that row.
for row in ls:
    for column in row:
        print(column)
1
2
3
4
5
6
-7.5模块6:wordcloud库的使用
import wordcloud

# Minimal example: build a word cloud with default settings from a short
# space-separated text and save the picture as 1.png.
cloud_text = "wo ai ni"
w = wordcloud.WordCloud()
w.generate(cloud_text)
w.to_file("1.png")
#coding=utf-8
import wordcloud

# Mixed Latin/Chinese sample text on a sky-blue background.
# No font_path is passed to WordCloud here.
txt = "ao ai ni LOVE 爱"
w = wordcloud.WordCloud(background_color='skyblue').generate(txt)
w.to_file("1.png")
中文显示不出来
import jieba
import wordcloud

txt = "我喜欢你很久了,你好!我很想你。"
# Segment the Chinese sentence into words; WordCloud receives them joined
# by spaces.
words = jieba.lcut(txt)
w = wordcloud.WordCloud(
    background_color='skyblue',
    width=2300,
    height=2200,
    # Font file on the Windows system, passed so Chinese text can be drawn.
    font_path="C://Windows//Fonts//msyh.ttc",
)
w.generate(" ".join(words))
w.to_file("1.png")
font_path="C://Windows//Fonts//msyh.ttc"找到了系统里的中文字体
Building prefix dict from the default dictionary …
Loading model from cache C:\Users\Admin\AppData\Local\Temp\jieba.cache
Loading model cost 1.107 seconds.
Prefix dict has been built successfully.
-7.6实例12:*工作报告词云
https://python123.io/resources/pye/新时代中国特色*.txt
常规矩形词云
# GovRptWordCloudv1.py
import jieba
import wordcloud

# Read the whole document; the with-block closes the file even on errors.
with open("新时代中国特色*.txt", "r", encoding="utf-8") as f:
    t = f.read()
# Segment the text into words and join them with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)
# Rectangular 1000x700 word cloud on a white background.
w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
)
w.generate(txt)
w.to_file("grwordcloud.png")
https://python123.io/resources/pye/关于实施乡村振兴战略的意见.txt
# GovRptWordCloudv1.py
import jieba
import wordcloud

# Read the whole document; the with-block closes the file even on errors.
with open("关于实施乡村振兴战略的意见.txt", "r", encoding="utf-8") as f:
    t = f.read()
# Segment the text into words and join them with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)
# Rectangular 1000x700 word cloud on a white background.
w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
)
w.generate(txt)
w.to_file("grwordcloud.png")
# GovRptWordCloudv1.py
import jieba
import wordcloud

# Read the whole document; the with-block closes the file even on errors.
with open("新时代中国特色*.txt", "r", encoding="utf-8") as f:
    t = f.read()
# Segment the text into words and join them with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)
# Rectangular 1000x700 word cloud on a white background, with max_words
# set to 15.
w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    max_words=15,
)
w.generate(txt)
w.to_file("grwordcloud.png")
# GovRptWordCloudv1.py
import jieba
import wordcloud

# Read the whole document; the with-block closes the file even on errors.
with open("关于实施乡村振兴战略的意见.txt", "r", encoding="utf-8") as f:
    t = f.read()
# Segment the text into words and join them with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)
# Rectangular 1000x700 word cloud on a white background, with max_words
# set to 15.
w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    max_words=15,
)
w.generate(txt)
w.to_file("grwordcloud.png")
不规则图形词云
# GovRptWordCloudv2.py
import jieba
import wordcloud
# NOTE(review): scipy.misc.imread has been deprecated since SciPy 1.0 and is
# absent from later releases; SciPy's deprecation notice recommends
# imageio.imread as the replacement. Swap the import when updating the
# environment.
from scipy.misc import imread

# The image is handed to WordCloud as the mask for the cloud.
mask = imread("chinamap.jpg")
# Not used anywhere in this script.
excludes = {}
# Read the whole document; the with-block closes the file even on errors.
with open("新时代中国特色*.txt", "r", encoding="utf-8") as f:
    t = f.read()
# Segment the text into words and join them with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    mask=mask,
)
w.generate(txt)
w.to_file("grwordcloudm.png")
(示例代码中的 from scipy.misc import imread 已被淘汰,可改用 imageio.imread 读取图片。)
上一篇: 用JS来操作数组,平常遇到的问题