python操作excel
程序员文章站
2022-05-14 09:29:49
长期以来都想用python对Excel进行一些列的操作,但由于某种神秘的力量控制着我,一直未果,今天有幸用requests模块和BeautifulSoup模块进行爬虫练习,拿到了一大批数据,照我以前,都只是用字典啊、列表啊,或者文本文件存放,之前没觉得哪里不好,但今天的我很奇怪,怎么看怎么不爽,而且 ......
长期以来都想用python对excel进行一些列的操作,但由于某种神秘的力量控制着我,一直未果,今天有幸用requests模块和beautifulsoup模块进行爬虫练习,拿到了一大批数据,照我以前,都只是用字典啊、列表啊,或者文本文件存放,之前没觉得哪里不好,但今天的我很奇怪,怎么看怎么不爽,而且,如果我把我爬取的数据给别人看的话,确实有点难堪,没有脸给别人看。于是,我拼尽平生所有能力挣脱某种神秘力量的束缚,粗略的学习了下用python如何操作excel,在此跟大家分享下,鄙人水平有限,不够深入或有问题的地方还请大佬指教。
一、用xlrd模块对excel进行读取
import xlrd
#打开excel文件读取数据 data=xlrd.open_workbook('myexcel.xls')
#获取一个工作表 table = data.sheets()[0] #通过索引顺序获取 table = data.sheet_by_index(0) #通过索引顺序获取 table = data.sheet_by_name(u'sheet1')#通过名称获取
获取整行或整列的值(数组)
table.row_values(num) table.col_values(num)
获取行数和列数 table.nrows table.ncols
获取单元格 table.cell(num1,num2).value
二、用xlwt模块对excel进行写入
1,基本写入操作
import xlwt
#创建workbook,也就是新建一个excel文件 workbook=xlwt.workbook(encoding='ascii')
#在刚才创建的excel文件下创建一个sheet表 worksheet=workbook.add_sheet('mysheet')
#往单元格里写入数据 worksheet.write(num,num,label='xxxxxx')
#保存excel文件 workbook.save('myworkbook.xls')
2,font设置
font = xlwt.font() # create the font 大概就是创建一个font对象,然后对其进行设置 font.name = 'times new roman' font.bold = true font.underline = true font.italic = true style = xlwt.xfstyle() # create the style #创建一个style对象,然后把font对象赋给它 style.font = font # apply the font to the style worksheet.write(0, 0, label = 'unformatted value') worksheet.write(1, 0, label = 'formatted value', style) # apply the style to the cell
下面的就不是很懂,大概也是font对象的一些设置 font.struck_out = true # may be: true, false font.underline = xlwt.font.underline_single # may be: underline_none, underline_single, underline_single_acc, underline_double, underline_double_acc font.escapement = xlwt.font.escapement_superscript # may be: escapement_none, escapement_superscript, escapement_subscript font.family = xlwt.font.family_roman # may be: family_none, family_roman, family_swiss, family_modern, family_script, family_decorative font.charset = xlwt.font.charset_ansi_latin # may be: charset_ansi_latin, charset_sys_default, charset_symbol, charset_apple_roman, charset_ansi_jap_shift_jis, charset_ansi_kor_hangul,
charset_ansi_kor_johab, charset_ansi_chinese_gbk, charset_ansi_chinese_big5, charset_ansi_greek, charset_ansi_turkish, charset_ansi_vietnamese, charset_ansi_hebrew, charset_ansi_arabic,
charset_ansi_baltic, charset_ansi_cyrillic, charset_ansi_thai, charset_ansi_latin_ii, charset_oem_latin_i font.colour_index = ? font.get_biff_record = ? font.height = 0x00c8 # c8 in hex (in decimal) = 10 points in height. font.name = ? font.outline = ? font.shadow = ?
3,每列的宽度设置
worksheet.col(0).width = 3333 #这是第0列
4,往单元格里插入时间
style = xlwt.xfstyle() style.num_format_str = 'm/d/yy' # other options: d-mmm-yy, d-mmm, mmm-yy, h:mm, h:mm:ss, h:mm, h:mm:ss, m/d/yy h:mm, mm:ss, [h]:mm:ss, mm:ss.0 worksheet.write(0, 0, datetime.datetime.now(), style)
5,在单元格插入公式
worksheet.write(0, 0, 5) # outputs 5 worksheet.write(0, 1, 2) # outputs 2 worksheet.write(1, 0, xlwt.formula('a1*b1')) # should output "10" (a1[5] * a2[2]) worksheet.write(1, 1, xlwt.formula('sum(a1,b1)')) # should output "7" (a1[5] + a2[2])
6,单元格插连接
worksheet.write(0, 0, xlwt.formula('hyperlink("http://www.google.com";"google")')) # outputs the text "google" linking to http://www.google.com
7,合并行和列
import xlwt workbook = xlwt.workbook() worksheet = workbook.add_sheet('my sheet') worksheet.write_merge(0, 0, 0, 3, 'first merge') # merges row 0's columns 0 through 3.合并第0列到底3列 font = xlwt.font() # create font font.bold = true # set font to bold style = xlwt.xfstyle() # create style style.font = font # add bold font to style worksheet.write_merge(1, 2, 0, 3, 'second merge', style) # merges row 1 through 2's columns 0 through 3.合并第1行到第2列和第0列到第3列 workbook.save('excel_workbook.xls')
8,alignment
setting the alignment for the contents of a cell import xlwt workbook = xlwt.workbook() worksheet = workbook.add_sheet('my sheet') alignment = xlwt.alignment() # create alignment alignment.horz = xlwt.alignment.horz_center # may be: horz_general, horz_left, horz_center, horz_right, horz_filled, horz_justified, horz_center_across_sel, horz_distributed alignment.vert = xlwt.alignment.vert_center # may be: vert_top, vert_center, vert_bottom, vert_justified, vert_distributed style = xlwt.xfstyle() # create style style.alignment = alignment # add alignment to style worksheet.write(0, 0, 'cell contents', style) workbook.save('excel_workbook.xls')
9,borders
adding borders to a cell # please note: while i was able to find these constants within the source code, on my system (using libreoffice,)
i was only presented with a solid line, varying from thin to thick; no dotted or dashed lines. import xlwt workbook = xlwt.workbook() worksheet = workbook.add_sheet('my sheet') borders = xlwt.borders() # create borders borders.left = xlwt.borders.dashed # may be: no_line, thin, medium, dashed, dotted, thick, double, hair, medium_dashed,
thin_dash_dotted, medium_dash_dotted, thin_dash_dot_dotted, medium_dash_dot_dotted, slanted_medium_dash_dotted,
or 0x00 through 0x0d. borders.right = xlwt.borders.dashed borders.top = xlwt.borders.dashed borders.bottom = xlwt.borders.dashed borders.left_colour = 0x40 borders.right_colour = 0x40 borders.top_colour = 0x40 borders.bottom_colour = 0x40 style = xlwt.xfstyle() # create style style.borders = borders # add borders to style worksheet.write(0, 0, 'cell contents', style) workbook.save('excel_workbook.xls')
10,background color
setting the background color of a cell import xlwt workbook = xlwt.workbook() worksheet = workbook.add_sheet('my sheet') pattern = xlwt.pattern() # create the pattern pattern.pattern = xlwt.pattern.solid_pattern # may be: no_pattern, solid_pattern, or 0x00 through 0x12 pattern.pattern_fore_colour = 5 # may be: 8 through 63. 0 = black, 1 = white, 2 = red, 3 = green, 4 = blue, 5 = yellow, 6 = magenta,
7 = cyan, 16 = maroon, 17 = dark green, 18 = dark blue, 19 = dark yellow , almost brown), 20 = dark magenta, 21 = teal, 22 = light gray,
23 = dark gray, the list goes on... style = xlwt.xfstyle() # create the pattern style.pattern = pattern # add pattern to style worksheet.write(0, 0, 'cell contents', style) workbook.save('excel_workbook.xls')
纯属借鉴大佬的笔记,在此感谢大佬
三、小应用
from bs4 import beautifulsoup import requests import xlwt import re headers={ 'user-agent':'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 (khtml, like gecko) chrome/72.0.3626.119 safari/537.36' } workbook = xlwt.workbook(encoding='ascii') worksheet = workbook.add_sheet('my worksheet') worksheet.col(0).width = 14000 worksheet.col(5).width = 24000 url='https://sz.58.com/ershoufang/?utm_source=market&spm=u-2d2yxv86y3v43nkddh1.bdpcpz_bt&pgtid=0d30000c-0000-4591-0324-370565eccba8&clickid=1' res=requests.get(url=url,headers=headers) con=res.text soup=beautifulsoup(con,'lxml') ss=soup.find('ul',class_='house-list-wrap') li_list=ss.find_all('li') patter=re.compile(r'\s',re.s) for num in range(len(li_list)): worksheet.write(num, 0, label=li_list[num].find('a',tongji_label="listclick").text.strip()) p1=li_list[num].find_all('p')[0] span_list=p1.find_all('span') worksheet.write(num, 1, label=patter.sub('',span_list[0].text)) worksheet.write(num, 2, label=patter.sub('',span_list[1].text)) worksheet.write(num, 3, label=patter.sub('',span_list[2].text)) worksheet.write(num, 4, label=patter.sub('',span_list[3].text)) worksheet.write(num,5, label=patter.sub('',li_list[num].find('div',class_='jjrinfo').text)) worksheet.write(num, 6, label=li_list[num].find('p',class_='sum').text) worksheet.write(num, 7, label=li_list[num].find('p',class_='unit').text) workbook.save('myworkbook.xls')