处理 Excel 电子表格
卓越的人一大优点是:在不利与艰难的遭遇里百折不挠。——贝多芬
将 xls 格式的文档转换成 xlsx 格式,这样后面才能用 openpyxl 读取。
利用 pandas 模块,还有 xlrd 模块(pandas 读取 xls 文件时依赖 xlrd)。
(1) 安装 pandas 模块
pip install pandas
pip install xlrd
(2) 使用模块
import pandas
(3) 转换
- 要是只有一个 sheet 这样可以
import os

import pandas as pd

# Source workbook in the legacy .xls format.
url = "./xml/测试数据.xls"
result_path = os.path.basename(url)
print(result_path)  # file name only, without the directory part

# read_excel already returns a DataFrame, so no extra pd.DataFrame() wrapper
# is needed; reading from `url` keeps the path defined in one place.
result = pd.read_excel(url)
# Appending 'x' to the file name turns the .xls name into an .xlsx name.
result.to_excel('./xml/' + result_path + 'x', index=False)
# Result:
# 测试数据.xls is converted to 测试数据.xlsx
- 一个文件多个 sheet 必须用到循环
import os

import pandas as pd

url = "./xml/测试数据.xls"
result_path = os.path.basename(url)
print(result_path)

# sheet_name accepts either a sheet title (str) or a 0-based position (int);
# here the second sheet is read on its own as a demonstration.
result = pd.read_excel(url, sheet_name=1)
print(result)

# Output path: same file name with 'x' appended (.xls -> .xlsx).
final_path = './xml/' + result_path + 'x'

# sheet_name=None loads every sheet in one call and returns a dict that maps
# each sheet title to its DataFrame, so there is no need to probe a fixed
# range of indexes and swallow the errors for sheets that do not exist.
all_sheets = pd.read_excel(url, sheet_name=None)

# A single ExcelWriter must stay open for all sheets; calling to_excel with a
# plain path once per sheet would overwrite the file each time. The writer
# creates the file itself, so no empty placeholder file is written first.
# The next line is required to silence a pylint false positive.
# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(final_path) as writer:
    for sheet_title, frame in all_sheets.items():
        print(frame)
        # Keep the original sheet title and, like the single-sheet example,
        # do not write the DataFrame index as an extra column.
        frame.to_excel(writer, sheet_name=sheet_title, index=False)
安装 openpyxl 模块
openpyxl 只支持读取 xlsx,所以在使用前一定要确保文件是 xlsx 格式。xls 文件可以先通过 pandas 转换。
- 安装模块
pip install openpyxl
读取 Excel 文档
(1) 用 openpyxl 模块 打开 Excel 文档
- load_workbook()
import openpyxl
result = openpyxl.load_workbook('./xml/测试数据.xlsx') # 获取到最终要操作的对象
print(type(result))
# 结果
# <class 'openpyxl.workbook.workbook.Workbook'>
(2) 获取到 Sheet
xxx.sheetnames 获取到所有 sheet 名字合集,他返回的是一个 list
xxx['Sheet1'] 这里特别强调的就是 [] 里面是指定的 Sheet 名称,它返回的就是 <Worksheet "Sheet1">
xxx.active 这里就是获取到活跃的 Sheet
import openpyxl
result = openpyxl.load_workbook('./xml/测试数据.xlsx') # 获取到最终要操作的对象
print(result.sheetnames) # 获取到Excel所有的sheet名称
print(result['Sheet1']) # 获取到Excel里面指定的Sheet内容
print(result.active) # 获取到活跃的Sheet内容
print(result.active.title) # 获取到活跃的Sheet内容的title
(3) 从表中获取到单元格
- 获取到一个单元格的数据
import openpyxl
result = openpyxl.load_workbook('./xml/测试2.xlsx') # 获取到最终要操作的对象
Sheet_content = result.active # 获取到活动的Sheet
Cell_content = Sheet_content['K2'] # 获取到单元格K2 指定的位置
print(Cell_content.value) # 获取到单元格K2的内容
- 有的时候你不知道单元格 但是你知道行或者列, 这样也可以
import openpyxl
result = openpyxl.load_workbook('./xml/测试数据.xlsx')
Sheet = result.active
Cell_content = Sheet.cell(row=2, column=12)
print(Cell_content.value)
# 结果
# 结果就是找到第二行列是12列的单元格数据
- 有的时候你需要获取特定的数据
比如我想获取到所有第 2 行,第 4 行,第 6 行,第 8 行匹配第 5 列的数据
import openpyxl
result = openpyxl.load_workbook('./xml/测试数据.xlsx') # 加载XLSX
Sheet = result.active # 获取到Sheet
result_content = [] # 空列表
for i in range(2, 10, 2):
Cell_content = Sheet.cell(row=i, column=5)
result_content.append(Cell_content.value)
print(result_content)
# 结果
# 结果就是找到第 2,4,6,8,行匹配第5列的数据
# [4.8773, 4.9222, 5.0121, 4.8998]
- 获取最大行和最大列
max_row 和 max_column
import openpyxl
result = openpyxl.load_workbook('./xml/测试数据.xlsx')
Sheet = result.active
maxrownum = Sheet.max_row # 最大行
minrownum = Sheet.min_row # 最小行
maxcolumnnum = Sheet.max_column # 最大列
mincolumnnum = Sheet.min_column # 最小列
print(maxrownum)
print(minrownum)
print(maxcolumnnum)
print(mincolumnnum)
(4) 列字母和数字之间的转换
要从字母转换到数字,就需要用 column_index_from_string()
要从数字转换到字母,就需要用 get_column_letter()函数
举个例子它自动帮你把 13 变成 M,也可以帮你把 M 变成 13
import openpyxl
from openpyxl.utils import get_column_letter, column_index_from_string
cell_result = 13
print(type(cell_result)) # 它的类型是int
cell_result_str = get_column_letter(cell_result)
print(cell_result_str) # 结果M
cell_str = 'AA'
cell_result_num = column_index_from_string(cell_str)
print(cell_result_num)
# 结果
# M
# 27
(5)读取 Excel 中每一行和列
- 获取列的内容
import openpyxl
result = openpyxl.load_workbook("./xml/测试数据.xlsx")
Sheet = result.active # 获取到活跃的Sheet
print(Sheet)
Column_content = list(Sheet.columns)[0] # 必须要加入list 因为返回的是生成器,第一列从0开始
for content in Column_content:
print(content.value) # 获取到第一列所有的数据
# 这样第一列的内容就出来了
- 获取行的内容
import openpyxl

result = openpyxl.load_workbook("./xml/测试数据.xlsx")
Sheet = result.active  # the active worksheet
print(Sheet)
# Sheet.rows is a generator, so it is materialized with list() before
# indexing; index 0 is the first row.
Row_content = list(Sheet.rows)[0]
for content in Row_content:
    print(content.value)  # value of each cell in the first row
# This prints the contents of the whole first row.
写入 Excel 模块
创建并保存 Excel 文档
- openpyxl.Workbook()
import openpyxl
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = "第一页" # 给Sheet起了名字
wb.save('./第一次测试.xlsx')
创建和删除 sheet
create_sheet() 创建
remove() 删除
import openpyxl
wb = openpyxl.Workbook()
# Create sheets
wb.create_sheet(index=0, title="第一页")  # insert a new sheet at position 0
wb.create_sheet(index=1, title="第二页")  # insert a new sheet at position 1
wb.save('./第一次测试.xlsx')
# Result
# One Excel file is saved containing the two sheets created above.
# NOTE(review): Workbook() already contains a default sheet, so the saved file
# presumably holds three sheets rather than two — confirm by opening it.
- 删除
import openpyxl
wb = openpyxl.Workbook()
# 创建
wb.create_sheet(index=0, title="第一页") # 创建第一个sheet
wb.create_sheet(index=1, title="第二页") # 创建第二个sheet
# 删除
wb.remove(wb['第二页'])
wb.save('./第一次测试.xlsx')
创建一个 excel 文件,并写入不同的内容
import time
import datetime
from openpyxl import Workbook
wb = Workbook() # 创建文件对象
# grab the active worksheet
ws = wb.active # 获取第一个sheet
# Data can be assigned directly to cells
ws['A1'] = 42 # 写入数字
ws['B1'] = "你好"+"automation test" # 写入中文(unicode中文也可)
# Rows can also be appended
ws.append([1, 2, 3]) # 写入多个单元格
# Python types will automatically be converted
ws['A2'] = datetime.datetime.now() # 写入一个当前时间
# 写入一个自定义的时间格式
ws['A3'] = time.strftime('%Y{y}%m{m}%d{d} %H{h}%M{f}%S{s}').format(
y='年', m='月', d='日', h='时', f='分', s='秒')
# Save the file
wb.save("./example.xlsx")
创建 sheet
create_sheet(名字,位置)
ws1.sheet_properties.tabColor 设置 tab 颜色
# -*- coding: utf-8 -*-
from openpyxl import Workbook
wb = Workbook()
ws1 = wb.create_sheet("Mysheet") # 创建一个sheet
ws1.title = "New Title" # 设定一个sheet的名字
ws2 = wb.create_sheet("Mysheet", 0) # 设定sheet的插入位置 默认插在后面
ws2.title = "你好" # 设定一个sheet的名字 必须是Unicode
ws1.sheet_properties.tabColor = "1072BA" # 设定sheet的标签的背景颜色
# 获取全部sheet 的名字,遍历sheet名字
for content in wb.sheetnames:
print(content)
for sheet in wb:
print(sheet.title)
# 复制一个sheet
wb["New Title"]["A1"] = "zeke"
source = wb["New Title"]
target = wb.copy_worksheet(source)
# w3 = wb.copy_worksheet(wb['new title'])
# ws3.title = 'new2'
# wb.copy_worksheet(wb['new title']).title = 'hello'
# Save the file
wb.save("./example.xlsx")
操作单元格
# -*- coding: utf-8 -*-
from openpyxl import Workbook
wb = Workbook()
ws1 = wb.create_sheet("Mysheet", 0) # 创建一个sheet
ws1["A1"] = 123.11
ws1["B2"] = "你好"
d = ws1.cell(row=4, column=2, value=10)
print(ws1["A1"].value)
print(ws1["B2"].value)
print(d.value)
# Save the file
wb.save("./example.xlsx")
操作批量的单元格
- xxx["A:C"] 操作多列
# -*- coding: utf-8 -*-
from openpyxl import Workbook
wb = Workbook()
ws1 = wb.create_sheet("Mysheet", 0) # 创建一个sheet
ws1["A1"] = 1
ws1["A2"] = 2
ws1["A3"] = 3
ws1["B1"] = 4
ws1["B2"] = 5
ws1["B3"] = 6
ws1["C1"] = 7
ws1["C2"] = 8
ws1["C3"] = 9
# 操作单列
print(ws1["A"])
for cell in ws1["A"]:
print(cell.value)
# 操作多列,获取每一个值
print(ws1["A:C"])
for column in ws1["A:C"]:
for cell in column:
print(cell.value)
# 操作多行
row_range = ws1[1:3]
print(row_range)
for row in row_range:
for cell in row:
print(cell.value)
print("*"*50)
for row in ws1.iter_rows(min_row=1, min_col=1, max_col=3, max_row=3):
for cell in row:
print(cell.value)
# 获取所有行
print(ws1.rows)
for row in ws1.rows:
print(row)
print("*"*50)
# 获取所有列
print(ws1.columns)
for col in ws1.columns:
print(col)
wb.save("./example.xlsx")
操作已经存在的文件
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook
wb = load_workbook("./example.xlsx")
wb.guess_types = True # 猜测格式类型
ws = wb.active
ws["D1"] = "12%"
print(ws["D1"].value)
# Save the file
wb.save("./example.xlsx")
# 注意如果原文件有一些图片或者图标,则保存的时候可能会导致图片丢失
单元格类型
# -*- coding: utf-8 -*-
import datetime

from openpyxl import Workbook
from openpyxl import load_workbook

wb = load_workbook('./example.xlsx')
ws = wb.active
# NOTE(review): the sample output below shows "General" for "12%", which
# suggests this attribute has no effect in the openpyxl version used —
# confirm against the installed release.
wb.guess_types = True
# Each print reports the number format of the cell that was just written.
ws["A1"] = datetime.datetime(2010, 7, 21)
print(ws["A1"].number_format)
ws["A2"] = "12%"
print(ws["A2"].number_format)
ws["A3"] = 1.1
print(ws["A3"].number_format)
ws["A4"] = "中国"
print(ws["A4"].number_format)
# Save the file
wb.save("./example.xlsx")
# A plain cell shows General, a number formatted in Excel shows '0.00_ ',
# and a percentage shows 0%.
# Numbers only get a numeric format when it is set in Excel; a number written
# directly from code keeps the General format.
# Result
# yyyy-mm-dd h:mm:ss
# General
# General
# General
使用公式
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook
wb = load_workbook('./example.xlsx')
ws1 = wb.active
ws1["A1"] = 1
ws1["A2"] = 2
ws1["A3"] = 3
ws1["A4"] = "=SUM(1, 1)"
ws1["A5"] = "=SUM(A1:A3)"
print(ws1["A4"].value) # 打印的是公式内容,不是公式计算后的值,程序无法取到计算后的值
print(ws1["A5"].value) # 打印的是公式内容,不是公式计算后的值,程序无法取到计算后的值
# Save the file
wb.save("./example.xlsx")
合并单元格
合并单元格 merge_cells
拆分单元格 unmerge_cells
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook
wb = load_workbook('./example.xlsx')
ws = wb.active
# ws.merge_cells('A2:D2')
# ws.unmerge_cells('A2:D2') # 合并后的单元格,脚本单独执行拆分操作会报错,需要重新执行合并操作再拆分
# or equivalently
ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=4)
ws.unmerge_cells(start_row=2, start_column=1, end_row=2, end_column=4)
# Save the file
wb.save("./example.xlsx")
插入一个图片
插入之前必须安装 pip install pillow
add_image
# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
wb = load_workbook('./example.xlsx')
ws1 = wb.active
img = Image('./img/1.jpg')
ws1.add_image(img, 'A1')
# Save the file
wb.save("./example.xlsx")
隐藏单元格
- ws1.column_dimensions.group('A', 'D', hidden=True) # 隐藏 A 到 D 列范围内的列
# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
wb = load_workbook('./example.xlsx')
ws1 = wb.active
ws1.column_dimensions.group('A', 'D', hidden=True)  # group and hide columns A through D
# The original note here says ws1.row_dimensions has no group method.
# NOTE(review): newer openpyxl documentation appears to show
# ws.row_dimensions.group(start, end, hidden=True) — confirm against the
# installed version.
# Save the file
wb.save("./example.xlsx")
设定一个表格区域,并设定表格的形式
- Table
# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
wb = Workbook()
ws = wb.active
data = [
['Apples', 10000, 5000, 8000, 6000],
['Pears', 2000, 3000, 4000, 5000],
['Bananas', 6000, 6000, 6500, 6000],
['Oranges', 500, 300, 200, 700],
]
# add column headings. NB. these must be strings
ws.append(["Fruit", "2011", "2012", "2013", "2014"])
for row in data:
ws.append(row)
tab = Table(displayName="Table1", ref="A1:E5")
# Add a default style with striped rows and banded columns
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=True,
showLastColumn=True, showRowStripes=True, showColumnStripes=True)
# 第一列是否和样式第一行颜色一行,第二列是否···
# 是否隔行换色,是否隔列换色
tab.tableStyleInfo = style
ws.add_table(tab)
# Save the file
wb.save("./example.xlsx")
给单元格设置样式
- Font
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font

wb = Workbook()
ws = wb.active
a1 = ws['A1']
d4 = ws['D4']
# Colors are given as hex RGB strings (e.g. "FFBB00"). The named constant
# colors.RED is not available in current openpyxl releases, so the hex code
# for red is used directly.
ft = Font(color="FF0000")
a1.font = ft
d4.font = ft
# Style objects are not modified in place: to change a font, assign a new
# Font object to the cell.
# italic=True renders the text in italics.
a1.font = Font(color="FF0000", italic=True)  # the change only affects A1
a1.value = "abc"
# Save the file
wb.save("./example.xlsx")
设定字体和大小
# -*- coding: utf-8 -*-
from copy import copy
from openpyxl import Workbook
from openpyxl.styles import colors
from openpyxl.styles import Font
wb = Workbook()
ws = wb.active
a1 = ws['A1']
d4 = ws['D4']
a1.value = "abc"
ft1 = Font(name=u'宋体', size=14)
ft2 = copy(ft1) # 复制字体对象
ft2.name = "Tahoma"
print(ft1.name)
print(ft2.name)
print(ft2.size) # copied from the
a1.font = ft1
# Save the file
wb.save("./example.xlsx")
设置行和列的字体
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font
wb = Workbook()
ws = wb.active
col = ws.column_dimensions['A']
col.font = Font(bold=True) # 将A列设定为粗体
row = ws.row_dimensions[1]
row.font = Font(underline="single") # 将第一行设定为下划线格式
# 有的时候行和列在一行比如A1 那么后写的会覆盖前面的样式
# Save the file
wb.save("./example.xlsx")
设置单元格的边框 字体 颜色 大小 和边框背景色
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.styles import NamedStyle, Font, Border, Side, PatternFill
wb = Workbook()
ws = wb.active
highlight = NamedStyle(name="highlight")
highlight.font = Font(bold=True, size=20, color="ff0100")
highlight.fill = PatternFill("solid", fgColor="DDDDDD") # 背景填充
bd = Side(style='thick', color="000000")
highlight.border = Border(left=bd, top=bd, right=bd, bottom=bd)
print(dir(ws["A1"]))
ws["A1"].style = highlight
# Save the file
wb.save("./example.xlsx")
常用的样式和属性
# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.styles import NamedStyle, Font, Border, Side, PatternFill
from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font
wb = Workbook()
ws = wb.active
ft = Font(name=u'微软雅黑',
size=11,
bold=False,
italic=False,
vertAlign=None,
underline='none',
strike=False,
color='FF000000')
fill = PatternFill(fill_type="solid",
start_color='FFEEFFFF',
end_color='FF001100')
# 边框可以选择的值为:'hair', 'medium', 'dashDot', 'dotted', 'mediumDashDot', 'dashed', 'mediumDashed', 'mediumDashDotDot', 'dashDotDot', 'slantDashDot', 'double', 'thick', 'thin']
# diagonal 表示对角线
bd = Border(left=Side(border_style="thin",
color='FF001000'),
right=Side(border_style="thin",
color='FF110000'),
top=Side(border_style="thin",
color='FF110000'),
bottom=Side(border_style="thin",
color='FF110000'),
diagonal=Side(border_style=None,
color='FF000000'),
diagonal_direction=0,
outline=Side(border_style=None,
color='FF000000'),
vertical=Side(border_style=None,
color='FF000000'),
horizontal=Side(border_style=None,
color='FF110000')
)
alignment = Alignment(horizontal='general',
vertical='bottom',
text_rotation=0,
wrap_text=False,
shrink_to_fit=False,
indent=0)
number_format = 'General'
protection = Protection(locked=True,
hidden=False)
ws["B5"].font = ft
ws["B5"].fill = fill
ws["B5"].border = bd
ws["B5"].alignment = alignment
ws["B5"].number_format = number_format
ws["B5"].value = "zeke"
# Save the file
wb.save("./example.xlsx")