Python 处理excel表格(数据方向)

处理 Excel 电子表格

卓越的人一大优点是:在不利与艰难的遭遇里百折不挠。——贝多芬

将 xls 格式的文档转换成 xlsx 格式,这样后面才能用 openpyxl 读取

利用 pandas 模块,还有 xlrd 模块(pandas 读取 xls 时需要 xlrd;写出 xlsx 时还需要 openpyxl,安装方法见后文)

(1) 安装 pandas 模块


pip install pandas

pip install xlrd

(2) 使用模块


import pandas

(3) 转换

  • 要是只有一个 sheet 这样可以

import os

import pandas as pd

# Single source of truth for the input path; every later step derives from it.
url = "./xml/测试数据.xls"

result_path = os.path.basename(url)
print(result_path)  # file name only, without the directory part

# With the default sheet_name, read_excel already returns a DataFrame for one
# sheet, so wrapping it in pd.DataFrame(...) again is unnecessary.
result = pd.read_excel(url)

# Write next to the source file; appending "x" turns ".xls" into ".xlsx".
# index=False keeps the DataFrame row index out of the output sheet.
output_path = os.path.join(os.path.dirname(url), result_path + 'x')
result.to_excel(output_path, index=False)

# Outcome: 测试数据.xls is converted to 测试数据.xlsx
  • 一个文件多个 sheet 必须用到循环

import os

import pandas as pd

# Input workbook; the output is written next to it with an ".xlsx" suffix.
url = "./xml/测试数据.xls"

result_path = os.path.basename(url)
print(result_path)

final_path = os.path.join(os.path.dirname(url), result_path + 'x')

# sheet_name accepts an int (position) or a str (sheet title) for one sheet.
# With a list or None, read_excel returns a dict of DataFrames instead.
# None loads every sheet in a single pass, so there is no need to guess how
# many sheets exist or to swallow errors for the ones that do not.
sheets = pd.read_excel(url, sheet_name=None)

# One ExcelWriter must stay open for all sheets; separate to_excel(path)
# calls would each overwrite the file and leave only the last sheet.
# The next line silences a pylint false positive on ExcelWriter.
# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(final_path) as writer:
    for sheet_title, frame in sheets.items():
        print(frame)
        # Keep the original sheet title and leave the row index out so the
        # converted workbook mirrors the source.
        frame.to_excel(writer, sheet_name=sheet_title, index=False)

安装 openpyxl 模块

openpyxl 只支持读写 xlsx 格式,所以在使用前一定要确保文件是 xlsx 格式;xls 文件可以先通过 pandas 转换

  • 安装模块

pip install openpyxl

读取 Excel 文档

(1) 用 openpyxl 模块 打开 Excel 文档

  • load_workbook()

import openpyxl

# load_workbook() returns the workbook object that every later operation
# goes through.
workbook = openpyxl.load_workbook('./xml/测试数据.xlsx')

workbook_type = type(workbook)
print(workbook_type)

# Expected output:
# <class 'openpyxl.workbook.workbook.Workbook'>

(2) 获取到 Sheet

  • xxx.sheetnames 获取到所有 sheet 名字合集,他返回的是一个 list

  • xxx['Sheet1'] 这里特别强调的是 [] 里面就是指定的 Sheet 名称,它返回的就是 <Worksheet "Sheet1">

  • xxx.active 这里就是获取到活跃的 Sheet


import openpyxl

workbook = openpyxl.load_workbook('./xml/测试数据.xlsx')

# Titles of every sheet in the workbook.
all_titles = workbook.sheetnames
print(all_titles)

# Indexing the workbook with a title yields that worksheet.
named_sheet = workbook['Sheet1']
print(named_sheet)

# .active is the currently active worksheet; .title is its name.
active_sheet = workbook.active
print(active_sheet)
print(active_sheet.title)

(3) 从表中获取到单元格

  • 获取到一个单元格的数据

import openpyxl

workbook = openpyxl.load_workbook('./xml/测试2.xlsx')

# Address a cell by its coordinate on the active sheet and print what it holds.
active_sheet = workbook.active
k2_cell = active_sheet['K2']
print(k2_cell.value)
  • 有的时候你不知道单元格 但是你知道行或者列, 这样也可以

import openpyxl

workbook = openpyxl.load_workbook('./xml/测试数据.xlsx')
active_sheet = workbook.active

# cell() takes numeric row/column positions instead of an "L2"-style name.
target_cell = active_sheet.cell(row=2, column=12)
print(target_cell.value)

# Outcome: prints the value stored at row 2, column 12.
  • 有的时候你需要获取特定的数据

比如我想获取到所有第 2 行,第 4 行,第 6 行,第 8 行匹配第 5 列的数据


import openpyxl

workbook = openpyxl.load_workbook('./xml/测试数据.xlsx')
active_sheet = workbook.active

# range(2, 10, 2) visits rows 2, 4, 6 and 8; column 5 is read from each.
result_content = [
    active_sheet.cell(row=row_no, column=5).value
    for row_no in range(2, 10, 2)
]
print(result_content)

# Outcome: the column-5 values of rows 2, 4, 6 and 8, for example
# [4.8773, 4.9222, 5.0121, 4.8998]
  • 获取最大行和最大列

max_row 和 max_column


import openpyxl

workbook = openpyxl.load_workbook('./xml/测试数据.xlsx')
active_sheet = workbook.active

# Printed in this order: max row, min row, max column, min column.
bounds = (
    active_sheet.max_row,
    active_sheet.min_row,
    active_sheet.max_column,
    active_sheet.min_column,
)
for bound in bounds:
    print(bound)

(4) 列字母和数字之间的转换

  • 要从字母转换到数字,就需要用 column_index_from_string()

  • 要从数字转换到字母,就需要用 get_column_letter()函数

  • 举个例子它自动帮你把 13 变成 M,也可以帮你把 M 变成 13


import openpyxl

from openpyxl.utils import get_column_letter, column_index_from_string

# Number -> letter.
column_number = 13
print(type(column_number))  # an int
print(get_column_letter(column_number))

# Letter(s) -> number.
column_letters = 'AA'
print(column_index_from_string(column_letters))

# Expected output after the type line:
# M
# 27

(5)读取 Excel 中每一行和列

  • 获取列的内容

import openpyxl

workbook = openpyxl.load_workbook("./xml/测试数据.xlsx")
active_sheet = workbook.active
print(active_sheet)

# .columns is a generator, so it has to be turned into a list before it can
# be indexed; position 0 is the first column.
all_columns = list(active_sheet.columns)
first_column = all_columns[0]

# Print every value in the first column.
for cell in first_column:
    print(cell.value)
  • 获取行的内容

import openpyxl

workbook = openpyxl.load_workbook("./xml/测试数据.xlsx")
active_sheet = workbook.active
print(active_sheet)

# .rows is a generator, so it has to be turned into a list before it can be
# indexed; position 0 is the first row.
all_rows = list(active_sheet.rows)
first_row = all_rows[0]

# Print every value in the first row.
for cell in first_row:
    print(cell.value)

写入 Excel 模块

创建并保存 Excel 文档

  • openpyxl.Workbook()

import openpyxl

# Create a workbook in memory, rename its active sheet, then write it to disk.
workbook = openpyxl.Workbook()
first_sheet = workbook.active
first_sheet.title = "第一页"

workbook.save('./第一次测试.xlsx')

创建和删除 sheet

  • create_sheet() 创建

  • remove() 删除


import openpyxl

wb = openpyxl.Workbook()

# A new Workbook already contains one default sheet. Keep a handle on it
# before inserting anything at index 0, because wb.active is looked up by
# position and would then refer to the newly inserted sheet.
default_sheet = wb.active

# create_sheet(index=..., title=...) inserts a sheet at the given position.
wb.create_sheet(index=0, title="第一页")
wb.create_sheet(index=1, title="第二页")

# Drop the default sheet so the saved file holds exactly the two sheets
# created above.
wb.remove(default_sheet)
wb.save('./第一次测试.xlsx')

# Outcome: one Excel file containing two sheets.
  • 删除

import openpyxl

workbook = openpyxl.Workbook()

# Add two sheets at explicit positions.
for position, title in enumerate(("第一页", "第二页")):
    workbook.create_sheet(index=position, title=title)

# remove() takes the worksheet object, not its title.
second_page = workbook['第二页']
workbook.remove(second_page)

workbook.save('./第一次测试.xlsx')

创建一个 excel 文件,并写入不同的内容


import time
import datetime
from openpyxl import Workbook

workbook = Workbook()
sheet = workbook.active  # the active worksheet of the new workbook

# Values can be assigned straight to a cell coordinate.
sheet['A1'] = 42
sheet['B1'] = "你好"+"automation test"

# A whole row can be appended from a list.
sheet.append([1, 2, 3])

# Python datetime objects are accepted as cell values.
sheet['A2'] = datetime.datetime.now()

# Custom timestamp text: strftime only sees ASCII placeholders, and the CJK
# unit characters are substituted afterwards through str.format.
unit_labels = dict(y='年', m='月', d='日', h='时', f='分', s='秒')
timestamp_pattern = '%Y{y}%m{m}%d{d} %H{h}%M{f}%S{s}'
sheet['A3'] = time.strftime(timestamp_pattern).format(**unit_labels)

workbook.save("./example.xlsx")

创建 sheet

  • create_sheet(名字,位置)

  • ws1.sheet_properties.tabColor 设置 tab 颜色


# -*- coding: utf-8 -*-

from openpyxl import Workbook

workbook = Workbook()

# Without a position argument the new sheet goes at the end.
ws1 = workbook.create_sheet("Mysheet")
ws1.title = "New Title"
ws1.sheet_properties.tabColor = "1072BA"  # tab background colour

# The second argument is the insert position; 0 puts the sheet first.
ws2 = workbook.create_sheet("Mysheet", 0)
ws2.title = "你好"

# Two equivalent ways to list every sheet title.
for title in workbook.sheetnames:
    print(title)

for worksheet in workbook:
    print(worksheet.title)

# Write a value into the sheet, then duplicate that sheet inside the workbook.
source = workbook["New Title"]
source["A1"] = "zeke"
target = workbook.copy_worksheet(source)

workbook.save("./example.xlsx")

操作单元格


# -*- coding: utf-8 -*-
from openpyxl import Workbook

workbook = Workbook()
sheet = workbook.create_sheet("Mysheet", 0)

# Assign by coordinate.
sheet["A1"] = 123.11
sheet["B2"] = "你好"
# Assign by row/column; cell() also returns the cell it wrote to.
b4_cell = sheet.cell(row=4, column=2, value=10)

for shown in (sheet["A1"].value, sheet["B2"].value, b4_cell.value):
    print(shown)

workbook.save("./example.xlsx")

操作批量的单元格

  • xxx["A:C"] 操作多列

# -*- coding: utf-8 -*-
from openpyxl import Workbook

wb = Workbook()
ws1 = wb.create_sheet("Mysheet", 0)

# Fill A1:C3 column by column with 1..9 (A1=1, A2=2, ..., C3=9).
next_value = 1
for col_letter in "ABC":
    for row_no in (1, 2, 3):
        ws1[col_letter + str(row_no)] = next_value
        next_value += 1

# One column.
single_column = ws1["A"]
print(single_column)
for cell in single_column:
    print(cell.value)

# Several columns: the outer loop yields columns, the inner loop cells.
column_block = ws1["A:C"]
print(column_block)
for column in column_block:
    for cell in column:
        print(cell.value)

# Several rows, selected by slicing the sheet with row numbers.
row_block = ws1[1:3]
print(row_block)
for row in row_block:
    for cell in row:
        print(cell.value)

print("*"*50)
# The same area, walked row by row with explicit bounds.
for row in ws1.iter_rows(min_row=1, max_row=3, min_col=1, max_col=3):
    for cell in row:
        print(cell.value)

# Every row.
print(ws1.rows)
for row in ws1.rows:
    print(row)

print("*"*50)
# Every column.
print(ws1.columns)
for col in ws1.columns:
    print(col)

wb.save("./example.xlsx")

操作已经存在的文件


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook

wb = load_workbook("./example.xlsx")
ws = wb.active

# Store the percentage as a number plus a percent display format rather than
# writing the text "12%" and relying on wb.guess_types. Type guessing is only
# honoured by old openpyxl releases (reported removed in 2.6 — verify against
# the installed version); without it the cell would just hold a string.
ws["D1"] = 0.12
ws["D1"].number_format = "0%"
print(ws["D1"].value)

wb.save("./example.xlsx")
# NOTE: if the original file contains pictures or charts, saving it this way
# may lose them.

单元格类型


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook
import datetime

wb = load_workbook('./example.xlsx')

ws = wb.active
# Only old openpyxl releases act on this flag (reported removed in 2.6 —
# verify against the installed version); elsewhere it has no effect.
wb.guess_types = True

# Each print reports the number format of the cell that was just written.
ws["A1"] = datetime.datetime(2010, 7, 21)
print(ws["A1"].number_format)

ws["A2"] = "12%"
print(ws["A2"].number_format)

ws["A3"] = 1.1
print(ws["A3"].number_format)

ws["A4"] = "中国"
print(ws["A4"].number_format)

wb.save("./example.xlsx")
# Plain values report "General"; a datetime gets a date format. Numbers
# written directly stay "General" unless number_format is set explicitly.
# Expected output when type guessing is not in effect:
# yyyy-mm-dd h:mm:ss
# General
# General
# General

使用公式


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook

workbook = load_workbook('./example.xlsx')
sheet = workbook.active

# Plain numbers in A1:A3.
for row_no in (1, 2, 3):
    sheet["A" + str(row_no)] = row_no

# A formula is written as a string that starts with "=".
sheet["A4"] = "=SUM(1, 1)"
sheet["A5"] = "=SUM(A1:A3)"

# Reading the cell back gives the formula text, not the computed result;
# the script cannot obtain the calculated value here.
for coordinate in ("A4", "A5"):
    print(sheet[coordinate].value)

workbook.save("./example.xlsx")

合并单元格

  • 合并单元格 merge_cells

  • 拆分单元格 unmerge_cells


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl import load_workbook

workbook = load_workbook('./example.xlsx')
sheet = workbook.active

# Row 2, columns 1-4: the same area as the range string 'A2:D2', which both
# calls would accept instead of the keyword bounds.
bounds = dict(start_row=2, start_column=1, end_row=2, end_column=4)

# Per the original author: un-merging on its own in a separate run raises an
# error, so merge first and then un-merge.
sheet.merge_cells(**bounds)
sheet.unmerge_cells(**bounds)

workbook.save("./example.xlsx")

插入一个图片

  • 插入之前必须安装 pip install pillow

  • add_image


# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from openpyxl.drawing.image import Image

workbook = load_workbook('./example.xlsx')
sheet = workbook.active

# Anchor the picture at cell A1 (pillow must be installed for Image).
picture = Image('./img/1.jpg')
sheet.add_image(picture, 'A1')

workbook.save("./example.xlsx")

隐藏单元格

  • ws1.column_dimensions.group('A', 'D', hidden=True) # 隐藏 A 到 D 列范围内的列

# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from openpyxl.drawing.image import Image

workbook = load_workbook('./example.xlsx')
sheet = workbook.active

# Group columns A through D and mark the group hidden.
first_col, last_col = 'A', 'D'
sheet.column_dimensions.group(first_col, last_col, hidden=True)
# NOTE(review): the original author states row_dimensions has no group()
# method; confirm against the installed openpyxl version.

workbook.save("./example.xlsx")

设定一个表格区域,并设定表格的形式

  • Table

# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.worksheet.table import Table, TableStyleInfo

workbook = Workbook()
sheet = workbook.active

# Column headings come first; they must be strings.
header = ["Fruit", "2011", "2012", "2013", "2014"]
data = [
    ['Apples', 10000, 5000, 8000, 6000],
    ['Pears', 2000, 3000, 4000, 5000],
    ['Bananas', 6000, 6000, 6500, 6000],
    ['Oranges', 500, 300, 200, 700],
]
for line in [header] + data:
    sheet.append(line)

# The table covers the header row plus the four data rows.
table = Table(displayName="Table1", ref="A1:E5")

# Style flags: emphasise first/last column, and alternate the shading of
# rows and of columns.
table.tableStyleInfo = TableStyleInfo(
    name="TableStyleMedium9",
    showFirstColumn=True,
    showLastColumn=True,
    showRowStripes=True,
    showColumnStripes=True,
)
sheet.add_table(table)

workbook.save("./example.xlsx")

给单元格设置样式

  • Font

# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import colors
from openpyxl.styles import Font

# Red as an aRGB hex string. A literal is used instead of colors.RED because
# that named constant is not available in current openpyxl releases; hex
# codes such as "FFBB00" are accepted as well.
RED_HEX = "FFFF0000"

wb = Workbook()
ws = wb.active

a1 = ws['A1']
d4 = ws['D4']
ft = Font(color=RED_HEX)
a1.font = ft
d4.font = ft

# To change a font, build a new Font and assign it again; this reassignment
# only affects A1. italic=True gives slanted text.
a1.font = Font(color=RED_HEX, italic=True)
a1.value = "abc"

wb.save("./example.xlsx")

设定字体和大小


# -*- coding: utf-8 -*-
from copy import copy
from openpyxl import Workbook
from openpyxl.styles import colors
from openpyxl.styles import Font

workbook = Workbook()
sheet = workbook.active

a1 = sheet['A1']
d4 = sheet['D4']
a1.value = "abc"

# Build a base font, then derive a second one from a copy so that changing
# the copy's name leaves the base untouched.
base_font = Font(name=u'宋体', size=14)
derived_font = copy(base_font)
derived_font.name = "Tahoma"

for shown in (base_font.name, derived_font.name, derived_font.size):
    print(shown)  # the size was carried over by the copy

a1.font = base_font

workbook.save("./example.xlsx")

设置行和列的字体


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font

workbook = Workbook()
sheet = workbook.active

# Bold for the whole of column A.
column_a = sheet.column_dimensions['A']
column_a.font = Font(bold=True)

# Single underline for the whole of row 1.
row_1 = sheet.row_dimensions[1]
row_1.font = Font(underline="single")

# Per the original author: where a styled row and column cross (A1 here),
# the style written later overrides the earlier one.
workbook.save("./example.xlsx")

设置单元格的边框 字体 颜色 大小 和边框背景色


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.styles import NamedStyle, Font, Border, Side, PatternFill

workbook = Workbook()
sheet = workbook.active

# A named style bundles font, fill and border under one reusable name.
thick_black = Side(style='thick', color="000000")

highlight = NamedStyle(name="highlight")
highlight.font = Font(bold=True, size=20, color="ff0100")
highlight.fill = PatternFill("solid", fgColor="DDDDDD")  # background fill
highlight.border = Border(
    left=thick_black,
    top=thick_black,
    right=thick_black,
    bottom=thick_black,
)

target_cell = sheet["A1"]
print(dir(target_cell))  # list the attributes available on a cell
target_cell.style = highlight

workbook.save("./example.xlsx")

常用的样式和属性


# -*- coding: utf-8 -*-
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.styles import NamedStyle, Font, Border, Side, PatternFill
from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font

wb = Workbook()
ws = wb.active

ft = Font(name=u'微软雅黑',
          size=11,
          bold=False,
          italic=False,
          vertAlign=None,
          underline='none',
          strike=False,
          color='FF000000')

fill = PatternFill(fill_type="solid",
                   start_color='FFEEFFFF',
                   end_color='FF001100')

# Border styles to choose from: 'hair', 'medium', 'dashDot', 'dotted',
# 'mediumDashDot', 'dashed', 'mediumDashed', 'mediumDashDotDot',
# 'dashDotDot', 'slantDashDot', 'double', 'thick', 'thin'.
# "diagonal" is the diagonal line through the cell.
bd = Border(left=Side(border_style="thin",
                      color='FF001000'),
            right=Side(border_style="thin",
                       color='FF110000'),
            top=Side(border_style="thin",
                     color='FF110000'),
            bottom=Side(border_style="thin",
                        color='FF110000'),
            diagonal=Side(border_style=None,
                          color='FF000000'),
            diagonal_direction=0,
            # outline is an on/off flag, not a Side; the Side object passed
            # before was only coerced to True.
            outline=True,
            vertical=Side(border_style=None,
                          color='FF000000'),
            horizontal=Side(border_style=None,
                            color='FF110000')
            )

alignment = Alignment(horizontal='general',
                      vertical='bottom',
                      text_rotation=0,
                      wrap_text=False,
                      shrink_to_fit=False,
                      indent=0)

number_format = 'General'

protection = Protection(locked=True,
                        hidden=False)

# Apply every style object built above to one cell.
cell = ws["B5"]
cell.font = ft
cell.fill = fill
cell.border = bd
cell.alignment = alignment
cell.number_format = number_format
cell.protection = protection  # was built but never assigned

cell.value = "zeke"

wb.save("./example.xlsx")


文章作者: 雾烟云
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 雾烟云 !
  目录