I'm reading *.xlsx files with the openpyxl Python library and, among other data, I want to get each cell's background color.
This code I was using normally, w
Following @Sib's work, and updating it to use list(x)
instead of the deprecated x.getchildren(), which was removed in Python 3.9
(see https://docs.python.org/3/whatsnew/3.9.html?highlight=getchildren#removed)
from colorsys import rgb_to_hls, hls_to_rgb
# From: https://stackoverflow.com/questions/58429823/getting-excel-cell-background-themed-color-as-hex-with-openpyxl/58443509#58443509
# which refers to: https://pastebin.com/B2nGEGX2 (October 2020)
# Updated to use list(elem) instead of the deprecated elem.getchildren() method
# which has now been removed completely from Python 3.9 onwards.
#
#https://bitbucket.org/openpyxl/openpyxl/issues/987/add-utility-functions-for-colors-to-help
# Largest value of a single RGB channel (0xff).
RGBMAX = 255
# MS Excel's tint function expects HLS components scaled to base 240. See:
# https://social.msdn.microsoft.com/Forums/en-US/e9d8c136-6d62-4098-9b1b-dac786149f43/excel-color-tint-algorithm-incorrect?forum=os_binaryfile#d3c2ac95-52e0-476b-86f1-e2a697f24969
HLSMAX = 240
def rgb_to_ms_hls(red, green=None, blue=None):
    """Convert an RGB colour to HLSMAX-scaled integer (hue, lightness, saturation).

    Accepted call forms:
      * three channel values in the range (0, 1);
      * one iterable holding those three values;
      * one hex string of the form '[#aa]rrggbb' (a leading '#' or alpha
        value is ignored).

    Returns a 3-tuple of ints, each component multiplied by HLSMAX and rounded.
    """
    if green is None:
        if isinstance(red, str):
            # Only the trailing six hex digits carry the colour.
            hex_digits = red[-6:] if len(red) > 6 else red
            # Channels are parsed blue, green, red.
            blue, green, red = (
                int(hex_digits[start:stop], 16) / RGBMAX
                for start, stop in ((4, None), (2, 4), (0, 2))
            )
        else:
            red, green, blue = red
    hls = rgb_to_hls(red, green, blue)
    return tuple(int(round(component * HLSMAX)) for component in hls)
def ms_hls_to_rgb(hue, lightness=None, saturation=None):
    """Convert HLSMAX-scaled HLS values to an (r, g, b) tuple in the range (0, 1).

    The components may be passed separately or as a single 3-item iterable
    in the first argument.
    """
    if lightness is None:
        # A lone argument is treated as an (h, l, s) triple.
        hue, lightness, saturation = hue
    unit_hls = [component / HLSMAX for component in (hue, lightness, saturation)]
    return hls_to_rgb(*unit_hls)
def rgb_to_hex(red, green=None, blue=None):
    """Format (0, 1)-based RGB values as an upper-case hex string 'RRGGBB'.

    The channels may be passed separately or as a single 3-item iterable
    in the first argument.
    """
    if green is None:
        red, green, blue = red
    # Scale each channel to 0..RGBMAX and round to the nearest integer.
    scaled = tuple(int(round(channel * RGBMAX)) for channel in (red, green, blue))
    hex_lower = '%02x%02x%02x' % scaled
    return hex_lower.upper()
def get_theme_colors(wb):
    """Return the base colours of the workbook's theme as colour strings.

    The colours are read from the first ``clrScheme`` of the theme XML in the
    order lt1, dk1, lt2, dk2, accent1..accent6, so a colour's theme number can
    be used directly as an index into the result (as ``theme_and_tint_to_rgb``
    does).

    Args:
        wb: Workbook whose theme XML is available as ``wb.loaded_theme``.

    Returns:
        A list with exactly one colour string per scheme slot.

    Raises:
        KeyError: If a scheme slot has no child carrying a ``lastClr`` or
            ``val`` attribute.
    """
    # see: https://groups.google.com/forum/#!topic/openpyxl-users/I0k3TfqNLrc
    from openpyxl.xml.functions import QName, fromstring

    xlmns = 'http://schemas.openxmlformats.org/drawingml/2006/main'
    root = fromstring(wb.loaded_theme)
    theme_el = root.find(QName(xlmns, 'themeElements').text)
    first_scheme = theme_el.findall(QName(xlmns, 'clrScheme').text)[0]

    slots = ('lt1', 'dk1', 'lt2', 'dk2', 'accent1', 'accent2', 'accent3',
             'accent4', 'accent5', 'accent6')
    colors = []
    for slot in slots:
        slot_el = first_scheme.find(QName(xlmns, slot).text)
        # Append exactly one colour per slot; appending once per child node
        # would shift every later index whenever a slot had extra children.
        for child in slot_el:
            attrib = child.attrib
            # System colours store only a name (e.g. 'window') in 'val'; the
            # hex value is in 'lastClr'. Prefer it whenever it is present
            # rather than only for names containing 'window'.
            if 'lastClr' in attrib:
                colors.append(attrib['lastClr'])
                break
            if 'val' in attrib:
                colors.append(attrib['val'])
                break
        else:
            # Fail loudly instead of returning a list with shifted indices.
            raise KeyError('theme colour %r has no usable value' % slot)
    return colors
def tint_luminance(tint, lum):
    """Apply a tint to an HLSMAX-based luminance and return the rounded int.

    A negative tint scales the luminance down by (1 + tint); otherwise the
    luminance is scaled by (1 - tint) and offset towards HLSMAX.
    """
    # See: http://ciintelligence.blogspot.co.uk/2012/02/converting-excel-theme-color-and-tint.html
    if tint < 0:
        tinted = lum * (1.0 + tint)
    else:
        keep = 1.0 - tint
        tinted = lum * keep + (HLSMAX - HLSMAX * keep)
    return int(round(tinted))
def theme_and_tint_to_rgb(wb, theme, tint):
    """Resolve a workbook theme number plus tint to a hex RGB string.

    The theme colour is looked up by index, converted to HLSMAX-based HLS,
    its luminance is tinted, and the result is converted back to hex.
    """
    theme_hex = get_theme_colors(wb)[theme]
    hue, lightness, saturation = rgb_to_ms_hls(theme_hex)
    tinted_lightness = tint_luminance(tint, lightness)
    return rgb_to_hex(ms_hls_to_rgb(hue, tinted_lightness, saturation))
After hours of googling I finally found the solution: a proposal to add theme + tint to RGB conversion to openpyxl. By importing its set of color-conversion functions, I can reduce my solution to the following steps:
# NOTE(review): `filename` and `cell` are not defined in this snippet; they
# are expected to come from the surrounding code.
wb = load_workbook(filename, data_only=True)
# Read the theme number and tint from the start colour of the cell's fill.
fill_color = cell.fill.start_color
theme, tint = fill_color.theme, fill_color.tint
# Resolve theme + tint to a hex RGB string.
color = theme_and_tint_to_rgb(wb, theme, tint)