Extracting product image main colours in Python
The main colours of a product are often quite important data in e-commerce systems, and it’s nice to have an automated way to extract these.
Here’s a rough summary of what I ended up doing in Python to get this data from product images.
It uses three different colour extraction libraries and merges the results from them, which is somewhat questionable but seems to provide better overall results than a single library alone.
The extracted colours are normalized to
HSL tuples with a (hue 0–360, saturation 0–100%, lightness 0–100%)
structure. HSL makes it easy to look at the hue value to assign a named
colour category. HSL also makes it easier to detect black, white, grey and beige
colours.
#!/usr/bin/python3.8
import os
import random
from typing import Tuple, List
from collections import OrderedDict, namedtuple
from enum import Enum
import numpy
import dominant_color_detection
import colorgram
import colorthief
from colour import Color
# Colour as (hue, sat, lit); the intended scales are hue 0-360 and
# saturation / lightness 0-100.
HSL = namedtuple("HSL", ["hue", "sat", "lit"])
class Colour(Enum):
    """Named colour categories that an HSL value can be mapped to.

    Each member's value is the lower-case name of the category.
    """

    # Neutral tones.
    BEIGE = "beige"
    WHITE = "white"
    BLACK = "black"
    GREY = "grey"

    # Chromatic tones.
    BROWN = "brown"
    RED = "red"
    ORANGE = "orange"
    YELLOW = "yellow"
    GREEN = "green"
    TURQUOISE = "turquoise"
    BLUE = "blue"
    PURPLE = "purple"
    PINK = "pink"
# Colour names treated as neutral tones. extract_image_colour_names() uses
# this list to separate "colourful" names from neutral ones.
GREYSCALE = [Colour.BLACK, Colour.WHITE, Colour.GREY, Colour.BEIGE]  # type: List[Colour]
def extract_image_colour_names(image_path: str) -> List[Colour]:
    """Return up to three main colour names for the image at *image_path*.

    Colours are extracted with three libraries (dominant_color_detection,
    colorgram and colorthief), each result is converted to colour names, and
    the names are merged in that order with duplicates removed (the first
    occurrence wins). Colourful names are preferred; greyscale names are only
    used to fill the remaining slots up to three.

    Every intermediate result is printed to stdout for inspection.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A list of at most three ``Colour`` members.
    """
    dcd_hsl = extract_hsl_dcd(image_path)
    print({"dcd_hsl": dcd_hsl})
    color_gram_hsl = extract_hsl_color_gram(image_path)
    print({"color_gram_hsl": color_gram_hsl})
    color_thief_hsl = extract_hsl_color_thief(image_path)
    print({"color_thief_hsl": color_thief_hsl})

    dcd_names = hsl_list_to_colour_names(dcd_hsl)
    print({"dcd_names": dcd_names})
    # Name the colorgram colours from the colorgram result; previously the
    # dcd result was passed here, so colorgram never contributed to the merge.
    color_gram_names = hsl_list_to_colour_names(color_gram_hsl)
    print({"color_gram_names": color_gram_names})
    color_thief_names = hsl_list_to_colour_names(color_thief_hsl)
    print({"color_thief_names": color_thief_names})

    merged_names = uniq(dcd_names + color_gram_names + color_thief_names)
    print({"merged_names": merged_names})

    names_colourful = [n for n in merged_names if n not in GREYSCALE]
    names_greyscale = [n for n in merged_names if n in GREYSCALE]

    # Take up to three colourful names, then top up with greyscale names.
    main_colours = names_colourful[:3]
    fill = 3 - len(main_colours)
    main_colours += names_greyscale[0:fill]
    print({"main_colours": main_colours})
    return main_colours
def reset_randomness():
    """Re-seed the stdlib and numpy random generators with a fixed seed.

    Called before each extraction so that repeated runs start from the same
    random state.
    """
    fixed_seed = 23
    for seed_generator in (random.seed, numpy.random.seed):
        seed_generator(fixed_seed)
def uniq(items) -> List:
    """Return the distinct elements of *items* in first-seen order.

    Elements must be hashable. Any iterable is accepted, including
    generators.
    """
    seen = OrderedDict()
    for item in items:
        # Re-assigning an existing key keeps its original position.
        seen[item] = None
    return list(seen)
def extract_hsl_dcd(image_path: str) -> List[HSL]:
    """Extract colours with dominant_color_detection, sorted by hue.

    Asks the library for 6 colours, converts the entries of the first element
    of its result from hex codes to ``HSL``, drops duplicates and sorts the
    remainder by ascending hue.
    """
    reset_randomness()
    hex_codes = dominant_color_detection.detect_colors(image_path, 6)[0]
    distinct = uniq(hex_to_hsl(code) for code in hex_codes)
    distinct.sort(key=lambda hsl: hsl.hue)
    return distinct
def extract_hsl_color_gram(image_path: str) -> List[HSL]:
    """Extract colours with colorgram as distinct ``HSL`` tuples.

    Asks colorgram for 6 colours and keeps them in the order the library
    returns them, with duplicates removed.

    colorgram reports hue, saturation and lightness each on a 0-255 scale, so
    all three components are rescaled here: hue to 0-360 and saturation /
    lightness to 0-100. (Previously the hue was passed through unscaled and
    the other two were divided by 256, so no component could reach its
    maximum and hues were compared against degree thresholds while still
    being on the 0-255 scale.)
    """
    reset_randomness()
    return uniq(
        HSL(
            (c.hsl[0] / 255) * 360,
            (c.hsl[1] / 255) * 100,
            (c.hsl[2] / 255) * 100,
        )
        for c in colorgram.extract(image_path, 6)
    )
def extract_hsl_color_thief(image_path: str) -> List[HSL]:
    """Extract a colorthief palette as ``HSL`` tuples sorted by hue.

    Requests a palette with ``color_count=6`` and converts every entry with
    ``rgb_256_to_hsl``. Duplicates are not removed here.
    """
    reset_randomness()
    palette = colorthief.ColorThief(image_path).get_palette(color_count=6)
    hsl_colours = [rgb_256_to_hsl(rgb) for rgb in palette]
    hsl_colours.sort(key=lambda hsl: hsl.hue)
    return hsl_colours
def hsl_list_to_colour_names(hsl_list: List[HSL]) -> List[Colour]:
    """Map each HSL value to its colour name, keeping first-seen order.

    The result contains ``Colour`` members (not strings), as returned by
    ``name_hsl_colour``, with duplicates removed.
    """
    return uniq(name_hsl_colour(hsl) for hsl in hsl_list)
def hex_to_hsl(hex_code: str) -> HSL:
    """Convert a colour code accepted by ``Color`` into an ``HSL`` tuple.

    The three components of ``Color(...).hsl`` are multiplied by 360, 100
    and 100 respectively.
    """
    hue, sat, lit = Color(hex_code).hsl
    return HSL(hue * 360, sat * 100, lit * 100)
def rgb_256_to_hsl(rgb: Tuple[float, float, float]) -> HSL:
    """Convert an RGB triple with 0-255 channels into an ``HSL`` tuple.

    Channels are normalised by dividing by 255 so that the maximum channel
    value maps to exactly 1.0; dividing by 256 made pure white come out
    slightly below 100% lightness. Only the first three entries of *rgb*
    are read.

    Args:
        rgb: Red, green and blue channel values in the range 0-255.

    Returns:
        ``HSL`` with the three components of ``Color(...).hsl`` multiplied
        by 360, 100 and 100 respectively.
    """
    red, green, blue = rgb[0] / 255, rgb[1] / 255, rgb[2] / 255
    hsl = Color(rgb=(red, green, blue)).hsl
    return HSL(hsl[0] * 360, hsl[1] * 100, hsl[2] * 100)
def name_hsl_colour(hsl: HSL) -> Colour:
    """Classify an ``HSL`` value into one of the ``Colour`` categories.

    Neutral tones (white, black, beige, grey) are checked first using
    saturation and lightness; everything else is bucketed by ascending hue,
    with lightness or saturation deciding between neighbouring names in
    some bands. The checks are order-dependent: the first match wins.
    """
    hue, sat, lit = hsl.hue, hsl.sat, hsl.lit

    # --- Neutral tones -----------------------------------------------
    if lit >= 90 and sat <= 50:
        return Colour.WHITE
    if lit <= 13:
        return Colour.BLACK
    if lit >= 85 and hue <= 70:
        return Colour.BEIGE
    if sat <= 13 or (sat <= 20 and lit >= 70):
        return Colour.GREY

    # --- Chromatic tones, by ascending hue ---------------------------
    is_dark = lit <= 30
    if hue <= 5:
        if is_dark:
            return Colour.BROWN
        return Colour.PINK if lit >= 70 else Colour.RED
    if hue <= 40:
        return Colour.BROWN if is_dark else Colour.ORANGE
    if hue <= 70:
        return Colour.YELLOW
    if hue <= 150:
        return Colour.GREEN
    if hue <= 180:
        return Colour.TURQUOISE
    if hue <= 260:
        # Weakly saturated values in this band are named purple, not blue.
        return Colour.PURPLE if sat <= 40 else Colour.BLUE
    if hue <= 290:
        return Colour.PURPLE
    if hue <= 330:
        return Colour.PINK
    if hue <= 340:
        return Colour.PURPLE if lit <= 35 else Colour.PINK

    # Anything above 340 falls through to red.
    return Colour.RED
We use this to produce colour taxonomies for our wall art prints shop, which allows us to have taxonomy pages like this one for blue wall art poster prints.