Extracting product image main colours in Python

The main colours of a product are often quite important data in e-commerce systems, and it’s nice to have an automated way to extract these.

Here’s a rough summary of what I ended up doing in Python to get this data from product images.

It uses three different colour extraction libraries and merges the results from them, which is somewhat questionable but seems to provide better overall results than a single library alone.

The extracted colours are normalized to HSL tuples on a (0–360, 0–100, 0–100) scale, i.e. hue in degrees and saturation and lightness as percentages. HSL makes it easy to look at the hue value to assign a named colour category. HSL also makes it easier to detect black, white, grey and beige colours.

#!/usr/bin/python3.8

import os
import random

from typing import Tuple, List
from collections import OrderedDict, namedtuple
from enum import Enum

import numpy

import dominant_color_detection
import colorgram
import colorthief

from colour import Color

# Colour triple; the intended scale is hue up to 360 and saturation /
# lightness up to 100.
HSL = namedtuple("HSL", ["hue", "sat", "lit"])


class Colour(Enum):
    """Named colour categories that an HSL value can be classified into.

    Each member's value is the lowercase English name of the colour.
    """

    # Neutral tones (these four are the ones listed in GREYSCALE).
    BEIGE = "beige"
    WHITE = "white"
    BLACK = "black"
    GREY = "grey"
    # Chromatic colours.
    BROWN = "brown"
    RED = "red"
    ORANGE = "orange"
    YELLOW = "yellow"
    GREEN = "green"
    TURQUOISE = "turquoise"
    BLUE = "blue"
    PURPLE = "purple"
    PINK = "pink"


# Neutral categories. When picking main colours these are only used to pad
# the result after the chromatic names have been taken.
GREYSCALE: List[Colour] = [Colour.BLACK, Colour.WHITE, Colour.GREY, Colour.BEIGE]


def extract_image_colour_names(image_path: str) -> List[Colour]:
    """Return up to three main colour names for the image at *image_path*.

    Three extractors each produce an HSL palette; every palette is mapped to
    colour names and the three name lists are merged in extractor order with
    duplicates removed. Chromatic names take priority: the first three are
    kept, and names from ``GREYSCALE`` only fill whatever slots remain.

    Intermediate results are printed to stdout for inspection.

    Args:
        image_path: Path of the image file handed to each extractor.

    Returns:
        At most three ``Colour`` members, chromatic ones first.
    """
    dcd_hsl = extract_hsl_dcd(image_path)
    print({"dcd_hsl": dcd_hsl})

    color_gram_hsl = extract_hsl_color_gram(image_path)
    print({"color_gram_hsl": color_gram_hsl})

    color_thief_hsl = extract_hsl_color_thief(image_path)
    print({"color_thief_hsl": color_thief_hsl})

    dcd_names = hsl_list_to_colour_names(dcd_hsl)
    print({"dcd_names": dcd_names})

    # Name the colorgram palette itself; passing dcd_hsl here would just
    # repeat dcd_names and drop the colorgram result from the merge.
    color_gram_names = hsl_list_to_colour_names(color_gram_hsl)
    print({"color_gram_names": color_gram_names})

    color_thief_names = hsl_list_to_colour_names(color_thief_hsl)
    print({"color_thief_names": color_thief_names})

    merged_names = uniq(dcd_names + color_gram_names + color_thief_names)
    print({"merged_names": merged_names})

    names_colourful = [n for n in merged_names if n not in GREYSCALE]
    names_greyscale = [n for n in merged_names if n in GREYSCALE]

    # Chromatic names first; greyscale names only pad up to three entries.
    main_colours = names_colourful[:3]
    fill = 3 - len(main_colours)
    main_colours += names_greyscale[:fill]

    print({"main_colours": main_colours})

    return main_colours


def reset_randomness():
    """Re-seed the stdlib ``random`` and NumPy global RNGs with a fixed seed.

    The extractors call this first so that each run starts from the same
    random state.
    """
    fixed_seed = 23
    for seed_rng in (random.seed, numpy.random.seed):
        seed_rng(fixed_seed)


def uniq(items) -> List:
    """Return the distinct elements of *items* in first-seen order.

    *items* may be any iterable of hashable values.
    """
    # Dict keys are unique and keep insertion order.
    return list(dict.fromkeys(items))


def extract_hsl_dcd(image_path: str) -> List[HSL]:
    """Extract a palette with ``dominant_color_detection`` as HSL tuples.

    Asks the library for 6 colours, converts each entry of the first element
    of its result with ``hex_to_hsl`` (presumably hex colour strings -- TODO
    confirm against the library), removes duplicates and sorts by hue.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Distinct ``HSL`` tuples in ascending hue order.
    """
    reset_randomness()
    hex_codes = dominant_color_detection.detect_colors(image_path, 6)[0]
    palette = uniq(map(hex_to_hsl, hex_codes))
    palette.sort(key=lambda colour: colour.hue)
    return palette


def extract_hsl_color_gram(image_path: str) -> List[HSL]:
    """Extract a palette with ``colorgram`` as HSL tuples.

    Asks the library for 6 colours and rescales each one onto the
    (360, 100, 100) scale used by ``HSL``. Duplicates are removed; the
    order returned by colorgram is otherwise kept.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Distinct ``HSL`` tuples in the order colorgram reported them.
    """
    reset_randomness()
    # colorgram documents Color.hsl as three values in 0-255, so hue needs
    # rescaling to degrees as well, and the divisor for all channels is 255.
    return uniq(
        [
            HSL(
                (c.hsl[0] / 255) * 360,
                (c.hsl[1] / 255) * 100,
                (c.hsl[2] / 255) * 100,
            )
            for c in colorgram.extract(image_path, 6)
        ]
    )


def extract_hsl_color_thief(image_path: str) -> List[HSL]:
    """Extract a palette with ``colorthief`` as HSL tuples.

    Requests a palette with ``color_count=6``, converts every entry with
    ``rgb_256_to_hsl`` and sorts the result by hue. Duplicates are not
    removed here.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``HSL`` tuples in ascending hue order.
    """
    reset_randomness()
    rgb_palette = colorthief.ColorThief(image_path).get_palette(color_count=6)
    palette = list(map(rgb_256_to_hsl, rgb_palette))
    palette.sort(key=lambda colour: colour.hue)
    return palette


def hsl_list_to_colour_names(hsl_list: List[HSL]) -> List[Colour]:
    """Map each HSL tuple to its colour category and drop repeats.

    Args:
        hsl_list: HSL tuples to classify with ``name_hsl_colour``.

    Returns:
        The distinct ``Colour`` members, in order of first appearance.
    """
    return uniq([name_hsl_colour(h) for h in hsl_list])


def hex_to_hsl(hex_code: str) -> HSL:
    """Convert a colour string to an ``HSL`` tuple.

    *hex_code* is passed straight to ``colour.Color``; the three components
    of its ``hsl`` property are multiplied by 360, 100 and 100 respectively.
    """
    hue, sat, lit = Color(hex_code).hsl
    return HSL(hue=hue * 360, sat=sat * 100, lit=lit * 100)


def rgb_256_to_hsl(rgb: Tuple[float, float, float]) -> HSL:
    """Convert an 8-bit RGB triple to an ``HSL`` tuple.

    Args:
        rgb: Red, green and blue channel values, each in 0-255.

    Returns:
        The colour as ``HSL`` with the ``hsl`` components reported by
        ``colour.Color`` multiplied by 360, 100 and 100 respectively.
    """
    # 8-bit channels peak at 255, so that is the divisor that maps full
    # intensity to 1.0 (dividing by 256 would leave pure white below 100%).
    red, green, blue = (channel / 255 for channel in rgb[:3])
    hsl = Color(rgb=(red, green, blue)).hsl
    return HSL(hsl[0] * 360, hsl[1] * 100, hsl[2] * 100)


def name_hsl_colour(hsl: HSL) -> Colour:
    """Classify an HSL value into one of the ``Colour`` categories.

    Neutral tones (white, black, beige, grey) are detected first from
    saturation and lightness; everything else is bucketed by hue band, with
    lightness or saturation refining a few of the bands. The checks are
    order-dependent: the first matching rule wins.

    Args:
        hsl: Colour to classify, on the (360, 100, 100) scale.

    Returns:
        The matching ``Colour`` member.
    """
    hue, sat, lit = hsl.hue, hsl.sat, hsl.lit

    # --- Neutral tones -----------------------------------------------------
    if lit >= 90 and sat <= 50:
        return Colour.WHITE
    if lit <= 13:
        return Colour.BLACK
    if lit >= 85 and hue <= 70:
        return Colour.BEIGE
    if sat <= 13 or (sat <= 20 and lit >= 70):
        return Colour.GREY

    # --- Hue bands, walking around the colour wheel ------------------------
    if hue <= 5:
        if lit <= 30:
            return Colour.BROWN
        return Colour.PINK if lit >= 70 else Colour.RED
    if hue <= 40:
        return Colour.BROWN if lit <= 30 else Colour.ORANGE
    if hue <= 70:
        return Colour.YELLOW
    if hue <= 150:
        return Colour.GREEN
    if hue <= 180:
        return Colour.TURQUOISE
    if hue <= 260:
        # Washed-out blues are filed under purple.
        return Colour.PURPLE if sat <= 40 else Colour.BLUE
    if hue <= 290:
        return Colour.PURPLE
    if hue <= 330:
        return Colour.PINK
    if hue <= 340:
        # Dark tones in this band read as purple rather than pink.
        return Colour.PURPLE if lit <= 35 else Colour.PINK

    # Anything above 340 wraps back round to red.
    return Colour.RED

We use this to produce colour taxonomies for our wall art prints shop, which allows having taxonomy pages like this one for blue wall art poster prints.


Tech mentioned