Applied Data Analysis in Python

First we get the RGB data set:

In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
from skimage import io
from sklearn.cluster import KMeans

photo = io.imread("https://upload.wikimedia.org/wikipedia/commons/thumb/9/97/Swallow-tailed_bee-eater_%28Merops_hirundineus_chrysolaimus%29.jpg/768px-Swallow-tailed_bee-eater_%28Merops_hirundineus_chrysolaimus%29.jpg")

photo = np.array(photo, dtype=np.float64) / 255  # Scale values
w, h, d = original_shape = tuple(photo.shape)  # Get the current shape
image_array = np.reshape(photo, (w * h, d))  # Reshape to to 2D

pixels = DataFrame(image_array, columns=["Red", "Green", "Blue"])

pixels_sample = pixels.sample(frac=0.05)

Then we compute the L*a*b* dataset:

In [2]:
from skimage.color import rgb2lab

photo_lab = rgb2lab(photo)  # This is where we convert colour space
w, h, d = original_shape = tuple(photo_lab.shape)
image_array_lab = np.reshape(photo_lab, (w * h, d))

pixels_lab = DataFrame(image_array_lab, columns=["L", "a", "b"])

pixels_sample_lab = pixels_lab.sample(frac=0.05)

Then we normalise the two inertia values so we can compare them alongside each other:

In [3]:
kmeans = KMeans(n_clusters=1, n_init="auto").fit(pixels_sample[["Red", "Green", "Blue"]])
kmeans_lab = KMeans(n_clusters=1, n_init="auto").fit(pixels_sample_lab[["L", "a", "b"]])

norm = kmeans_lab.inertia_ / kmeans.inertia_

Then we loop over the number of clusters and calculate the inertia of each:

In [4]:
%matplotlib inline

inertia_values = []
r = pd.RangeIndex(2, 10)
for n_clusters in r:
    kmeans = KMeans(n_clusters=n_clusters, n_init="auto").fit(pixels_sample[["Red", "Green", "Blue"]])
    kmeans_lab = KMeans(n_clusters=n_clusters, n_init="auto").fit(pixels_sample_lab[["L", "a", "b"]])
    inertia_values.append((kmeans.inertia_, kmeans_lab.inertia_ / norm))

inertia = DataFrame(inertia_values, columns=["RGB", "L*a*b*"], index=r)
inertia.plot()
Out[4]:
<Axes: >
No description has been provided for this image