Practical 6: Histogram Matching

Practical 6: Histogram Matching & Specification

Run this practical in Google Colab

1. Aim

Objective

To understand and implement histogram matching (specification) — a generalization of histogram equalization that transforms an image so its histogram approximates a specified target histogram rather than a uniform one.

2. Description / Theory

Theory: Histogram equalization maps every image toward an approximately uniform distribution. Histogram matching goes further: given a source image and a target (reference) image, we transform the source so that its intensity distribution matches the target's. The algorithm uses CDF inversion:

Compute $\text{CDF}_s(r)$ of the source image.
Compute $\text{CDF}_t(z)$ of the target image.
For each source intensity $r$, find $z$ such that $\text{CDF}_t(z) \approx \text{CDF}_s(r)$.
Build a lookup table: $\text{mapping}[r] = z$.

$$z = \text{CDF}_t^{-1}\!\bigl(\text{CDF}_s(r)\bigr)$$

Source Image

Select the source image whose histogram you want to transform.

3. Code



# ----------------------------------------------------------------------

# Install dependencies (uncomment for Google Colab)
# !pip install opencv-python-headless matplotlib numpy

import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

# === AUTO-DOWNLOAD DATASET (works in Google Colab and locally) ===
import os, urllib.request, zipfile

# Choose which chapter to download (CH01-CH12)
CHAPTER = "CH02"  # Chapter 2: Digital Image Fundamentals
DATASET_PATH = f"datasets/{CHAPTER}/"
DOWNLOAD_BASE = "https://www.imageprocessingplace.com/downloads_V3/dip3e_downloads/dip3e_book_images"


def _list_tif_images(folder):
    """Return the sorted names of the .tif files in *folder* ([] if it is not a directory).

    The extension test is case-insensitive so that it agrees with the filter
    used when extracting the archive below.
    """
    if not os.path.isdir(folder):
        return []
    return sorted(f for f in os.listdir(folder) if f.lower().endswith(".tif"))


if not _list_tif_images(DATASET_PATH):
    zip_name = f"DIP3E_{CHAPTER}_Original_Images.zip"
    url = f"{DOWNLOAD_BASE}/{zip_name}"
    print(f"Downloading {CHAPTER} dataset from imageprocessingplace.com...")
    try:
        urllib.request.urlretrieve(url, "chapter.zip")
        os.makedirs(DATASET_PATH, exist_ok=True)
        with zipfile.ZipFile("chapter.zip", "r") as z:
            for f in z.namelist():
                # Flatten the archive: keep only the file name, and skip
                # directory entries (whose basename is empty).
                fname = os.path.basename(f)
                if fname and f.lower().endswith(".tif"):
                    with z.open(f) as src, open(os.path.join(DATASET_PATH, fname), "wb") as dst:
                        dst.write(src.read())
    finally:
        # Remove the temporary archive even when the download or the
        # extraction fails, so a partial/corrupt zip is never left behind.
        if os.path.exists("chapter.zip"):
            os.remove("chapter.zip")
    print(f"Downloaded {len(_list_tif_images(DATASET_PATH))} images")
else:
    print(f"Dataset ready: {len(_list_tif_images(DATASET_PATH))} images")

# List all available images
images = _list_tif_images(DATASET_PATH)
print(f"\nAvailable images ({len(images)}):")
for i, name in enumerate(images, 1):
    print(f"  {i}. {name}")

# === SOURCE IMAGE: Low-contrast Einstein ===
source_file = "Fig0241(a)(einstein low contrast).tif"
source_path = os.path.join(DATASET_PATH, source_file)
source = cv2.imread(source_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of an AttributeError below.
if source is None:
    raise FileNotFoundError(f"Could not read source image: {source_path}")

# Basic statistics of the source intensities
print(f"Source: {source_file}")
print(f"Shape: {source.shape}")
print(f"Intensity range: [{source.min()}, {source.max()}]")
print(f"Mean: {source.mean():.2f}, Std: {source.std():.2f}")

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the image, displayed on a fixed 0-255 gray scale so the low
# contrast is not hidden by automatic display scaling.
axes[0].imshow(source, cmap='gray', vmin=0, vmax=255)
axes[0].set_title('Source Image (Low Contrast)')
axes[0].axis('off')

# Right panel: histogram with one bin per intensity level.
axes[1].hist(source.ravel(), bins=256, range=(0, 256), color='steelblue', alpha=0.85)
axes[1].set_title('Source Histogram')
axes[1].set_xlabel('Pixel Intensity')
axes[1].set_ylabel('Frequency')
axes[1].set_xlim(0, 255)

plt.suptitle(f'Source Image & Histogram: {source_file}', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.show()

# Equalization baseline
equalized = cv2.equalizeHist(source)

# Summary statistics before and after equalization (labels padded to equal width)
for label, img in (("Original", source), ("Equalized", equalized)):
    print(f"{label:<9} — Mean: {img.mean():.2f}, Std: {img.std():.2f}, Range: [{img.min()}, {img.max()}]")

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# One column per image: the picture on the top row, its histogram underneath.
panels = (
    (source, 'Original Image', 'Original Histogram', 'steelblue'),
    (equalized, 'Histogram Equalized Image', 'Equalized Histogram', 'darkorange'),
)
for col, (img, image_title, hist_title, color) in enumerate(panels):
    image_ax = axes[0, col]
    image_ax.imshow(img, cmap='gray', vmin=0, vmax=255)
    image_ax.set_title(image_title)
    image_ax.axis('off')

    hist_ax = axes[1, col]
    hist_ax.hist(img.ravel(), bins=256, range=(0, 256), color=color, alpha=0.85)
    hist_ax.set_title(hist_title)
    hist_ax.set_xlabel('Pixel Intensity')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_xlim(0, 255)

plt.suptitle('Equalization Baseline: Original vs Equalized', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

def histogram_match(source, target):
    """Match source histogram to target histogram using CDF mapping.

    Builds a 256-entry lookup table that sends each source intensity r to the
    target intensity z whose cumulative probability CDF_t(z) is closest to
    CDF_s(r) (the lowest such z on ties), then applies it to every pixel.

    Args:
        source: Array of integer intensities in [0, 255] (e.g. an 8-bit
            grayscale image); its values are used to index the lookup table.
        target: Array whose intensity distribution should be imitated. Only
            values falling in the 0-255 histogram range are counted.

    Returns:
        A uint8 array with the same shape as ``source``.

    Raises:
        ValueError: If ``source`` (when non-empty) or ``target`` contains no
            value in the histogram range, so that a CDF cannot be normalised.
    """
    source = np.asarray(source)
    target = np.asarray(target)

    # Nothing to remap: return an empty result instead of dividing 0 by 0.
    if source.size == 0:
        return np.zeros(source.shape, dtype=np.uint8)

    src_hist, _ = np.histogram(source.ravel(), 256, [0, 256])
    tgt_hist, _ = np.histogram(target.ravel(), 256, [0, 256])
    if src_hist.sum() == 0 or tgt_hist.sum() == 0:
        # Normalising would produce NaNs and a silently all-zero mapping.
        raise ValueError(
            "histogram_match requires source and target to each contain at "
            "least one value in the 0-255 intensity range"
        )

    src_cdf = np.cumsum(src_hist).astype(np.float64)
    src_cdf /= src_cdf[-1]
    tgt_cdf = np.cumsum(tgt_hist).astype(np.float64)
    tgt_cdf /= tgt_cdf[-1]

    # Row r of the 256x256 distance matrix holds |CDF_t(z) - CDF_s(r)| for
    # every z; argmin along each row picks the closest target level (the
    # first one on ties), exactly like a per-level loop would.
    distance = np.abs(tgt_cdf[np.newaxis, :] - src_cdf[:, np.newaxis])
    lut = distance.argmin(axis=1).astype(np.uint8)
    return lut[source]

# Load target image
target_file = "Fig0222(b)(cameraman).tif"
target_path = os.path.join(DATASET_PATH, target_file)
target = cv2.imread(target_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None (it does not raise) when the file cannot be read;
# stop here with a clear message rather than failing inside histogram_match.
if target is None:
    raise FileNotFoundError(f"Could not read target image: {target_path}")

# Perform histogram matching
matched = histogram_match(source, target)

print(f"Source:  {source_file} — Mean: {source.mean():.2f}, Std: {source.std():.2f}")
print(f"Target:  {target_file} — Mean: {target.mean():.2f}, Std: {target.std():.2f}")
print(f"Matched: Mean: {matched.mean():.2f}, Std: {matched.std():.2f}")

fig, axes = plt.subplots(2, 3, figsize=(18, 10))

# One column per image: the picture on the top row, its histogram underneath.
panels = (
    (source, 'Source (Low Contrast)', 'Source Histogram', 'steelblue'),
    (target, 'Target (Cameraman)', 'Target Histogram', 'forestgreen'),
    (matched, 'Matched Result', 'Matched Histogram', 'darkorange'),
)
for col, (img, image_title, hist_title, color) in enumerate(panels):
    # Top row: images, shown on a fixed 0-255 gray scale so they are comparable
    axes[0, col].imshow(img, cmap='gray', vmin=0, vmax=255)
    axes[0, col].set_title(image_title)
    axes[0, col].axis('off')

    # Bottom row: histograms with one bin per intensity level
    axes[1, col].hist(img.ravel(), bins=256, range=(0, 256), color=color, alpha=0.85)
    axes[1, col].set_title(hist_title)
    axes[1, col].set_xlabel('Pixel Intensity')
    axes[1, col].set_ylabel('Frequency')
    axes[1, col].set_xlim(0, 255)

plt.suptitle('Histogram Matching: Source → Target', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Load target images
high_contrast_file = "Fig0241(c)(einstein high contrast).tif"
cameraman_file = "Fig0222(b)(cameraman).tif"

high_contrast = cv2.imread(os.path.join(DATASET_PATH, high_contrast_file), cv2.IMREAD_GRAYSCALE)
cameraman = cv2.imread(os.path.join(DATASET_PATH, cameraman_file), cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None (it does not raise) when a file cannot be read;
# report which target is missing instead of failing inside histogram_match.
for file_name, loaded in ((high_contrast_file, high_contrast), (cameraman_file, cameraman)):
    if loaded is None:
        raise FileNotFoundError(
            f"Could not read target image: {os.path.join(DATASET_PATH, file_name)}"
        )

# Match source to each target
matched_eq = histogram_match(source, equalized)  # Match to equalized (uniform-ish)
matched_high = histogram_match(source, high_contrast)  # Match to high contrast
matched_cam = histogram_match(source, cameraman)  # Match to cameraman

results = [
    ("Source", source),
    ("Matched \u2192 Equalized", matched_eq),
    ("Matched \u2192 High Contrast", matched_high),
    ("Matched \u2192 Cameraman", matched_cam),
]

fig, axes = plt.subplots(2, 4, figsize=(20, 10))

# One histogram colour per column, in the same order as `results`
colors = ['steelblue', 'darkorange', 'crimson', 'forestgreen']

for col, (title, img) in enumerate(results):
    # Top row: images
    axes[0, col].imshow(img, cmap='gray', vmin=0, vmax=255)
    axes[0, col].set_title(title)
    axes[0, col].axis('off')

    # Bottom row: histograms
    axes[1, col].hist(img.ravel(), bins=256, range=(0, 256), color=colors[col], alpha=0.85)
    axes[1, col].set_title(f'{title} Histogram')
    axes[1, col].set_xlabel('Pixel Intensity')
    axes[1, col].set_ylabel('Frequency')
    axes[1, col].set_xlim(0, 255)

plt.suptitle('Multi-Target Histogram Matching Comparison', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Compute CDFs
# 256-bin histograms (one bin per intensity level) of the three images
src_hist, tgt_hist, matched_hist = (
    np.histogram(image.ravel(), 256, [0, 256])[0]
    for image in (source, target, matched)
)

# Running totals, each normalised by its final value so every CDF ends at 1.0
running_totals = [
    np.cumsum(counts).astype(np.float64)
    for counts in (src_hist, tgt_hist, matched_hist)
]
src_cdf, tgt_cdf, matched_cdf = (total / total[-1] for total in running_totals)

# Build the transfer function (LUT)
# Row r of the distance matrix is |G(z) - T(r)| over all z; the argmin of each
# row is the target level whose CDF value lies closest to the source's.
cdf_distance = np.abs(tgt_cdf[np.newaxis, :] - src_cdf[:, np.newaxis])
lut = cdf_distance.argmin(axis=1).astype(np.uint8)

# Quantify CDF match quality
cdf_error = np.mean(np.abs(matched_cdf - tgt_cdf))

levels = range(256)
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
cdf_ax, lut_ax, check_ax = axes

# Plot 1: Source CDF vs Target CDF
cdf_ax.plot(levels, src_cdf, color='steelblue', linewidth=2, label='Source CDF T(r)')
cdf_ax.plot(levels, tgt_cdf, color='forestgreen', linewidth=2, label='Target CDF G(z)')
cdf_ax.set_title('CDF Comparison: Source vs Target')

# Plot 3: Verification — Matched CDF vs Target CDF
check_ax.plot(levels, tgt_cdf, color='forestgreen', linewidth=2, label='Target CDF G(z)')
check_ax.plot(levels, matched_cdf, color='darkorange', linewidth=2, linestyle='--', label='Matched CDF')
check_ax.set_title('Verification: Matched CDF \u2248 Target CDF')

# Both CDF panels share the same axis labelling and limits
for ax in (cdf_ax, check_ax):
    ax.set_xlabel('Pixel Intensity')
    ax.set_ylabel('Cumulative Probability')
    ax.legend()
    ax.set_xlim(0, 255)
    ax.set_ylim(0, 1.05)
    ax.grid(True, alpha=0.3)

# Plot 2: Transfer function (mapping), with the identity line for reference
lut_ax.plot(levels, lut, color='crimson', linewidth=2)
lut_ax.plot([0, 255], [0, 255], 'k--', alpha=0.3, label='Identity')
lut_ax.set_title('Transfer Function: LUT[r] = G\u207b\u00b9(T(r))')
lut_ax.set_xlabel('Input Intensity (r)')
lut_ax.set_ylabel('Output Intensity (z)')
lut_ax.legend()
lut_ax.set_xlim(0, 255)
lut_ax.set_ylim(0, 255)
lut_ax.set_aspect('equal')
lut_ax.grid(True, alpha=0.3)

print(f"Mean absolute CDF error (matched vs target): {cdf_error:.6f}")

plt.suptitle('Transfer Function Analysis: Histogram Matching', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

Part 1: Source Histogram

Visualize the source image and its pixel intensity distribution.

Part 2: Equalization Baseline

Equalize the source as a baseline. Equalization is histogram matching where the target is a uniform distribution — the simplest special case.

Part 3: Histogram Matching

Select a target (reference) image. The source image's histogram will be transformed to match the target's distribution.

Part 4: Multi-Target Comparison

Match the same source to multiple targets simultaneously: the equalized version, a high-contrast image, and the cameraman image. This demonstrates how different target distributions reshape the same source.

Part 5: Transfer Function Analysis

Examine the CDF-based mapping used in histogram matching: source vs. target CDFs, the derived transfer function, and verification that the matched CDF approximates the target CDF.

4. Output

5. Analysis / Conclusion

Analysis Questions

How does histogram matching differ from histogram equalization? When would you prefer matching over equalization?
After matching, the result histogram approximates the target but isn't identical. Why? What limitations does the discrete nature of pixel intensities impose?
Compare the matched result when the target is a bright image vs. a dark image. How does the transfer function change?
If you match image A to B, then match the result to A, do you get back the original? Why or why not?