Practical 5: Histogram Equalization

Run this practical in Google Colab

1. Aim

Objective

To understand and implement histogram equalization — a technique that enhances image contrast by redistributing pixel intensities to approximate a uniform distribution.

2. Description / Theory

Theory: Histogram equalization uses the cumulative distribution function (CDF) of an image's intensity histogram as a transfer function. For an image with $L$ intensity levels (0 to $L{-}1$), the transformation maps each input intensity $r_k$ to output intensity $s_k$ using:

PDF: $p(r_k) = n_k / n$, where $n_k$ is the count of pixels with intensity $r_k$ and $n$ is the total pixel count.
CDF: $T(r_k) = \sum_{j=0}^{k} p(r_j)$, the cumulative sum of the PDF.
Output intensity: $s_k = \operatorname{round}\big((L{-}1) \cdot T(r_k)\big)$, i.e. the CDF scaled to the range $[0, L{-}1]$ and rounded to the nearest integer.

$$s_k = (L - 1) \cdot \sum_{j=0}^{k} \frac{n_j}{n}$$

3. Code



# ----------------------------------------------------------------------

# Install dependencies (uncomment for Google Colab)
# !pip install opencv-python-headless matplotlib numpy

import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

# === AUTO-DOWNLOAD DATASET (works in Google Colab and locally) ===
import os, urllib.request, zipfile

# Choose which chapter to download (CH01-CH12)
CHAPTER = "CH02"  # Chapter 2: Digital Image Fundamentals
DATASET_PATH = f"datasets/{CHAPTER}/"
DOWNLOAD_BASE = "https://www.imageprocessingplace.com/downloads_V3/dip3e_downloads/dip3e_book_images"


def _count_tif_images(folder):
    """Return the number of entries in *folder* whose name ends with '.tif'.

    A missing folder counts as zero images, so callers can use the result
    directly to decide whether the dataset still has to be downloaded.
    """
    if not os.path.isdir(folder):
        return 0
    return sum(1 for entry in os.listdir(folder) if entry.endswith('.tif'))


# Download and unpack the chapter archive only when no .tif images are present yet.
if _count_tif_images(DATASET_PATH) == 0:
    zip_name = f"DIP3E_{CHAPTER}_Original_Images.zip"
    url = f"{DOWNLOAD_BASE}/{zip_name}"
    archive_path = "chapter.zip"
    print(f"Downloading {CHAPTER} dataset from imageprocessingplace.com...")
    try:
        urllib.request.urlretrieve(url, archive_path)
        os.makedirs(DATASET_PATH, exist_ok=True)
        with zipfile.ZipFile(archive_path, "r") as z:
            for member in z.namelist():
                if not member.lower().endswith(".tif"):
                    continue
                # Keep only the base name: this flattens any folder structure
                # inside the archive and writes every file directly into
                # DATASET_PATH.
                fname = os.path.basename(member)
                if fname:
                    with z.open(member) as src, open(os.path.join(DATASET_PATH, fname), "wb") as dst:
                        dst.write(src.read())
    finally:
        # Remove the temporary archive even when the download or extraction
        # fails, so a broken/partial zip is never left behind.
        if os.path.exists(archive_path):
            os.remove(archive_path)
    print(f"Downloaded {_count_tif_images(DATASET_PATH)} images")
else:
    print(f"Dataset ready: {_count_tif_images(DATASET_PATH)} images")

# Collect every .tif file in the dataset folder (alphabetical order) and
# print them as a numbered menu so a file name can be copied from it.
images = sorted(entry for entry in os.listdir(DATASET_PATH) if entry.endswith('.tif'))
print(f"\nAvailable images ({len(images)}):")
for position, image_name in enumerate(images, start=1):
    print(f"  {position}. {image_name}")

# === SELECT YOUR IMAGE HERE ===
selected_image = "Fig0241(a)(einstein low contrast).tif"  # Change to any image

# Load the chosen file as a single-channel grayscale array.
image_path = os.path.join(DATASET_PATH, selected_image)
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of an AttributeError further down.
if img is None:
    raise FileNotFoundError(
        f"Could not read image '{image_path}'. "
        "Check that selected_image matches a file name in the dataset folder."
    )

# Basic statistics of the loaded image
print(f"Image: {selected_image}")
print(f"Shape: {img.shape}")
print(f"Intensity range: [{img.min()}, {img.max()}]")
print(f"Mean: {img.mean():.2f}, Std: {img.std():.2f}")

# Figure: the selected image (left) next to its intensity histogram (right)
fig, (ax_image, ax_hist) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the image itself, displayed on a fixed 0-255 gray scale
ax_image.imshow(img, cmap='gray', vmin=0, vmax=255)
ax_image.set_title('Original Image')
ax_image.axis('off')

# Right panel: one histogram bin per intensity level
ax_hist.hist(img.ravel(), bins=256, range=(0, 256), color='steelblue', alpha=0.85)
ax_hist.set_title('Original Histogram')
ax_hist.set_xlabel('Pixel Intensity')
ax_hist.set_ylabel('Frequency')
ax_hist.set_xlim(0, 255)

plt.suptitle(f'Original Image & Histogram: {selected_image}', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.show()

# Equalize the selected image with OpenCV's built-in routine
equalized = cv2.equalizeHist(img)

# Summary statistics before and after, for a quick numeric comparison
print(f"Original  — Mean: {img.mean():.2f}, Std: {img.std():.2f}, Range: [{img.min()}, {img.max()}]")
print(f"Equalized — Mean: {equalized.mean():.2f}, Std: {equalized.std():.2f}, Range: [{equalized.min()}, {equalized.max()}]")

# 2x2 grid: one column per image, picture in the top row and its histogram below
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

columns = (
    (img, 'Original Image', 'Original Histogram', 'steelblue'),
    (equalized, 'Histogram Equalized Image', 'Equalized Histogram', 'darkorange'),
)
for col, (picture, picture_title, hist_title, bar_color) in enumerate(columns):
    ax_picture = axes[0, col]
    ax_picture.imshow(picture, cmap='gray', vmin=0, vmax=255)
    ax_picture.set_title(picture_title)
    ax_picture.axis('off')

    ax_hist = axes[1, col]
    ax_hist.hist(picture.ravel(), bins=256, range=(0, 256), color=bar_color, alpha=0.85)
    ax_hist.set_title(hist_title)
    ax_hist.set_xlabel('Pixel Intensity')
    ax_hist.set_ylabel('Frequency')
    ax_hist.set_xlim(0, 255)

plt.suptitle(f'Histogram Equalization: {selected_image}', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# PDF (normalized histogram) and CDF (running sum of the PDF) of the original image
hist_orig, bins = np.histogram(img.ravel(), bins=256, range=(0, 256))
pdf_orig = hist_orig / hist_orig.sum()
cdf_orig = pdf_orig.cumsum()

# Same quantities for the equalized image
hist_eq, _ = np.histogram(equalized.ravel(), bins=256, range=(0, 256))
pdf_eq = hist_eq / hist_eq.sum()
cdf_eq = pdf_eq.cumsum()

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_pdf_orig, ax_transfer), (ax_pdf_eq, ax_cdf) = axes
levels = range(256)  # x positions: one per intensity level

# Top-left and bottom-left: PDF of the original and of the equalized image
pdf_panels = (
    (ax_pdf_orig, pdf_orig, 'steelblue', 'Original PDF (Normalized Histogram)'),
    (ax_pdf_eq, pdf_eq, 'darkorange', 'Equalized PDF'),
)
for ax, pdf_values, bar_color, panel_title in pdf_panels:
    ax.bar(levels, pdf_values, color=bar_color, alpha=0.85, width=1.0)
    ax.set_title(panel_title)
    ax.set_xlabel('Pixel Intensity')
    ax.set_ylabel('Probability')
    ax.set_xlim(0, 255)

# Top-right: transfer function, i.e. the original CDF scaled to 0-255,
# drawn against the identity mapping for reference
ax_transfer.plot(levels, cdf_orig * 255, color='crimson', linewidth=2)
ax_transfer.plot([0, 255], [0, 255], 'k--', alpha=0.3, label='Identity')
ax_transfer.set_title('Transfer Function T(r) = CDF(r) \u00d7 (L\u22121)')
ax_transfer.set_xlabel('Input Intensity (r)')
ax_transfer.set_ylabel('Output Intensity (s)')
ax_transfer.legend()
ax_transfer.set_xlim(0, 255)
ax_transfer.set_ylim(0, 255)
ax_transfer.set_aspect('equal')
ax_transfer.grid(True, alpha=0.3)

# Bottom-right: both CDFs plus the straight line a uniform histogram would give
ax_cdf.plot(levels, cdf_orig, color='steelblue', linewidth=2, label='Original CDF')
ax_cdf.plot(levels, cdf_eq, color='darkorange', linewidth=2, label='Equalized CDF')
ax_cdf.plot([0, 255], [0, 1], 'k--', alpha=0.3, label='Ideal Uniform CDF')
ax_cdf.set_title('CDF Comparison')
ax_cdf.set_xlabel('Pixel Intensity')
ax_cdf.set_ylabel('Cumulative Probability')
ax_cdf.legend()
ax_cdf.set_xlim(0, 255)
ax_cdf.set_ylim(0, 1.05)
ax_cdf.grid(True, alpha=0.3)

plt.suptitle('Histogram Equalization: Transfer Function & CDF Analysis', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Select multiple images for comparison
comparison_images = [
    "Fig0241(a)(einstein low contrast).tif",
    "Fig0222(b)(cameraman).tif",
    "Fig0240(a)(ct-skull-fracture).tif",
    "Fig0233(a)(galaxy-pair).tif",
]

# Filter to images that exist in the dataset
valid_images = [f for f in comparison_images if f in images]
n = len(valid_images)

if n == 0:
    # plt.subplots cannot build a grid with zero rows, so skip the figure
    # instead of crashing when none of the requested files is available.
    print("No comparison images found in the dataset; skipping multi-image comparison.")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # axes[i, j] indexing works for any n >= 1.
    fig, axes = plt.subplots(n, 2, figsize=(12, 5 * n), squeeze=False)

    # One row per image: original on the left, equalized result on the right
    for i, fname in enumerate(valid_images):
        orig = cv2.imread(os.path.join(DATASET_PATH, fname), cv2.IMREAD_GRAYSCALE)
        eq = cv2.equalizeHist(orig)

        # Shorten long file names for the title; add the ellipsis only when
        # something was actually cut off.
        short_name = fname if len(fname) <= 35 else f'{fname[:35]}...'

        axes[i, 0].imshow(orig, cmap='gray', vmin=0, vmax=255)
        axes[i, 0].set_title(f'Original: {short_name}')
        axes[i, 0].axis('off')

        axes[i, 1].imshow(eq, cmap='gray', vmin=0, vmax=255)
        axes[i, 1].set_title('Equalized')
        axes[i, 1].axis('off')

    plt.suptitle('Histogram Equalization: Multi-Image Comparison', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

# Manual histogram equalization, following s_k = round((L-1) * CDF(r_k))
L = 256  # Number of intensity levels
h, w = img.shape
n_pixels = h * w

# Step 1: Compute histogram (frequency of each intensity).
# np.bincount counts every value in one vectorized pass; minlength=L makes
# sure all L levels get a bin even if the brightest levels never occur.
hist = np.bincount(img.ravel(), minlength=L).astype(np.int64)

# Step 2: Compute PDF (normalized histogram)
pdf = hist / n_pixels

# Step 3: Compute CDF (cumulative sum of PDF)
cdf = np.cumsum(pdf)

# Step 4: Build transfer function: s_k = round((L-1) * CDF(r_k))
transfer = np.round((L - 1) * cdf).astype(np.uint8)

# Step 5: Apply mapping to every pixel (the image values index the lookup table)
manual_equalized = transfer[img]

# Compare with OpenCV's result. Cast to int before subtracting so the
# uint8 difference cannot wrap around.
# NOTE(review): a small non-zero difference does not necessarily indicate a
# bug here - OpenCV's normalization of the CDF may differ slightly from the
# textbook formula above; confirm against the OpenCV documentation.
cv2_equalized = cv2.equalizeHist(img)
diff = np.abs(manual_equalized.astype(int) - cv2_equalized.astype(int))
print(f"Max difference between manual and cv2: {diff.max()}")
print(f"Mean difference: {diff.mean():.4f}")

# Side-by-side view: source image, manual equalization, OpenCV equalization
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

pictures = (img, manual_equalized, cv2_equalized)
captions = ('Original', 'Manual Equalization', 'cv2.equalizeHist()')
for ax, picture, caption in zip(axes, pictures, captions):
    ax.imshow(picture, cmap='gray', vmin=0, vmax=255)
    ax.set_title(caption)
    ax.axis('off')

plt.suptitle('Manual vs OpenCV Histogram Equalization', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

Part 1: Original Image & Histogram

Load an image and visualize its pixel intensity histogram. The histogram shape reveals the contrast characteristics — a narrow cluster means low contrast, a wide spread means high contrast.

Part 2: Histogram Equalization

Apply histogram equalization using cv2.equalizeHist(). Compare the original and equalized images side by side with their histograms. Notice how the equalized histogram spreads pixel intensities across the full 0–255 range.

Part 3: Transfer Function & CDF Analysis

Examine the internals of histogram equalization: the original PDF, the CDF-based transfer function $T(r) = \text{CDF}(r) \times (L{-}1)$, the equalized PDF, and a comparison of original vs. equalized CDFs. The equalized CDF should approximate a straight diagonal line (uniform distribution).

Part 4: Multi-Image Comparison

Apply histogram equalization to several images and compare the before/after results side by side. This demonstrates that equalization adapts to each image's unique intensity distribution.

4. Output

5. Analysis / Conclusion

Analysis Questions

Compare the original and equalized histograms. Why does the equalized histogram approximate a uniform distribution? Is it perfectly uniform — why or why not?
Look at the CDF comparison plot. How does the equalized CDF differ from the original CDF? What would a perfectly uniform histogram's CDF look like?
Does histogram equalization always improve image quality? Consider an image that already has good contrast — what happens when you equalize it?
The transfer function is monotonically increasing. Why is this property necessary? What would happen if the mapping were not monotonic?