Practical 7: 2D Correlation & Convolution

Run this practical in Google Colab

1. Aim

Aim

To implement and analyze the two fundamental linear spatial operations of digital image processing — correlation and convolution — from first principles, demonstrate the impulse-response interpretation that distinguishes them, and apply standard kernels to a real image.

2. Description / Theory

Theory: For an image $f$ and kernel $w$ of size $m \times n$ (with $m=2a+1, n=2b+1$):

Correlation: $ (w \star f)(x,y) = \sum_{s=-a}^{a}\sum_{t=-b}^{b} w(s,t)\,f(x+s,y+t) $
Convolution: $ (w * f)(x,y) = \sum_{s=-a}^{a}\sum_{t=-b}^{b} w(s,t)\,f(x-s,y-t) $
Identity: $ w * f \;=\; \text{rot}_{180}(w) \star f $

At a unit impulse the correlation places $\text{rot}_{180}(w)$ and the convolution places $w$. The two operations coincide only when the kernel is symmetric under 180° rotation.

$$ (w * f)(x,y) \;=\; \text{rot}_{180}(w) \;\star\; f $$

3. Code



# ----------------------------------------------------------------------

# Install dependencies (uncomment for Google Colab)
# !pip install opencv-python-headless matplotlib numpy

import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

# === AUTO-DOWNLOAD DATASET (works in Google Colab and locally) ===
import os, urllib.request, zipfile

CHAPTER = "CH02"  # Chapter 2: Digital Image Fundamentals
DATASET_PATH = f"datasets/{CHAPTER}/"
DOWNLOAD_BASE = "https://www.imageprocessingplace.com/downloads_V3/dip3e_downloads/dip3e_book_images"

if not os.path.exists(DATASET_PATH) or not any(f.endswith('.tif') for f in os.listdir(DATASET_PATH)):
    zip_name = f"DIP3E_{CHAPTER}_Original_Images.zip"
    url = f"{DOWNLOAD_BASE}/{zip_name}"
    print(f"Downloading {CHAPTER} dataset from imageprocessingplace.com...")
    urllib.request.urlretrieve(url, "chapter.zip")
    os.makedirs(DATASET_PATH, exist_ok=True)
    with zipfile.ZipFile("chapter.zip", "r") as z:
        for f in z.namelist():
            if f.lower().endswith(".tif"):
                fname = os.path.basename(f)
                if fname:
                    with z.open(f) as src, open(os.path.join(DATASET_PATH, fname), "wb") as dst:
                        dst.write(src.read())
    os.remove("chapter.zip")
    print(f"Downloaded {len([f for f in os.listdir(DATASET_PATH) if f.endswith('.tif')])} images")
else:
    print(f"Dataset ready: {len([f for f in os.listdir(DATASET_PATH) if f.endswith('.tif')])} images")

def correlate2d(f, w, mode='same'):
    """Direct 2D correlation: (w * f)(x,y) = sum_{s,t} w(s,t) f(x+s, y+t).

    mode = 'same' returns an array of the same size as f.
    mode = 'full' returns the full sliding extent (M+m-1, N+n-1).
    """
    f = np.asarray(f, dtype=np.float64)
    w = np.asarray(w, dtype=np.float64)
    fh, fw = f.shape
    wh, ww = w.shape
    a, b = wh // 2, ww // 2
    padded = np.pad(f, ((a, a), (b, b)), mode='constant', constant_values=0)
    if mode == 'same':
        out = np.zeros((fh, fw), dtype=np.float64)
        for x in range(fh):
            for y in range(fw):
                out[x, y] = np.sum(w * padded[x:x+wh, y:y+ww])
        return out, padded
    elif mode == 'full':
        big = np.pad(f, ((wh-1, wh-1), (ww-1, ww-1)), mode='constant', constant_values=0)
        H, W = fh + wh - 1, fw + ww - 1
        out = np.zeros((H, W), dtype=np.float64)
        for x in range(H):
            for y in range(W):
                out[x, y] = np.sum(w * big[x:x+wh, y:y+ww])
        return out, padded
    else:
        raise ValueError("mode must be 'same' or 'full'")

def convolve2d(f, w, mode='same'):
    """Direct 2D convolution = correlation with w rotated 180 degrees."""
    w_rot = np.rot90(w, 2)
    return correlate2d(f, w_rot, mode=mode)

print('correlate2d and convolve2d defined.')

f = np.array([
    [0, 0, 0],
    [0, 1, 0],
    [0, 0, 0],
])

w = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

corr_same, padded_f = correlate2d(f, w, mode='same')
conv_same, _        = convolve2d(f, w, mode='same')
corr_full, _        = correlate2d(f, w, mode='full')
conv_full, _        = convolve2d(f, w, mode='full')

print('--- Inputs ---')
print('Image f =\n', f)
print('Kernel w =\n', w)
print('Rotated kernel rot180(w) =\n', np.rot90(w, 2))
print('Zero-padded f =\n', padded_f.astype(int))

print('\n--- Correlation (w * f) ---')
print('same :\n', corr_same.astype(int))
print('full :\n', corr_full.astype(int))

print('\n--- Convolution (w (*) f) ---')
print('same :\n', conv_same.astype(int))
print('full :\n', conv_full.astype(int))

print('\nObservation: at the impulse, correlation reproduces rot180(w);')
print('             convolution reproduces w itself.')

fig, axes = plt.subplots(2, 3, figsize=(13, 8))

panels_top = [
    ('Image f (impulse)', f),
    ('Kernel w', w),
    ('rot180(w)', np.rot90(w, 2)),
]
panels_bot = [
    ('Padded f', padded_f.astype(int)),
    ('Correlation (same)', corr_same.astype(int)),
    ('Convolution (same)', conv_same.astype(int)),
]

for ax, (title, mat) in zip(axes[0], panels_top):
    im = ax.imshow(mat, cmap='viridis')
    ax.set_title(title)
    for (i, j), v in np.ndenumerate(mat):
        ax.text(j, i, str(int(v)), ha='center', va='center', color='white', fontsize=11, fontweight='bold')
    ax.set_xticks([]); ax.set_yticks([])

for ax, (title, mat) in zip(axes[1], panels_bot):
    im = ax.imshow(mat, cmap='viridis')
    ax.set_title(title)
    for (i, j), v in np.ndenumerate(mat):
        ax.text(j, i, str(int(v)), ha='center', va='center', color='white', fontsize=10, fontweight='bold')
    ax.set_xticks([]); ax.set_yticks([])

plt.suptitle('Impulse Response: Correlation vs Convolution', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

selected_image = "Fig0222(b)(cameraman).tif"
img = cv2.imread(os.path.join(DATASET_PATH, selected_image), cv2.IMREAD_GRAYSCALE)

print(f"Image:  {selected_image}")
print(f"Shape:  {img.shape}")
print(f"Range:  [{img.min()}, {img.max()}]")

# Standard kernels
box = np.ones((3, 3), dtype=np.float64) / 9.0
lap = np.array([[ 0, -1,  0],
                [-1,  4, -1],
                [ 0, -1,  0]], dtype=np.float64)
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]], dtype=np.float64)

out_box, _ = convolve2d(img.astype(np.float64), box,     mode='same')
out_lap, _ = convolve2d(img.astype(np.float64), lap,     mode='same')
out_sob, _ = convolve2d(img.astype(np.float64), sobel_x, mode='same')

def to_uint8(a):
    a = a - a.min()
    a = 255.0 * a / max(a.max(), 1e-9)
    return a.astype(np.uint8)

fig, axes = plt.subplots(1, 4, figsize=(18, 5))
axes[0].imshow(img, cmap='gray', vmin=0, vmax=255); axes[0].set_title('Original'); axes[0].axis('off')
axes[1].imshow(np.clip(out_box, 0, 255).astype(np.uint8), cmap='gray', vmin=0, vmax=255)
axes[1].set_title('Box 3x3 (smoothing)'); axes[1].axis('off')
axes[2].imshow(to_uint8(out_lap), cmap='gray'); axes[2].set_title('Laplacian (edges, normalised)'); axes[2].axis('off')
axes[3].imshow(to_uint8(out_sob), cmap='gray'); axes[3].set_title('Sobel-X (vertical edges, normalised)'); axes[3].axis('off')
plt.suptitle('Manual convolve2d: Three Standard Kernels on Cameraman', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

kernel = sobel_x  # asymmetric, so corr != conv

manual_conv, _ = convolve2d(img.astype(np.float64), kernel, mode='same')
cv_conv = cv2.filter2D(img.astype(np.float64), ddepth=-1,
                       kernel=np.rot90(kernel, 2),
                       borderType=cv2.BORDER_CONSTANT)
manual_corr, _ = correlate2d(img.astype(np.float64), kernel, mode='same')
cv_corr = cv2.filter2D(img.astype(np.float64), ddepth=-1,
                       kernel=kernel,
                       borderType=cv2.BORDER_CONSTANT)

print(f"max |manual_conv - cv_conv| = {np.max(np.abs(manual_conv - cv_conv)):.6f}")
print(f"max |manual_corr - cv_corr| = {np.max(np.abs(manual_corr - cv_corr)):.6f}")
print(f"max |conv - corr|           = {np.max(np.abs(manual_conv - manual_corr)):.6f}")
print("\nA non-zero conv-vs-corr gap confirms the kernel is asymmetric.")

Part 1: Impulse Response Demonstration

Place a unit impulse at the centre of a $3\times3$ image and operate with a $3\times3$ ramp kernel. The two outputs differ by a 180° rotation of $w$ at the impulse position.

Part 2: Custom Matrices

Provide your own $f$ and $w$ matrices (rows of space-separated numbers, one row per line). The kernel must have odd dimensions so it has a unique centre.

Image f

Kernel w (odd dimensions)

Part 3: Standard Kernels on a Real Image

Apply a $3\times3$ box (averaging), a Laplacian (sharpening edge detector), and a Sobel-X (vertical edge detector) to a real grayscale image via convolution.

Part 4: Verification — corr ≠ conv for an Asymmetric Kernel

OpenCV's filter2D performs correlation. With the asymmetric Sobel-X kernel the correlation and convolution outputs differ; the difference image confirms the kernel is not symmetric under 180° rotation.

4. Output

5. Analysis / Conclusion

Analysis Questions

Why does correlation produce a flipped copy of $w$ when $f$ is a unit impulse, while convolution produces $w$ itself?
For which kernels are correlation and convolution numerically identical? Give two examples and one counter-example.
What is the role of zero padding, and what alternatives (replicate, reflect, wrap) might you prefer in different contexts?
What is the computational cost of a direct $m\times n$ convolution on an $M\times N$ image, and how is it reduced via separability and FFT?