Practical 7: 2D Correlation & Convolution
Run this practical in Google Colab
Aim
To implement and analyze the two fundamental linear spatial operations of digital image processing — correlation and convolution — from first principles, demonstrate the impulse-response interpretation that distinguishes them, and apply standard kernels to a real image.
2. Description / Theory- Correlation: \( (w \star f)(x,y) = \sum_{s=-a}^{a}\sum_{t=-b}^{b} w(s,t)\,f(x+s,y+t) \)
- Convolution: \( (w * f)(x,y) = \sum_{s=-a}^{a}\sum_{t=-b}^{b} w(s,t)\,f(x-s,y-t) \)
- Identity: \( w * f \;=\; \text{rot}_{180}(w) \star f \)
# ----------------------------------------------------------------------
# Install dependencies (uncomment for Google Colab)
# !pip install opencv-python-headless matplotlib numpy
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
# === AUTO-DOWNLOAD DATASET (works in Google Colab and locally) ===
import os, urllib.request, zipfile
CHAPTER = "CH02" # Chapter 2: Digital Image Fundamentals
DATASET_PATH = f"datasets/{CHAPTER}/"
DOWNLOAD_BASE = "https://www.imageprocessingplace.com/downloads_V3/dip3e_downloads/dip3e_book_images"
if not os.path.exists(DATASET_PATH) or not any(f.endswith('.tif') for f in os.listdir(DATASET_PATH)):
zip_name = f"DIP3E_{CHAPTER}_Original_Images.zip"
url = f"{DOWNLOAD_BASE}/{zip_name}"
print(f"Downloading {CHAPTER} dataset from imageprocessingplace.com...")
urllib.request.urlretrieve(url, "chapter.zip")
os.makedirs(DATASET_PATH, exist_ok=True)
with zipfile.ZipFile("chapter.zip", "r") as z:
for f in z.namelist():
if f.lower().endswith(".tif"):
fname = os.path.basename(f)
if fname:
with z.open(f) as src, open(os.path.join(DATASET_PATH, fname), "wb") as dst:
dst.write(src.read())
os.remove("chapter.zip")
print(f"Downloaded {len([f for f in os.listdir(DATASET_PATH) if f.endswith('.tif')])} images")
else:
print(f"Dataset ready: {len([f for f in os.listdir(DATASET_PATH) if f.endswith('.tif')])} images")
def correlate2d(f, w, mode='same'):
"""Direct 2D correlation: (w * f)(x,y) = sum_{s,t} w(s,t) f(x+s, y+t).
mode = 'same' returns an array of the same size as f.
mode = 'full' returns the full sliding extent (M+m-1, N+n-1).
"""
f = np.asarray(f, dtype=np.float64)
w = np.asarray(w, dtype=np.float64)
fh, fw = f.shape
wh, ww = w.shape
a, b = wh // 2, ww // 2
padded = np.pad(f, ((a, a), (b, b)), mode='constant', constant_values=0)
if mode == 'same':
out = np.zeros((fh, fw), dtype=np.float64)
for x in range(fh):
for y in range(fw):
out[x, y] = np.sum(w * padded[x:x+wh, y:y+ww])
return out, padded
elif mode == 'full':
big = np.pad(f, ((wh-1, wh-1), (ww-1, ww-1)), mode='constant', constant_values=0)
H, W = fh + wh - 1, fw + ww - 1
out = np.zeros((H, W), dtype=np.float64)
for x in range(H):
for y in range(W):
out[x, y] = np.sum(w * big[x:x+wh, y:y+ww])
return out, padded
else:
raise ValueError("mode must be 'same' or 'full'")
def convolve2d(f, w, mode='same'):
"""Direct 2D convolution = correlation with w rotated 180 degrees."""
w_rot = np.rot90(w, 2)
return correlate2d(f, w_rot, mode=mode)
print('correlate2d and convolve2d defined.')
f = np.array([
[0, 0, 0],
[0, 1, 0],
[0, 0, 0],
])
w = np.array([
[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
])
corr_same, padded_f = correlate2d(f, w, mode='same')
conv_same, _ = convolve2d(f, w, mode='same')
corr_full, _ = correlate2d(f, w, mode='full')
conv_full, _ = convolve2d(f, w, mode='full')
print('--- Inputs ---')
print('Image f =\n', f)
print('Kernel w =\n', w)
print('Rotated kernel rot180(w) =\n', np.rot90(w, 2))
print('Zero-padded f =\n', padded_f.astype(int))
print('\n--- Correlation (w * f) ---')
print('same :\n', corr_same.astype(int))
print('full :\n', corr_full.astype(int))
print('\n--- Convolution (w (*) f) ---')
print('same :\n', conv_same.astype(int))
print('full :\n', conv_full.astype(int))
print('\nObservation: at the impulse, correlation reproduces rot180(w);')
print(' convolution reproduces w itself.')
fig, axes = plt.subplots(2, 3, figsize=(13, 8))
panels_top = [
('Image f (impulse)', f),
('Kernel w', w),
('rot180(w)', np.rot90(w, 2)),
]
panels_bot = [
('Padded f', padded_f.astype(int)),
('Correlation (same)', corr_same.astype(int)),
('Convolution (same)', conv_same.astype(int)),
]
for ax, (title, mat) in zip(axes[0], panels_top):
im = ax.imshow(mat, cmap='viridis')
ax.set_title(title)
for (i, j), v in np.ndenumerate(mat):
ax.text(j, i, str(int(v)), ha='center', va='center', color='white', fontsize=11, fontweight='bold')
ax.set_xticks([]); ax.set_yticks([])
for ax, (title, mat) in zip(axes[1], panels_bot):
im = ax.imshow(mat, cmap='viridis')
ax.set_title(title)
for (i, j), v in np.ndenumerate(mat):
ax.text(j, i, str(int(v)), ha='center', va='center', color='white', fontsize=10, fontweight='bold')
ax.set_xticks([]); ax.set_yticks([])
plt.suptitle('Impulse Response: Correlation vs Convolution', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
selected_image = "Fig0222(b)(cameraman).tif"
img = cv2.imread(os.path.join(DATASET_PATH, selected_image), cv2.IMREAD_GRAYSCALE)
print(f"Image: {selected_image}")
print(f"Shape: {img.shape}")
print(f"Range: [{img.min()}, {img.max()}]")
# Standard kernels
box = np.ones((3, 3), dtype=np.float64) / 9.0
lap = np.array([[ 0, -1, 0],
[-1, 4, -1],
[ 0, -1, 0]], dtype=np.float64)
sobel_x = np.array([[-1, 0, 1],
[-2, 0, 2],
[-1, 0, 1]], dtype=np.float64)
out_box, _ = convolve2d(img.astype(np.float64), box, mode='same')
out_lap, _ = convolve2d(img.astype(np.float64), lap, mode='same')
out_sob, _ = convolve2d(img.astype(np.float64), sobel_x, mode='same')
def to_uint8(a):
a = a - a.min()
a = 255.0 * a / max(a.max(), 1e-9)
return a.astype(np.uint8)
fig, axes = plt.subplots(1, 4, figsize=(18, 5))
axes[0].imshow(img, cmap='gray', vmin=0, vmax=255); axes[0].set_title('Original'); axes[0].axis('off')
axes[1].imshow(np.clip(out_box, 0, 255).astype(np.uint8), cmap='gray', vmin=0, vmax=255)
axes[1].set_title('Box 3x3 (smoothing)'); axes[1].axis('off')
axes[2].imshow(to_uint8(out_lap), cmap='gray'); axes[2].set_title('Laplacian (edges, normalised)'); axes[2].axis('off')
axes[3].imshow(to_uint8(out_sob), cmap='gray'); axes[3].set_title('Sobel-X (vertical edges, normalised)'); axes[3].axis('off')
plt.suptitle('Manual convolve2d: Three Standard Kernels on Cameraman', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
kernel = sobel_x # asymmetric, so corr != conv
manual_conv, _ = convolve2d(img.astype(np.float64), kernel, mode='same')
cv_conv = cv2.filter2D(img.astype(np.float64), ddepth=-1,
kernel=np.rot90(kernel, 2),
borderType=cv2.BORDER_CONSTANT)
manual_corr, _ = correlate2d(img.astype(np.float64), kernel, mode='same')
cv_corr = cv2.filter2D(img.astype(np.float64), ddepth=-1,
kernel=kernel,
borderType=cv2.BORDER_CONSTANT)
print(f"max |manual_conv - cv_conv| = {np.max(np.abs(manual_conv - cv_conv)):.6f}")
print(f"max |manual_corr - cv_corr| = {np.max(np.abs(manual_corr - cv_corr)):.6f}")
print(f"max |conv - corr| = {np.max(np.abs(manual_conv - manual_corr)):.6f}")
print("\nA non-zero conv-vs-corr gap confirms the kernel is asymmetric.")
Part 1: Impulse Response Demonstration
Place a unit impulse at the centre of a \(3\times3\) image and operate with a \(3\times3\) ramp kernel. The two outputs differ by a 180° rotation of \(w\) at the impulse position.
Part 2: Custom Matrices
Provide your own \(f\) and \(w\) matrices (rows of space-separated numbers, one row per line). The kernel must have odd dimensions so it has a unique centre.
Part 3: Standard Kernels on a Real Image
Apply a \(3\times3\) box (averaging), a Laplacian (sharpening edge detector), and a Sobel-X (vertical edge detector) to a real grayscale image via convolution.
Part 4: Verification — corr ≠ conv for an Asymmetric Kernel
OpenCV's filter2D performs correlation. With the asymmetric Sobel-X kernel the correlation and convolution outputs differ; the difference image confirms the kernel is not symmetric under 180° rotation.
Analysis Questions
- Why does correlation produce a flipped copy of \(w\) when \(f\) is a unit impulse, while convolution produces \(w\) itself?
- For which kernels are correlation and convolution numerically identical? Give two examples and one counter-example.
- What is the role of zero padding, and what alternatives (replicate, reflect, wrap) might you prefer in different contexts?
- What is the computational cost of a direct \(m\times n\) convolution on an \(M\times N\) image, and how is it reduced via separability and FFT?