capture intrinsic camera parameters

This commit is contained in:
Ethan Clark 2025-03-26 14:35:34 -07:00
parent d7aa008d7a
commit 83d06a07be
2 changed files with 382 additions and 0 deletions

View File

@ -0,0 +1,121 @@
import numpy as np
def compute_line_equation(points: list) -> tuple:
"""Compute line equation from two points"""
pt1, pt2 = points[0], points[1]
slope = (pt2[1] - pt1[1]) / (pt2[0] - pt1[0] + 1e-10) # Avoid division by zero
intercept = pt2[1] - slope * pt2[0]
a, b, c = -slope, 1.0, -intercept
return a, b, c
def compute_point_of_intersection(line1: tuple, line2: tuple) -> tuple | None:
"""Compute intersection point of two lines"""
a1, b1, c1 = line1
a2, b2, c2 = line2
det = a1 * b2 - a2 * b1
# If lines are parallel, determinant will be zero
if abs(det) < 1e-10:
return None
x = (b1 * c2 - b2 * c1) / det
y = (a2 * c1 - a1 * c2) / det
return (x, y)
def compute_vanishing_point(points: list) -> tuple | None:
"""Compute vanishing point from two parallel lines by intersecting their line equations"""
a1, b1, c1 = compute_line_equation([points[0], points[1]])
a2, b2, c2 = compute_line_equation([points[2], points[3]])
return compute_point_of_intersection((a1, b1, c1), (a2, b2, c2))
def compute_K_from_vanishing_points(vanishing_points: list) -> np.ndarray:
"""
Compute camera intrinsic matrix K from orthogonal vanishing points.
Mathematical background:
- Vanishing points from orthogonal 3D directions provide constraints on the camera's
intrinsic parameters.
- If v_i and v_j are vanishing points from orthogonal 3D directions, then:
v_i^T * omega * v_j = 0, where omega = (K*K^T)^(-1)
- This gives us quadratic constraints on the elements of omega, which we solve
to recover K.
Args:
vanishing_points: List of at least 3 vanishing points corresponding to
orthogonal directions in 3D space.
Returns:
K: 3x3 camera intrinsic matrix
"""
# Each pair of orthogonal vanishing points provides one constraint on omega
A = []
for i, point_i in enumerate(vanishing_points):
for j, point_j in enumerate(vanishing_points):
if i != j and j > i: # Consider each pair only once
# Convert to homogeneous coordinates
p1 = np.array([point_i[0], point_i[1], 1.0])
p2 = np.array([point_j[0], point_j[1], 1.0])
# The constraint v_i^T * omega * v_j = 0 expands to:
# p1[0]*p2[0]*omega[0,0] + p1[0]*p2[1]*omega[0,1] + p1[0]*omega[0,2]*p2[2] +
# p1[1]*p2[0]*omega[1,0] + p1[1]*p2[1]*omega[1,1] + p1[1]*omega[1,2]*p2[2] +
# p1[2]*p2[0]*omega[2,0] + p1[2]*p2[1]*omega[2,1] + p1[2]*p2[2]*omega[2,2] = 0
# With zero-skew assumption (omega[0,1]=omega[1,0]=0) and
# square pixels (omega[0,0]=omega[1,1]), we get:
A.append(
[
p1[0] * p2[0]
+ p1[1] * p2[1], # Coefficient for omega[0,0]=omega[1,1]
p1[0] * p2[2]
+ p1[2] * p2[0], # Coefficient for omega[0,2]=omega[2,0]
p1[1] * p2[2]
+ p1[2] * p2[1], # Coefficient for omega[1,2]=omega[2,1]
p1[2] * p2[2], # Coefficient for omega[2,2]
]
)
A = np.array(A, dtype=np.float32)
# Solve the homogeneous system A*w = 0 using SVD
# The solution is the eigenvector corresponding to the smallest eigenvalue
_, _, vt = np.linalg.svd(A)
# Extract the coefficients from the last row of Vt (smallest eigenvalue)
w1, w4, w5, w6 = vt[
-1
] # These represent omega[0,0], omega[0,2], omega[1,2], omega[2,2]
# Reconstruct the omega matrix with enforced symmetry and square pixel constraints
# omega = (K*K^T)^(-1)
omega = np.array([[w1, 0.0, w4], [0.0, w1, w5], [w4, w5, w6]])
# K can be obtained by Cholesky factorization of inverse of omega:
# If omega = (K*K^T)^(-1), then K*K^T = omega^(-1)
# Cholesky gives us K^T, so we invert and transpose
try:
# Get L such that L*L^T = omega, then K^T = L^(-1)
K_transpose_inv = np.linalg.cholesky(omega)
K = np.linalg.inv(K_transpose_inv.T)
K = K / K[2, 2] # Normalize so K[2,2] = 1
except np.linalg.LinAlgError:
# If omega is not positive definite, find closest PD matrix
# This can happen due to noise in vanishing point estimation
eigvals, eigvecs = np.linalg.eigh(omega)
eigvals = np.maximum(eigvals, 1e-10) # Ensure positive eigenvalues
omega_pd = (
eigvecs @ np.diag(eigvals) @ eigvecs.T
) # Reconstruct with positive eigenvalues
K_transpose_inv = np.linalg.cholesky(omega_pd)
K = np.linalg.inv(K_transpose_inv.T)
K = K / K[2, 2] # Normalize so K[2,2] = 1
return K

View File

@ -0,0 +1,261 @@
import os
import re
import cv2
import time
import numpy as np
from datetime import datetime
from calibration_math import *
class PointCollector:
"""Collect points for vanishing point calibration"""
def __init__(self, image: np.ndarray) -> None:
self.image = image.copy()
self.display_image = image.copy()
self.height, self.width = image.shape[:2]
self.points = []
self.line_sets = []
self.n_line_sets = 3
self.current_points = []
self.colors = [
(0, 0, 255),
(0, 255, 0),
(255, 0, 0),
(255, 255, 0),
(255, 0, 255),
(0, 255, 255),
]
self.window_name = "Camera Calibration"
cv2.namedWindow(self.window_name)
cv2.setMouseCallback(self.window_name, self._mouse_callback)
def _mouse_callback(
self, event: int, x: int, y: int, flags: None, param: None
) -> None:
"""Mouse callback function for collecting points"""
if event == cv2.EVENT_LBUTTONDOWN:
if len(self.current_points) < 4:
self.current_points.append((x, y))
self._update_display()
def _update_display(self) -> None:
"""Update display with current points and instructions"""
self.display_image = self.image.copy()
# Draw instructions
instructions = [
"Click to place points for parallel lines",
"Press 'r' to reset current set",
"Press 'q' to quit",
]
for i, text in enumerate(instructions):
cv2.putText(
self.display_image,
text,
(10, 30 + i * 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
for i, line_set in enumerate(self.line_sets):
self._draw_line_set(line_set, self.colors[i % len(self.colors)])
self._draw_line_set(
self.current_points, self.colors[len(self.line_sets) % len(self.colors)]
)
# Show current state
status = (
f"Line set {len(self.line_sets) + 1}: {len(self.current_points)}/4 points"
)
cv2.putText(
self.display_image,
status,
(10, self.height - 60),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow(self.window_name, self.display_image)
def _draw_line_set(self, line_set: list, color: tuple) -> None:
# Draw lines
if len(line_set) >= 2:
cv2.line(self.display_image, line_set[0], line_set[1], color, 2)
if len(line_set) >= 4:
cv2.line(self.display_image, line_set[2], line_set[3], color, 2)
# Draw points
for point in line_set:
cv2.circle(self.display_image, point, 5, color, -1)
def collect_points(self) -> None:
print("Collecting points for vanishing point calculation...")
while True:
self._update_display()
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
exit()
elif key == ord("r"):
self.current_points = []
elif key == ord("c"):
if len(self.current_points) == 4:
self.line_sets.append(self.current_points)
self.current_points = []
if len(self.line_sets) < 3:
print(
f"Need {self.n_line_sets} line sets, currently have {len(self.line_sets)}"
)
if len(self.line_sets) == self.n_line_sets:
break
if len(self.current_points) == 4:
self.line_sets.append(self.current_points)
self.current_points = []
cv2.destroyWindow(self.window_name)
def show_calibration_guidance() -> None:
"""Display guidance for vanishing point calibration"""
print("\n=== VANISHING POINT CALIBRATION GUIDANCE ===")
print(
"You'll need to identify 3 sets of parallel lines that are perpendicular to each other in the real world."
)
print("\nTIPS FOR ACCURATE CALIBRATION:")
print(
"1. Choose lines from truly orthogonal structures (walls/floor/ceiling intersections)"
)
print("2. Place points for each line as FAR APART as possible")
print("3. Use high-contrast edges for better precision")
print("4. Use structures that span a large portion of the image")
print("\nFor each direction:")
print("- First line: click 2 points along one edge")
print("- Second line: click 2 points along another parallel edge")
print("\nPress Enter to begin...")
input()
def capture_image(file_dir: str) -> str:
"""Capture image from camera for intrinsic calibration"""
cap = cv2.VideoCapture(0)
assert cap is not None, "Error: Could not open camera."
window_name = "Camera Calibration"
cv2.namedWindow(window_name)
print("Camera preview opened. Press SPACE to capture or ESC to cancel.")
print("Position your camera to capture a scene with clear orthogonal structures.")
print("Building interiors, boxes, or furniture work well.")
captured_frame = None
while True:
success, frame = cap.read()
if not success:
print("Error: Failed to capture frame.")
time.sleep(0.5)
cv2.imshow(window_name, frame)
key = cv2.waitKey(1) & 0xFF
if key == 27: # ESC key
print("Capture cancelled.")
break
elif key == 32: # SPACE key
captured_frame = frame.copy()
break
cap.release()
cv2.destroyWindow(window_name)
assert captured_frame is not None, "Error: No frame captured."
filename = "calibration_img.jpg"
filepath = os.path.join(file_dir, filename)
cv2.imwrite(filepath, captured_frame)
print(f"Image saved as {filepath}")
return filename
def calibrate_intrinsics() -> np.ndarray:
"""Calibrate camera intrinsic matrix using vanishing points"""
file_dir = os.path.dirname(os.path.abspath(__file__))
files = os.listdir(file_dir)
match = re.compile(r"calibration_img\.\w+")
# Look for existing calibration image in calibration directory
calibration_img = next((file for file in files if match.match(file)), None)
if calibration_img is None:
response = input(
"No calibration image found. Would you like to take a picture now? (y/n)"
)
if response.strip().lower() == "y":
calibration_img = capture_image(file_dir)
else:
print("Calibration cancelled.")
exit()
img_path = os.path.join(file_dir, calibration_img)
img = cv2.imread(img_path)
assert img is not None, f"Error: Could not load image '{img_path}'."
show_calibration_guidance()
point_collector = PointCollector(img)
point_collector.collect_points()
vanishing_points = []
for line_set in point_collector.line_sets:
try:
vp = compute_vanishing_point(line_set)
vanishing_points.append(vp)
except Exception as e:
print(f"Error computing vanishing point: {e}")
if len(vanishing_points) < 3:
print("Could not compute enough vanishing points.")
return None
try:
K = compute_K_from_vanishing_points(vanishing_points[:3])
print("Estimated Intrinsic Matrix:")
print(K)
except Exception as e:
print(f"Error computing intrinsic matrix: {e}")
return None
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
np_filename = f"intrinsics.npz"
file_path = os.path.join(file_dir, np_filename)
np.savez(
file_path,
K=K,
vanishing_points=vanishing_points,
img_path=img_path,
timestamp=timestamp,
)
print(f"Saved camera intrinsics to: {np_filename}")
return K
if __name__ == "__main__":
calibrate_intrinsics()