From 83d06a07be201f8876a690ce5ef1b2a29b32002b Mon Sep 17 00:00:00 2001
From: Ethan Clark <eclark715@gmail.com>
Date: Wed, 26 Mar 2025 14:35:34 -0700
Subject: [PATCH] capture intrinsic camera parameters

---
 .../utils/calibration/calibration_math.py     | 121 ++++++++
 open_phantom/utils/calibration/intrinsic.py   | 261 ++++++++++++++++++
 2 files changed, 382 insertions(+)
 create mode 100644 open_phantom/utils/calibration/calibration_math.py
 create mode 100644 open_phantom/utils/calibration/intrinsic.py

diff --git a/open_phantom/utils/calibration/calibration_math.py b/open_phantom/utils/calibration/calibration_math.py
new file mode 100644
index 0000000..9f39868
--- /dev/null
+++ b/open_phantom/utils/calibration/calibration_math.py
@@ -0,0 +1,121 @@
+import numpy as np
+
+
+def compute_line_equation(points: list) -> tuple:
+    """Compute line equation from two points"""
+    pt1, pt2 = points[0], points[1]
+    slope = (pt2[1] - pt1[1]) / (pt2[0] - pt1[0] + 1e-10)  # Avoid division by zero
+    intercept = pt2[1] - slope * pt2[0]
+
+    a, b, c = -slope, 1.0, -intercept
+
+    return a, b, c
+
+
+def compute_point_of_intersection(line1: tuple, line2: tuple) -> tuple | None:
+    """Compute intersection point of two lines"""
+    a1, b1, c1 = line1
+    a2, b2, c2 = line2
+
+    det = a1 * b2 - a2 * b1
+    # If lines are parallel, determinant will be zero
+    if abs(det) < 1e-10:
+        return None
+
+    x = (b1 * c2 - b2 * c1) / det
+    y = (a2 * c1 - a1 * c2) / det
+
+    return (x, y)
+
+
+def compute_vanishing_point(points: list) -> tuple | None:
+    """Compute vanishing point from two parallel lines by intersecting their line equations"""
+    a1, b1, c1 = compute_line_equation([points[0], points[1]])
+    a2, b2, c2 = compute_line_equation([points[2], points[3]])
+
+    return compute_point_of_intersection((a1, b1, c1), (a2, b2, c2))
+
+
+def compute_K_from_vanishing_points(vanishing_points: list) -> np.ndarray:
+    """
+    Compute camera intrinsic matrix K from orthogonal vanishing points.
+
+    Mathematical background:
+    - Vanishing points from orthogonal 3D directions provide constraints on the camera's
+      intrinsic parameters.
+    - If v_i and v_j are vanishing points from orthogonal 3D directions, then:
+      v_i^T * omega * v_j = 0, where omega = (K*K^T)^(-1)
+    - This gives us quadratic constraints on the elements of omega, which we solve
+      to recover K.
+
+    Args:
+        vanishing_points: List of at least 3 vanishing points corresponding to
+                          orthogonal directions in 3D space.
+
+    Returns:
+        K: 3x3 camera intrinsic matrix
+    """
+    # Each pair of orthogonal vanishing points provides one constraint on omega
+    A = []
+    for i, point_i in enumerate(vanishing_points):
+        for j, point_j in enumerate(vanishing_points):
+            if i != j and j > i:  # Consider each pair only once
+                # Convert to homogeneous coordinates
+                p1 = np.array([point_i[0], point_i[1], 1.0])
+                p2 = np.array([point_j[0], point_j[1], 1.0])
+
+                # The constraint v_i^T * omega * v_j = 0 expands to:
+                # p1[0]*p2[0]*omega[0,0] + p1[0]*p2[1]*omega[0,1] + p1[0]*omega[0,2]*p2[2] +
+                # p1[1]*p2[0]*omega[1,0] + p1[1]*p2[1]*omega[1,1] + p1[1]*omega[1,2]*p2[2] +
+                # p1[2]*p2[0]*omega[2,0] + p1[2]*p2[1]*omega[2,1] + p1[2]*p2[2]*omega[2,2] = 0
+
+                # With zero-skew assumption (omega[0,1]=omega[1,0]=0) and
+                # square pixels (omega[0,0]=omega[1,1]), we get:
+                A.append(
+                    [
+                        p1[0] * p2[0]
+                        + p1[1] * p2[1],  # Coefficient for omega[0,0]=omega[1,1]
+                        p1[0] * p2[2]
+                        + p1[2] * p2[0],  # Coefficient for omega[0,2]=omega[2,0]
+                        p1[1] * p2[2]
+                        + p1[2] * p2[1],  # Coefficient for omega[1,2]=omega[2,1]
+                        p1[2] * p2[2],  # Coefficient for omega[2,2]
+                    ]
+                )
+
+    A = np.array(A, dtype=np.float32)
+
+    # Solve the homogeneous system A*w = 0 using SVD
+    # The solution is the eigenvector corresponding to the smallest eigenvalue
+    _, _, vt = np.linalg.svd(A)
+
+    # Extract the coefficients from the last row of Vt (smallest eigenvalue)
+    w1, w4, w5, w6 = vt[
+        -1
+    ]  # These represent omega[0,0], omega[0,2], omega[1,2], omega[2,2]
+
+    # Reconstruct the omega matrix with enforced symmetry and square pixel constraints
+    # omega = (K*K^T)^(-1)
+    omega = np.array([[w1, 0.0, w4], [0.0, w1, w5], [w4, w5, w6]])
+
+    # K can be obtained by Cholesky factorization of inverse of omega:
+    # If omega = (K*K^T)^(-1), then K*K^T = omega^(-1)
+    # Cholesky gives us K^T, so we invert and transpose
+    try:
+        # Get L such that L*L^T = omega, then K^T = L^(-1)
+        K_transpose_inv = np.linalg.cholesky(omega)
+        K = np.linalg.inv(K_transpose_inv.T)
+        K = K / K[2, 2]  # Normalize so K[2,2] = 1
+    except np.linalg.LinAlgError:
+        # If omega is not positive definite, find closest PD matrix
+        # This can happen due to noise in vanishing point estimation
+        eigvals, eigvecs = np.linalg.eigh(omega)
+        eigvals = np.maximum(eigvals, 1e-10)  # Ensure positive eigenvalues
+        omega_pd = (
+            eigvecs @ np.diag(eigvals) @ eigvecs.T
+        )  # Reconstruct with positive eigenvalues
+        K_transpose_inv = np.linalg.cholesky(omega_pd)
+        K = np.linalg.inv(K_transpose_inv.T)
+        K = K / K[2, 2]  # Normalize so K[2,2] = 1
+
+    return K
diff --git a/open_phantom/utils/calibration/intrinsic.py b/open_phantom/utils/calibration/intrinsic.py
new file mode 100644
index 0000000..986225b
--- /dev/null
+++ b/open_phantom/utils/calibration/intrinsic.py
@@ -0,0 +1,261 @@
+import os
+import re
+import cv2
+import time
+import numpy as np
+
+from datetime import datetime
+from calibration_math import *
+
+
+class PointCollector:
+    """Collect points for vanishing point calibration"""
+
+    def __init__(self, image: np.ndarray) -> None:
+        self.image = image.copy()
+        self.display_image = image.copy()
+
+        self.height, self.width = image.shape[:2]
+
+        self.points = []
+        self.line_sets = []
+        self.n_line_sets = 3
+        self.current_points = []
+        self.colors = [
+            (0, 0, 255),
+            (0, 255, 0),
+            (255, 0, 0),
+            (255, 255, 0),
+            (255, 0, 255),
+            (0, 255, 255),
+        ]
+
+        self.window_name = "Camera Calibration"
+        cv2.namedWindow(self.window_name)
+        cv2.setMouseCallback(self.window_name, self._mouse_callback)
+
+    def _mouse_callback(
+        self, event: int, x: int, y: int, flags: None, param: None
+    ) -> None:
+        """Mouse callback function for collecting points"""
+        if event == cv2.EVENT_LBUTTONDOWN:
+            if len(self.current_points) < 4:
+                self.current_points.append((x, y))
+                self._update_display()
+
+    def _update_display(self) -> None:
+        """Update display with current points and instructions"""
+        self.display_image = self.image.copy()
+
+        # Draw instructions
+        instructions = [
+            "Click to place points for parallel lines",
+            "Press 'r' to reset current set",
+            "Press 'q' to quit",
+        ]
+        for i, text in enumerate(instructions):
+            cv2.putText(
+                self.display_image,
+                text,
+                (10, 30 + i * 30),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7,
+                (255, 255, 255),
+                2,
+            )
+
+        for i, line_set in enumerate(self.line_sets):
+            self._draw_line_set(line_set, self.colors[i % len(self.colors)])
+
+        self._draw_line_set(
+            self.current_points, self.colors[len(self.line_sets) % len(self.colors)]
+        )
+
+        # Show current state
+        status = (
+            f"Line set {len(self.line_sets) + 1}: {len(self.current_points)}/4 points"
+        )
+        cv2.putText(
+            self.display_image,
+            status,
+            (10, self.height - 60),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.7,
+            (255, 255, 255),
+            2,
+        )
+
+        cv2.imshow(self.window_name, self.display_image)
+
+    def _draw_line_set(self, line_set: list, color: tuple) -> None:
+        # Draw lines
+        if len(line_set) >= 2:
+            cv2.line(self.display_image, line_set[0], line_set[1], color, 2)
+        if len(line_set) >= 4:
+            cv2.line(self.display_image, line_set[2], line_set[3], color, 2)
+
+        # Draw points
+        for point in line_set:
+            cv2.circle(self.display_image, point, 5, color, -1)
+
+    def collect_points(self) -> None:
+        print("Collecting points for vanishing point calculation...")
+
+        while True:
+            self._update_display()
+            key = cv2.waitKey(1) & 0xFF
+
+            if key == ord("q"):
+                exit()
+            elif key == ord("r"):
+                self.current_points = []
+            elif key == ord("c"):
+                if len(self.current_points) == 4:
+                    self.line_sets.append(self.current_points)
+                    self.current_points = []
+
+                if len(self.line_sets) < 3:
+                    print(
+                        f"Need {self.n_line_sets} line sets, currently have {len(self.line_sets)}"
+                    )
+
+            if len(self.line_sets) == self.n_line_sets:
+                break
+
+            if len(self.current_points) == 4:
+                self.line_sets.append(self.current_points)
+                self.current_points = []
+
+        cv2.destroyWindow(self.window_name)
+
+
+def show_calibration_guidance() -> None:
+    """Display guidance for vanishing point calibration"""
+    print("\n=== VANISHING POINT CALIBRATION GUIDANCE ===")
+    print(
+        "You'll need to identify 3 sets of parallel lines that are perpendicular to each other in the real world."
+    )
+    print("\nTIPS FOR ACCURATE CALIBRATION:")
+    print(
+        "1. Choose lines from truly orthogonal structures (walls/floor/ceiling intersections)"
+    )
+    print("2. Place points for each line as FAR APART as possible")
+    print("3. Use high-contrast edges for better precision")
+    print("4. Use structures that span a large portion of the image")
+    print("\nFor each direction:")
+    print("- First line: click 2 points along one edge")
+    print("- Second line: click 2 points along another parallel edge")
+    print("\nPress Enter to begin...")
+    input()
+
+
+def capture_image(file_dir: str) -> str:
+    """Capture image from camera for intrinsic calibration"""
+    cap = cv2.VideoCapture(0)
+    assert cap is not None, "Error: Could not open camera."
+
+    window_name = "Camera Calibration"
+    cv2.namedWindow(window_name)
+
+    print("Camera preview opened. Press SPACE to capture or ESC to cancel.")
+    print("Position your camera to capture a scene with clear orthogonal structures.")
+    print("Building interiors, boxes, or furniture work well.")
+
+    captured_frame = None
+
+    while True:
+        success, frame = cap.read()
+        if not success:
+            print("Error: Failed to capture frame.")
+            time.sleep(0.5)
+
+        cv2.imshow(window_name, frame)
+
+        key = cv2.waitKey(1) & 0xFF
+
+        if key == 27:  # ESC key
+            print("Capture cancelled.")
+            break
+        elif key == 32:  # SPACE key
+            captured_frame = frame.copy()
+            break
+
+    cap.release()
+    cv2.destroyWindow(window_name)
+
+    assert captured_frame is not None, "Error: No frame captured."
+
+    filename = "calibration_img.jpg"
+    filepath = os.path.join(file_dir, filename)
+    cv2.imwrite(filepath, captured_frame)
+    print(f"Image saved as {filepath}")
+
+    return filename
+
+
+def calibrate_intrinsics() -> np.ndarray:
+    """Calibrate camera intrinsic matrix using vanishing points"""
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    files = os.listdir(file_dir)
+    match = re.compile(r"calibration_img\.\w+")
+
+    # Look for existing calibration image in calibration directory
+    calibration_img = next((file for file in files if match.match(file)), None)
+
+    if calibration_img is None:
+        response = input(
+            "No calibration image found. Would you like to take a picture now? (y/n)"
+        )
+        if response.strip().lower() == "y":
+            calibration_img = capture_image(file_dir)
+        else:
+            print("Calibration cancelled.")
+            exit()
+
+    img_path = os.path.join(file_dir, calibration_img)
+    img = cv2.imread(img_path)
+    assert img is not None, f"Error: Could not load image '{img_path}'."
+
+    show_calibration_guidance()
+    point_collector = PointCollector(img)
+    point_collector.collect_points()
+
+    vanishing_points = []
+    for line_set in point_collector.line_sets:
+        try:
+            vp = compute_vanishing_point(line_set)
+            vanishing_points.append(vp)
+        except Exception as e:
+            print(f"Error computing vanishing point: {e}")
+
+    if len(vanishing_points) < 3:
+        print("Could not compute enough vanishing points.")
+        return None
+
+    try:
+        K = compute_K_from_vanishing_points(vanishing_points[:3])
+        print("Estimated Intrinsic Matrix:")
+        print(K)
+    except Exception as e:
+        print(f"Error computing intrinsic matrix: {e}")
+        return None
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    np_filename = f"intrinsics.npz"
+    file_path = os.path.join(file_dir, np_filename)
+
+    np.savez(
+        file_path,
+        K=K,
+        vanishing_points=vanishing_points,
+        img_path=img_path,
+        timestamp=timestamp,
+    )
+
+    print(f"Saved camera intrinsics to: {np_filename}")
+
+    return K
+
+
+if __name__ == "__main__":
+    calibrate_intrinsics()