From cd4464357a810cd4638ee9352e0627c40bab9471 Mon Sep 17 00:00:00 2001
From: Ethan Clark
Date: Tue, 25 Mar 2025 16:42:45 -0700
Subject: [PATCH] add initial camera calibration

---
 open_phantom/utils/calibration.py  | 214 +++++++++++++++++++++++++++++
 open_phantom/utils/calibrations.py |   1 -
 2 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 open_phantom/utils/calibration.py
 delete mode 100644 open_phantom/utils/calibrations.py

diff --git a/open_phantom/utils/calibration.py b/open_phantom/utils/calibration.py
new file mode 100644
index 0000000..9e85cc5
--- /dev/null
+++ b/open_phantom/utils/calibration.py
@@ -0,0 +1,214 @@
+import cv2
+import numpy as np
+import mediapipe as mp
+
+from collections import deque
+
+
+# Convert 3D robot positions to 2D screen positions
+def robot_to_screen_pos(robot_pos: list, width: int, height: int) -> tuple:
+    center_x, center_y = width // 2, height // 2
+    scale_factor = min(width, height) / 2
+
+    # Y-axis maps to horizontal
+    screen_x = int(center_x + robot_pos[1] * scale_factor)
+    # Z-axis maps to vertical (inverted)
+    screen_y = int(center_y - robot_pos[2] * scale_factor)
+
+    return screen_x, screen_y
+
+
+# Return 3D hand position from MediaPipe landmarks
+def get_hand_pos(landmarks, width: int, height: int) -> np.ndarray:
+    # Use thumb tip and index finger tip
+    thumb_tip = landmarks.landmark[4]  # Thumb tip index
+    index_tip = landmarks.landmark[8]  # Index finger tip index
+
+    # Convert to 3D position in pixel units (z uses a rough depth scaling)
+    thumb_pos = np.array(
+        [thumb_tip.x * width, thumb_tip.y * height, thumb_tip.z * 100]
+    )
+    index_pos = np.array(
+        [index_tip.x * width, index_tip.y * height, index_tip.z * 100]
+    )
+
+    # Midpoint between thumb and index finger
+    position = (thumb_pos + index_pos) / 2
+
+    return position
+
+
+# Check if hand position is stable at current location
+def is_hand_stable(
+    position, history: deque, frames: int = 15, threshold: float = 8.0
+) -> bool:
+    # NOTE: frames must not exceed the history deque's maxlen,
+    # otherwise this check can never succeed
+    history.append(position)
+
+    if len(history) < frames:
+        return False
+
+    positions = np.array(history)
+    max_movement = np.max(np.linalg.norm(positions - positions[-1], axis=1))
+
+    return max_movement < threshold
+
+
+def calibrate_camera() -> np.ndarray:
+    mp_hands = mp.solutions.hands
+    mp_drawing = mp.solutions.drawing_utils
+    hands = mp_hands.Hands(
+        model_complexity=1,
+        max_num_hands=1,
+        static_image_mode=False,
+    )
+
+    # Reference positions in the robot frame
+    reference_positions = [
+        [0.3, 0.0, 0.4],  # Center
+        [0.3, 0.2, 0.4],  # Right
+        [0.3, -0.2, 0.4],  # Left
+        [0.3, 0.0, 0.6],  # Higher
+    ]
+
+    cap = cv2.VideoCapture(0)
+
+    # Confirm the camera is readable and get its dimensions
+    success, frame = cap.read()
+    assert success, "Failed to access camera"
+
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    position_history = deque(maxlen=15)
+
+    current_target = 0
+    camera_positions = []
+    calibration_complete = False
+
+    print("Automatic calibration starting.")
+    print("Please move your hand to the highlighted positions on screen.")
+
+    while not calibration_complete:
+        success, frame = cap.read()
+        if not success:
+            continue
+
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        results = hands.process(frame_rgb)
+
+        target_position = reference_positions[current_target]
+        screen_target = robot_to_screen_pos(target_position, width, height)
+
+        # Draw target
+        cv2.circle(frame, screen_target, 50, (0, 255, 0), 2)
+        cv2.circle(frame, screen_target, 10, (0, 255, 0), -1)
+        cv2.putText(
+            frame,
+            f"Target {current_target+1}: Move hand here",
+            (screen_target[0] - 100, screen_target[1] - 20),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.7,
+            (0, 255, 0),
+            2,
+        )
+
+        hand_position = None
+
+        if results.multi_hand_landmarks:
+            hand_landmarks = results.multi_hand_landmarks[0]  # Just use the first hand
+
+            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+
+            hand_position = get_hand_pos(hand_landmarks, width, height)
+
+            # Draw hand position indicator
+            hand_x, hand_y = int(hand_position[0]), int(hand_position[1])
+            cv2.circle(frame, (hand_x, hand_y), 15, (255, 0, 0), -1)
+
+            # Check if hand is close to target (in 2D screen space for simplicity)
+            distance = np.sqrt(
+                (hand_x - screen_target[0]) ** 2 + (hand_y - screen_target[1]) ** 2
+            )
+
+            if distance < 50:  # Threshold in pixels
+                cv2.putText(
+                    frame,
+                    "Hold position steady...",
+                    (50, 50),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    1.0,
+                    (0, 0, 255),
+                    2,
+                )
+
+                if is_hand_stable(hand_position, position_history):
+                    camera_positions.append(hand_position)
+                    print(f"Position {current_target+1} recorded!")
+
+                    # Move to next target and reset history
+                    current_target += 1
+                    position_history.clear()
+
+                    if current_target >= len(reference_positions):
+                        calibration_complete = True
+                    else:
+                        print(f"Please move to position {current_target+1}")
+
+        cv2.putText(
+            frame,
+            f"Calibrating: {current_target}/{len(reference_positions)} positions",
+            (10, height - 20),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.7,
+            (255, 255, 255),
+            2,
+        )
+
+        cv2.imshow("Calibration", frame)
+
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+    if not calibration_complete:
+        print("Calibration aborted.")
+        return None
+
+    robot_points = np.array(reference_positions)
+    robot_centroid = np.mean(robot_points, axis=0)
+    robot_centered = robot_points - robot_centroid
+
+    camera_points = np.array(camera_positions)
+    camera_centroid = np.mean(camera_points, axis=0)
+    camera_centered = camera_points - camera_centroid
+
+    # Find the rotation mapping camera points onto robot points (Kabsch algorithm)
+    H = np.dot(camera_centered.T, robot_centered)
+    U, S, Vt = np.linalg.svd(H)
+    rotation = np.dot(Vt.T, U.T)
+
+    # Correct for a reflection so the result is a proper rotation matrix
+    if np.linalg.det(rotation) < 0:
+        Vt[-1, :] *= -1
+        rotation = np.dot(Vt.T, U.T)
+
+    translation = robot_centroid - np.dot(rotation, camera_centroid)
+
+    # Create homogeneous transform matrix
+    transform = np.eye(4)
+    transform[:3, :3] = rotation
+    transform[:3, 3] = translation
+
+    print("Calibration complete!")
+
+    np.save("camera_extrinsics.npy", transform)
+
+    return transform
+
+
+if __name__ == "__main__":
+    transform = calibrate_camera()
+    if transform is not None:
+        print("Transform matrix:")
+        print(transform)
+        print("Saved to camera_extrinsics.npy")
diff --git a/open_phantom/utils/calibrations.py b/open_phantom/utils/calibrations.py
deleted file mode 100644
index 8b53754..0000000
--- a/open_phantom/utils/calibrations.py
+++ /dev/null
@@ -1 +0,0 @@
-# TODO: Implement camera calibration functions to extract camera extrinsics and intrinsics
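
A minimal usage sketch for the extrinsics this patch produces, assuming camera_extrinsics.npy holds the 4x4 homogeneous camera-to-robot transform returned by calibrate_camera(); the camera_to_robot helper and the example point are illustrative and not part of the patch:

import numpy as np

def camera_to_robot(point_cam: np.ndarray, transform: np.ndarray) -> np.ndarray:
    # Promote the 3D camera-frame point to homogeneous coordinates,
    # apply the 4x4 transform, and return the 3D robot-frame point
    return (transform @ np.append(point_cam, 1.0))[:3]

transform = np.load("camera_extrinsics.npy")
hand_robot = camera_to_robot(np.array([320.0, 240.0, 0.0]), transform)
print(hand_robot)

Note that get_hand_pos returns positions in pixel units while the reference positions are expressed in robot-frame units, so a rigid rotation-plus-translation cannot absorb the difference in scale between the two frames; a similarity fit that also estimates a scale factor (e.g. the Umeyama method) may align them more accurately.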