add initial camera calibration

Ethan Clark 2025-03-25 16:42:45 -07:00
parent c1fb0c6df2
commit cd4464357a
2 changed files with 214 additions and 1 deletions

@@ -0,0 +1,214 @@
import cv2
import numpy as np
import mediapipe as mp
from collections import deque

# Convert 3D robot positions to 2D screen positions
def robot_to_screen_pos(robot_pos: list, width: int, height: int) -> tuple:
    center_x, center_y = width // 2, height // 2
    scale_factor = min(width, height) / 2
    # Y-axis maps to horizontal
    screen_x = int(center_x + robot_pos[1] * scale_factor)
    # Z-axis maps to vertical (inverted)
    screen_y = int(center_y - robot_pos[2] * scale_factor)
    return screen_x, screen_y

# Return 3D hand position from MediaPipe landmarks
def get_hand_pos(landmarks, width: int, height: int):
    # Use thumb tip and index finger tip
    thumb_tip = landmarks.landmark[4]  # Thumb tip landmark index
    index_tip = landmarks.landmark[8]  # Index fingertip landmark index
    # Convert normalized landmarks to pixel coordinates; z gets a rough scaling
    thumb_pos = np.array(
        [thumb_tip.x * width, thumb_tip.y * height, thumb_tip.z * 100]
    )
    index_pos = np.array(
        [index_tip.x * width, index_tip.y * height, index_tip.z * 100]
    )
    # Midpoint between thumb and index finger
    position = (thumb_pos + index_pos) / 2
    return position

# Check if the hand position is stable at its current location
def is_hand_stable(
    position, history: deque, frames: int = 50, threshold: float = 8.0
) -> bool:
    history.append(position)
    if len(history) < frames:
        return False
    positions = np.array(history)
    # Largest deviation of any recent sample from the latest position
    max_movement = np.max(np.linalg.norm(positions - positions[-1], axis=1))
    return max_movement < threshold

def calibrate_camera() -> np.ndarray | None:
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils
    hands = mp_hands.Hands(
        model_complexity=1,
        max_num_hands=1,
        static_image_mode=False,
    )

    # Robot frame positions
    reference_positions = [
        [0.3, 0.0, 0.4],  # Center
        [0.3, 0.2, 0.4],  # Right
        [0.3, -0.2, 0.4],  # Left
        [0.3, 0.0, 0.6],  # Higher
    ]

    cap = cv2.VideoCapture(0)

    # Get camera dimensions
    success, frame = cap.read()
    assert success, "Failed to access camera"
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # maxlen must cover the `frames` window of is_hand_stable (50);
    # a shorter history means the stability check can never pass
    position_history = deque(maxlen=50)
    current_target = 0
    camera_positions = []
    calibration_complete = False

    print("Automatic calibration starting.")
    print("Please move your hand to the highlighted positions on screen.")

    while not calibration_complete:
        success, frame = cap.read()
        if not success:
            continue

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        target_position = reference_positions[current_target]
        screen_target = robot_to_screen_pos(target_position, width, height)

        # Draw target
        cv2.circle(frame, screen_target, 50, (0, 255, 0), 2)
        cv2.circle(frame, screen_target, 10, (0, 255, 0), -1)
        cv2.putText(
            frame,
            f"Target {current_target + 1}: Move hand here",
            (screen_target[0] - 100, screen_target[1] - 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            2,
        )

        hand_position = None
        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]  # Just use the first hand
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            hand_position = get_hand_pos(hand_landmarks, width, height)
            # Draw hand position indicator
            hand_x, hand_y = int(hand_position[0]), int(hand_position[1])
            cv2.circle(frame, (hand_x, hand_y), 15, (255, 0, 0), -1)

            # Check if hand is close to target (in 2D screen space for simplicity)
            distance = np.sqrt(
                (hand_x - screen_target[0]) ** 2 + (hand_y - screen_target[1]) ** 2
            )
            if distance < 50:  # Threshold in pixels
                cv2.putText(
                    frame,
                    "Hold position steady...",
                    (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.0,
                    (0, 0, 255),
                    2,
                )
                if is_hand_stable(hand_position, position_history):
                    camera_positions.append(hand_position)
                    print(f"Position {current_target + 1} recorded!")

                    # Move to next target and reset history
                    current_target += 1
                    position_history.clear()
                    if current_target >= len(reference_positions):
                        calibration_complete = True
                    else:
                        print(f"Please move to position {current_target + 1}")

        cv2.putText(
            frame,
            f"Calibrating: {current_target}/{len(reference_positions)} positions",
            (10, height - 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (255, 255, 255),
            2,
        )
        cv2.imshow("Calibration", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()

    if not calibration_complete:
        print("Calibration aborted.")
        return None

    # Rigid alignment (Kabsch): find the rotation and translation mapping the
    # recorded camera-frame points onto the robot-frame references.
    # Note: camera points are in pixel-scale units while robot points are in
    # meters, so a pure rigid fit cannot absorb the scale difference; a
    # similarity fit is sketched after the diff.
    robot_points = np.array(reference_positions)
    robot_centroid = np.mean(robot_points, axis=0)
    robot_centered = robot_points - robot_centroid

    camera_points = np.array(camera_positions)
    camera_centroid = np.mean(camera_points, axis=0)
    camera_centered = camera_points - camera_centroid

    # Find rotation using SVD of the cross-covariance matrix
    H = np.dot(camera_centered.T, robot_centered)
    U, S, Vt = np.linalg.svd(H)
    rotation = np.dot(Vt.T, U.T)

    # Ensure a proper rotation (det = +1) rather than a reflection
    if np.linalg.det(rotation) < 0:
        Vt[-1, :] *= -1
        rotation = np.dot(Vt.T, U.T)
    translation = robot_centroid - np.dot(rotation, camera_centroid)

    # Create homogeneous transform matrix
    transform = np.eye(4)
    transform[:3, :3] = rotation
    transform[:3, 3] = translation

    print("Calibration complete!")
    np.save("camera_extrinsics.npy", transform)
    return transform

if __name__ == "__main__":
    transform = calibrate_camera()
    if transform is not None:
        print("Transform matrix:")
        print(transform)
        print("Saved to camera_extrinsics.npy")

@@ -1 +0,0 @@
# TODO: Implement camera calibration functions to extract camera extrinsics and intrinsics
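
Because get_hand_pos returns pixel-scale coordinates while the reference positions are in meters, a rigid rotation-plus-translation fit cannot absorb the unit difference. A similarity fit that also estimates a scale factor, such as Umeyama's method, may match these point sets better. A minimal sketch under that assumption; umeyama_similarity is a hypothetical helper, not part of this commit:

import numpy as np

def umeyama_similarity(src: np.ndarray, dst: np.ndarray):
    # Estimate scale s, rotation R, translation t so that dst ~= s * R @ src + t
    src_mean, dst_mean = src.mean(axis=0), dst.mean(axis=0)
    src_c, dst_c = src - src_mean, dst - dst_mean
    # Cross-covariance of the centered point sets
    cov = dst_c.T @ src_c / len(src)
    U, D, Vt = np.linalg.svd(cov)
    S = np.eye(3)
    if np.linalg.det(U) * np.linalg.det(Vt) < 0:
        S[-1, -1] = -1  # Guard against a reflection
    R = U @ S @ Vt
    var_src = (src_c ** 2).sum() / len(src)
    s = np.trace(np.diag(D) @ S) / var_src
    t = dst_mean - s * R @ src_mean
    return s, R, t

Dropping this in for the SVD block above would set transform[:3, :3] to s * R and transform[:3, 3] to t.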