mirror of https://github.com/luckyrobots/open_phantom.git
synced 2025-04-03 10:32:19 +00:00
add initial camera calibration
This commit is contained in:
parent c1fb0c6df2
commit cd4464357a
214 open_phantom/utils/calibration.py Normal file
@@ -0,0 +1,214 @@
import cv2
import numpy as np
import mediapipe as mp

from collections import deque
from typing import Optional

# Convert 3D robot positions to 2D screen positions
def robot_to_screen_pos(robot_pos: list, width: int, height: int) -> tuple:
    center_x, center_y = width // 2, height // 2
    scale_factor = min(width, height) / 2

    # Y-axis maps to horizontal
    screen_x = int(center_x + robot_pos[1] * scale_factor)
    # Z-axis maps to vertical (inverted)
    screen_y = int(center_y - robot_pos[2] * scale_factor)

    return screen_x, screen_y
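A quick sanity check of the mapping (illustrative, not part of the commit): with a 640x480 frame, the screen center is (320, 240) and the scale factor is min(640, 480) / 2 = 240, so the "Right" reference position [0.3, 0.2, 0.4] should land at (320 + 0.2 * 240, 240 - 0.4 * 240) = (368, 144):

# Hypothetical check of robot_to_screen_pos, assuming a 640x480 capture
assert robot_to_screen_pos([0.3, 0.2, 0.4], 640, 480) == (368, 144)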
# Return 3D hand position from MediaPipe landmarks
def get_hand_pos(landmarks, width: int, height: int) -> np.ndarray:
    # Use thumb tip and index finger tip
    thumb_tip = landmarks.landmark[4]  # Thumb tip landmark index
    index_tip = landmarks.landmark[8]  # Index finger tip landmark index

    # Convert normalized landmarks to pixel coordinates; MediaPipe's z is
    # unitless, so scale it roughly into a comparable range
    thumb_pos = np.array([thumb_tip.x * width, thumb_tip.y * height, thumb_tip.z * 100])
    index_pos = np.array([index_tip.x * width, index_tip.y * height, index_tip.z * 100])

    # Midpoint between thumb and index finger
    position = (thumb_pos + index_pos) / 2

    return position
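The magic indices 4 and 8 are MediaPipe's thumb-tip and index-fingertip landmark IDs. A more self-documenting form would go through the HandLandmark enum that ships with the solutions API (a sketch, not part of the commit):

# Equivalent lookups via MediaPipe's landmark enum
thumb_tip = landmarks.landmark[mp.solutions.hands.HandLandmark.THUMB_TIP]  # == 4
index_tip = landmarks.landmark[mp.solutions.hands.HandLandmark.INDEX_FINGER_TIP]  # == 8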
# Check if the hand position is stable at its current location
def is_hand_stable(
    position, history: deque, frames: int = 50, threshold: float = 8.0
) -> bool:
    history.append(position)

    # Not enough samples yet to judge stability
    if len(history) < frames:
        return False

    positions = np.array(history)
    # Largest deviation of any buffered sample from the newest position
    max_movement = np.max(np.linalg.norm(positions - positions[-1], axis=1))

    return max_movement < threshold
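One caveat: the history deque must be able to hold at least `frames` samples, otherwise `len(history) < frames` stays true forever and the check can never pass; calibrate_camera below sizes its deque to 50 for exactly this reason. A minimal sketch of the intended behavior (illustrative only):

# A perfectly still "hand" becomes stable on exactly the 50th frame
history = deque(maxlen=50)
still_position = np.array([100.0, 100.0, 0.0])
results = [is_hand_stable(still_position, history) for _ in range(50)]
assert results[-1] and not any(results[:-1])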
def calibrate_camera() -> Optional[np.ndarray]:
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils
    hands = mp_hands.Hands(
        model_complexity=1,
        max_num_hands=1,
        static_image_mode=False,
    )

    # Robot frame positions
    reference_positions = [
        [0.3, 0.0, 0.4],  # Center
        [0.3, 0.2, 0.4],  # Right
        [0.3, -0.2, 0.4],  # Left
        [0.3, 0.0, 0.6],  # Higher
    ]

    cap = cv2.VideoCapture(0)

    # Confirm camera access and get camera dimensions
    success, frame = cap.read()
    assert success, "Failed to access camera"

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Must hold at least as many samples as the is_hand_stable frames window
    position_history = deque(maxlen=50)

    current_target = 0
    camera_positions = []
    calibration_complete = False

    print("Automatic calibration starting.")
    print("Please move your hand to the highlighted positions on screen.")

    while not calibration_complete:
        success, frame = cap.read()
        if not success:
            continue

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        target_position = reference_positions[current_target]
        screen_target = robot_to_screen_pos(target_position, width, height)

        # Draw target
        cv2.circle(frame, screen_target, 50, (0, 255, 0), 2)
        cv2.circle(frame, screen_target, 10, (0, 255, 0), -1)
        cv2.putText(
            frame,
            f"Target {current_target+1}: Move hand here",
            (screen_target[0] - 100, screen_target[1] - 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            2,
        )

        hand_position = None

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]  # Just use the first hand

            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            hand_position = get_hand_pos(hand_landmarks, width, height)

            # Draw hand position indicator
            hand_x, hand_y = int(hand_position[0]), int(hand_position[1])
            cv2.circle(frame, (hand_x, hand_y), 15, (255, 0, 0), -1)

            # Check if hand is close to target (in 2D screen space for simplicity)
            distance = np.sqrt(
                (hand_x - screen_target[0]) ** 2 + (hand_y - screen_target[1]) ** 2
            )

            if distance < 50:  # Threshold in pixels
                cv2.putText(
                    frame,
                    "Hold position steady...",
                    (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.0,
                    (0, 0, 255),
                    2,
                )

                if is_hand_stable(hand_position, position_history):
                    camera_positions.append(hand_position)
                    print(f"Position {current_target+1} recorded!")

                    # Move to next target and reset history
                    current_target += 1
                    position_history.clear()

                    if current_target >= len(reference_positions):
                        calibration_complete = True
                    else:
                        print(f"Please move to position {current_target+1}")

        cv2.putText(
            frame,
            f"Calibrating: {current_target}/{len(reference_positions)} positions",
            (10, height - 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (255, 255, 255),
            2,
        )

        cv2.imshow("Calibration", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()

    if not calibration_complete:
        print("Calibration aborted.")
        return None

    # Center both point sets on their centroids
    robot_points = np.array(reference_positions)
    robot_centroid = np.mean(robot_points, axis=0)
    robot_centered = robot_points - robot_centroid

    camera_points = np.array(camera_positions)
    camera_centroid = np.mean(camera_points, axis=0)
    camera_centered = camera_points - camera_centroid

    # Find the camera-to-robot rotation using SVD (Kabsch algorithm)
    # NOTE: only rotation + translation are estimated; the camera points are in
    # pixel-based units while the robot points are in meters, so no scale is recovered
    H = np.dot(camera_centered.T, robot_centered)
    U, S, Vt = np.linalg.svd(H)
    rotation = np.dot(Vt.T, U.T)

    # Guard against a reflection: a proper rotation matrix has determinant +1
    if np.linalg.det(rotation) < 0:
        Vt[-1, :] *= -1
        rotation = np.dot(Vt.T, U.T)

    translation = robot_centroid - np.dot(rotation, camera_centroid)

    # Assemble the 4x4 homogeneous transform matrix
    transform = np.eye(4)
    transform[:3, :3] = rotation
    transform[:3, 3] = translation

    print("Calibration complete!")

    np.save("camera_extrinsics.npy", transform)

    return transform


if __name__ == "__main__":
    transform = calibrate_camera()
    if transform is not None:
        print("Transform matrix:")
        print(transform)
        print("Saved to camera_extrinsics.npy")
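Downstream code can then map a camera-frame hand position into the robot frame with a single homogeneous multiply. A minimal usage sketch (hypothetical, not part of the commit; it assumes the camera_extrinsics.npy file written above and the same pixel-based units as get_hand_pos):

import numpy as np

# Load the 4x4 camera-to-robot transform saved by calibrate_camera()
transform = np.load("camera_extrinsics.npy")

# Homogeneous camera-frame point -> robot-frame point
camera_point = np.array([320.0, 240.0, 0.0, 1.0])
robot_point = transform @ camera_point
print(robot_point[:3])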
@@ -1 +0,0 @@
# TODO: Implement camera calibration functions to extract camera extrinsics and intrinsics