import cv2
import numpy as np
import mediapipe as mp
from collections import deque
from typing import Optional


# Convert 3D robot positions to 2D screen positions
def robot_to_screen_pos(robot_pos: list, width: int, height: int) -> tuple:
    center_x, center_y = width // 2, height // 2
    scale_factor = min(width, height) / 2
    # Y-axis maps to horizontal
    screen_x = int(center_x + robot_pos[1] * scale_factor)
    # Z-axis maps to vertical (inverted)
    screen_y = int(center_y - robot_pos[2] * scale_factor)
    return screen_x, screen_y


# Return 3D hand position from MediaPipe landmarks
def get_hand_pos(landmarks, width: int, height: int) -> np.ndarray:
    # Use thumb tip and index finger tip
    thumb_tip = landmarks.landmark[4]  # Thumb tip index
    index_tip = landmarks.landmark[8]  # Index finger tip index
    # Convert to 3D positions; z uses a rough scaling of MediaPipe's relative depth
    thumb_pos = np.array([thumb_tip.x * width, thumb_tip.y * height, thumb_tip.z * 100])
    index_pos = np.array([index_tip.x * width, index_tip.y * height, index_tip.z * 100])
    # Midpoint between thumb and index finger
    position = (thumb_pos + index_pos) / 2
    return position


# Check if hand position is stable at current location
def is_hand_stable(
    position, history: deque, frames: int = 50, threshold: float = 8.0
) -> bool:
    history.append(position)
    if len(history) < frames:
        return False
    positions = np.array(history)
    max_movement = np.max(np.linalg.norm(positions - positions[-1], axis=1))
    return max_movement < threshold
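
# Illustrative sketch: the point-set alignment used later in calibrate_camera()
# is the Kabsch algorithm. `kabsch_align` is a hypothetical stand-alone helper,
# not called by the calibration flow; it shows the same math in isolation and
# assumes `source` and `target` are Nx3 arrays with row-for-row correspondence.
def kabsch_align(source: np.ndarray, target: np.ndarray):
    """Return (rotation, translation) mapping source points onto target points."""
    source_centroid = source.mean(axis=0)
    target_centroid = target.mean(axis=0)
    # Cross-covariance of the centered point sets
    H = (source - source_centroid).T @ (target - target_centroid)
    U, _, Vt = np.linalg.svd(H)
    rotation = Vt.T @ U.T
    if np.linalg.det(rotation) < 0:  # Correct an improper rotation (reflection)
        Vt[-1, :] *= -1
        rotation = Vt.T @ U.T
    translation = target_centroid - rotation @ source_centroid
    return rotation, translation


# Synthetic sanity check: rotate random points by a known rotation plus a unit
# translation, then confirm kabsch_align recovers both.
#   theta = np.pi / 2
#   R_true = np.array([[np.cos(theta), -np.sin(theta), 0.0],
#                      [np.sin(theta),  np.cos(theta), 0.0],
#                      [0.0, 0.0, 1.0]])
#   pts = np.random.rand(4, 3)
#   R_est, t_est = kabsch_align(pts, pts @ R_true.T + 1.0)
#   # R_est ~ R_true, t_est ~ [1.0, 1.0, 1.0]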

def calibrate_camera() -> Optional[np.ndarray]:
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils
    hands = mp_hands.Hands(
        model_complexity=1,
        max_num_hands=1,
        static_image_mode=False,
    )

    # Robot frame positions
    reference_positions = [
        [0.3, 0.0, 0.4],  # Center
        [0.3, 0.2, 0.4],  # Right
        [0.3, -0.2, 0.4],  # Left
        [0.3, 0.0, 0.6],  # Higher
    ]

    cap = cv2.VideoCapture(0)
    # Get camera dimensions
    success, frame = cap.read()
    assert success, "Failed to access camera"
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # The window must hold at least `frames` samples (default 50), otherwise
    # is_hand_stable() can never report stability
    position_history = deque(maxlen=50)
    current_target = 0
    camera_positions = []
    calibration_complete = False

    print("Automatic calibration starting.")
    print("Please move your hand to the highlighted positions on screen.")

    while not calibration_complete:
        success, frame = cap.read()
        if not success:
            continue
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        target_position = reference_positions[current_target]
        screen_target = robot_to_screen_pos(target_position, width, height)

        # Draw target
        cv2.circle(frame, screen_target, 50, (0, 255, 0), 2)
        cv2.circle(frame, screen_target, 10, (0, 255, 0), -1)
        cv2.putText(
            frame,
            f"Target {current_target + 1}: Move hand here",
            (screen_target[0] - 100, screen_target[1] - 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            2,
        )

        hand_position = None
        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]  # Just use the first hand
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            hand_position = get_hand_pos(hand_landmarks, width, height)

            # Draw hand position indicator
            hand_x, hand_y = int(hand_position[0]), int(hand_position[1])
            cv2.circle(frame, (hand_x, hand_y), 15, (255, 0, 0), -1)

            # Check if hand is close to target (in 2D screen space for simplicity)
            distance = np.sqrt(
                (hand_x - screen_target[0]) ** 2 + (hand_y - screen_target[1]) ** 2
            )
            if distance < 50:  # Threshold in pixels
                cv2.putText(
                    frame,
                    "Hold position steady...",
                    (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.0,
                    (0, 0, 255),
                    2,
                )
                if is_hand_stable(hand_position, position_history):
                    camera_positions.append(hand_position)
                    print(f"Position {current_target + 1} recorded!")
                    # Move to next target and reset history
                    current_target += 1
                    position_history.clear()
                    if current_target >= len(reference_positions):
                        calibration_complete = True
                    else:
                        print(f"Please move to position {current_target + 1}")

        cv2.putText(
            frame,
            f"Calibrating: {current_target}/{len(reference_positions)} positions",
            (10, height - 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (255, 255, 255),
            2,
        )
        cv2.imshow("Calibration", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()

    if not calibration_complete:
        print("Calibration aborted.")
        return None

    robot_points = np.array(reference_positions)
    robot_centroid = np.mean(robot_points, axis=0)
    robot_centered = robot_points - robot_centroid

    camera_points = np.array(camera_positions)
    camera_centroid = np.mean(camera_points, axis=0)
    camera_centered = camera_points - camera_centroid

    # Find the rotation mapping camera frame to robot frame via SVD (Kabsch
    # algorithm). Note this assumes the two frames share a common scale; if
    # they do not, a similarity transform with a scale factor would be needed.
    H = np.dot(camera_centered.T, robot_centered)
    U, S, Vt = np.linalg.svd(H)
    rotation = np.dot(Vt.T, U.T)
    # Check that it's a proper rotation matrix (correct a reflection)
    if np.linalg.det(rotation) < 0:
        Vt[-1, :] *= -1
        rotation = np.dot(Vt.T, U.T)

    translation = robot_centroid - np.dot(rotation, camera_centroid)

    # Create homogeneous transform matrix
    transform = np.eye(4)
    transform[:3, :3] = rotation
    transform[:3, 3] = translation

    print("Calibration complete!")
    np.save("camera_extrinsics.npy", transform)
    return transform


if __name__ == "__main__":
    transform = calibrate_camera()
    if transform is not None:
        print("Transform matrix:")
        print(transform)
        print("Saved to camera_extrinsics.npy")
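

# Usage sketch (appended for illustration): how a consumer might apply the
# saved extrinsics. `camera_to_robot` is a hypothetical helper; it assumes the
# input point uses the same camera-space units produced by get_hand_pos()
# (pixels for x/y, MediaPipe's scaled relative depth for z).
def camera_to_robot(camera_point: np.ndarray, transform: np.ndarray) -> np.ndarray:
    """Map a 3D camera-space point into the robot frame with the 4x4 transform."""
    homogeneous = np.append(camera_point, 1.0)  # [x, y, z, 1]
    return (transform @ homogeneous)[:3]


# Example:
#   transform = np.load("camera_extrinsics.npy")
#   robot_xyz = camera_to_robot(np.array([320.0, 240.0, -12.0]), transform)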