improve icp and robot params
This commit is contained in:
parent aae4e91f54
commit b38527b5db
@@ -1,15 +1,17 @@
 import os
-import cv2
 import time
-import torch
+
+import cv2
+import depth_pro
+import mediapipe as mp
 import numpy as np
 import open3d as o3d
-import mediapipe as mp
+import torch
+from PIL import Image
+from robot_manager import RobotManager
+from sam2.sam2_image_predictor import SAM2ImagePredictor
 from tqdm import tqdm
 from utils.visualizations import *
-from robot_manager import RobotManager
-
-# from diffusers import StableDiffusionInpaintPipeline
 
 
 class HandProcessor:
@@ -37,39 +39,38 @@ class HandProcessor:
         self.midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
         self.transform = self.midas_transforms.dpt_transform
 
-    # Segment hand from image using MediaPipe Hands landmarks
-    def _create_mask(self, image: np.ndarray, landmarks: list, thickness: int=5, padding:int=10) -> np.ndarray:
-        height, width = image.shape[:2]
-        mask = np.zeros((height, width), dtype=np.uint8)
-
-        points = [(int(landmark.x * width), int(landmark.y * height)) for landmark in landmarks.landmark]
-
-        # Draw filled circles at each landmark
-        for point in points:
-            cv2.circle(mask, point, thickness, 255, -1)
-
-        # Connect all landmarks with thick lines
-        for connection in self.mp_hands.HAND_CONNECTIONS:
-            start_idx, end_idx = connection
-            cv2.line(mask, points[start_idx], points[end_idx], 255, thickness)
-
-        # Create palm by connecting base of fingers with wrist
-        palm_points = [points[0], points[1], points[5], points[9], points[13], points[17]]
-        cv2.fillPoly(mask, [np.array(palm_points)], 255)
-
-        # Create shape between fingers
-        finger_bases = [(1,5), (5,9), (9,13), (13,17)]
-        for base1, base2 in finger_bases:
-            triangle = np.array([points[0], points[base1], points[base2]])
-            cv2.fillPoly(mask, [triangle], 255)
-
-        # Dilate to smooth and expand slightly
-        kernel = np.ones((padding, padding), np.uint8)
-        mask = cv2.dilate(mask, kernel, iterations=1)
-
-        # Smooth the mask
-        mask = cv2.GaussianBlur(mask, (21, 21), 0)
-        _, mask = cv2.threshold(mask, 50, 255, cv2.THRESH_BINARY)
+        # Load SAM2 model for hand segmentation
+        print("Loading SAM2 model...")
+        self.sam2_predictor = SAM2ImagePredictor.from_pretrained(
+            "facebook/sam2-hiera-large"
+        )
+
+    def _create_mask(self, frame: np.ndarray, landmarks: list) -> np.ndarray:
+        height, width = frame.shape[:2]
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+        # Set image in SAM2 predictor
+        with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+            self.sam2_predictor.set_image(frame_rgb)  # Set image for prediction
+
+            # Convert landmarks to point prompts
+            points = []
+            for landmark in landmarks.landmark:
+                x, y = int(landmark.x * width), int(landmark.y * height)
+                points.append([x, y])
+
+            input_points = np.array(points)
+
+            # Predict mask with point prompts
+            masks, _, _ = self.sam2_predictor.predict(
+                point_coords=input_points,  # Pass the points as prompts
+                point_labels=np.ones(
+                    len(input_points)
+                ),  # All points from hand are foreground
+                multimask_output=False,  # Just get one mask
+            )
+
+            mask = masks[0].astype(np.uint8) * 255
 
         return mask
 
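For reference, the point-prompt flow the rewritten _create_mask builds on, as a standalone sketch. It mirrors the calls in the hunk above; the input path is hypothetical, and the sam2 package plus a CUDA device are assumed.

import cv2
import numpy as np
import torch
from sam2.sam2_image_predictor import SAM2ImagePredictor

predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")

frame = cv2.imread("hand.jpg")  # hypothetical test image
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# 2D pixel coordinates to treat as foreground clicks (e.g. hand landmarks)
input_points = np.array([[320, 240], [340, 260]])

with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
    predictor.set_image(frame_rgb)
    masks, scores, _ = predictor.predict(
        point_coords=input_points,
        point_labels=np.ones(len(input_points)),  # 1 = foreground
        multimask_output=False,
    )

mask = masks[0].astype(np.uint8) * 255  # binary mask at image resolution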
@@ -77,7 +78,8 @@ class HandProcessor:
    Transform input image to match MiDaS model requirements
    Estimate depth map using MiDaS model
    """
-    # TODO: Look into why screen goes black sometimes
+
+    # TODO: Swap MiDaS for ml-depth-pro model
     def _estimate_depth(self, image: np.ndarray) -> tuple:
         img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
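The newly added import depth_pro and from PIL import Image are not exercised yet; the TODO above flags the planned swap away from MiDaS. A rough sketch of what the swap could look like, following the usage published in the ml-depth-pro README (the exact API is an assumption of this note, not something this commit runs):

import depth_pro

model, transform = depth_pro.create_model_and_transforms()
model.eval()

# load_rgb also returns the EXIF-derived focal length in pixels, when present
image, _, f_px = depth_pro.load_rgb("frame.jpg")  # hypothetical frame path
prediction = model.infer(transform(image), f_px=f_px)

depth = prediction["depth"]                    # metric depth map in metres
focallength_px = prediction["focallength_px"]  # estimated focal length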
@@ -116,7 +118,6 @@ class HandProcessor:
         v_coords = v_coords[valid_indices]
         z_values = z_values[valid_indices]
 
-        # NOTE: Abritrary scaling factor for depth
         z_metric = z_values * 0.5
 
         # Back-project to 3D using camera intrinsics
@@ -127,32 +128,38 @@ class HandProcessor:
 
         return points
 
-    # TODO: Look into better Z scaling
+    # TODO: Look into better depth scaling
     def _create_mesh(self, landmarks: list, image_dims: tuple) -> np.ndarray:
         width, height = image_dims
 
         vertices = []
         for landmark in landmarks.landmark:
-            vertices.append([
-                landmark.x * width,
-                landmark.y * height,
-                landmark.z * width
-            ])
+            vertices.append(
+                [landmark.x * width, landmark.y * height, landmark.z * width]
+            )
 
         # Define faces (triangles) connecting landmarks
         faces = [
             # Palm
-            [0, 1, 5], [0, 5, 9], [0, 9, 13], [0, 13, 17],
+            [0, 1, 5],
+            [0, 5, 9],
+            [0, 9, 13],
+            [0, 13, 17],
             # Thumb
-            [1, 2, 3], [2, 3, 4],
+            [1, 2, 3],
+            [2, 3, 4],
             # Index finger
-            [5, 6, 7], [6, 7, 8],
+            [5, 6, 7],
+            [6, 7, 8],
             # Middle finger
-            [9, 10, 11], [10, 11, 12],
+            [9, 10, 11],
+            [10, 11, 12],
             # Ring finger
-            [13, 14, 15], [14, 15, 16],
+            [13, 14, 15],
+            [14, 15, 16],
             # Pinky
-            [17, 18, 19], [18, 19, 20]
+            [17, 18, 19],
+            [18, 19, 20],
         ]
 
         dense_vertices = list(vertices)
@@ -166,11 +173,12 @@ class HandProcessor:
 
             # Add 2 interpolated points between each connected pair
             for t in [0.33, 0.66]:
-                interp_point = start_point * (1-t) + end_point * t
+                interp_point = start_point * (1 - t) + end_point * t
                 dense_vertices.append(interp_point.tolist())
 
         return np.array(dense_vertices), np.array(faces)
 
+    # TODO: Improve ICP registration for better alignment
     def _icp_registration(self, cloud: np.ndarray, vertices: np.ndarray) -> np.ndarray:
         source = o3d.geometry.PointCloud()
         source.points = o3d.utility.Vector3dVector(vertices)
@@ -178,28 +186,41 @@ class HandProcessor:
         target = o3d.geometry.PointCloud()
         target.points = o3d.utility.Vector3dVector(cloud)
 
+        # Calculate centroids
+        source_centroid = np.mean(vertices, axis=0)
+        target_centroid = np.mean(cloud, axis=0)
+
+        # Calculate initial translation to align centroids
+        initial_translation = target_centroid - source_centroid
+
+        # Create initial transformation matrix
+        trans_init = np.eye(4)
+        trans_init[:3, 3] = initial_translation
+
         result = o3d.pipelines.registration.registration_icp(
             source,
             target,
-            max_correspondence_distance=0.05,
+            max_correspondence_distance=0.1,  # Increased distance
+            init=trans_init,  # Initial transformation
             estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
+            criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
+                max_iteration=100
+            ),  # Increased iterations
         )
 
         transformation = result.transformation
 
         return transformation
 
+    # TODO: Look into better depth scaling
     def _refine_landmarks(self, landmarks: list, transform: int, image_dims: tuple):
         width, height = image_dims
 
         refined_landmarks = []
         for landmark in landmarks.landmark:
-            point = np.array([
-                landmark.x * width,
-                landmark.y * height,
-                landmark.z * width, # TODO: Figure out better scaling factor
-                1.0
-            ])
+            point = np.array(
+                [landmark.x * width, landmark.y * height, landmark.z * width, 1.0]
+            )
 
             # Apply transformation to 3D point
             transformed = np.dot(transform, point)
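The substantive ICP change is the centroid-aligning initial transform, the looser max_correspondence_distance, and the higher iteration cap. Point-to-point ICP only pairs points that lie within max_correspondence_distance of each other, so without a rough initial alignment it can stall in a poor local minimum. A self-contained sketch on toy data of why the seed matters:

import numpy as np
import open3d as o3d

source_pts = np.random.rand(200, 3)                  # stand-in for mesh vertices
target_pts = source_pts + np.array([0.5, 0.2, 0.0])  # same cloud, translated

source = o3d.geometry.PointCloud()
source.points = o3d.utility.Vector3dVector(source_pts)
target = o3d.geometry.PointCloud()
target.points = o3d.utility.Vector3dVector(target_pts)

# Seed ICP by aligning centroids, as in the hunk above
trans_init = np.eye(4)
trans_init[:3, 3] = target_pts.mean(axis=0) - source_pts.mean(axis=0)

result = o3d.pipelines.registration.registration_icp(
    source,
    target,
    max_correspondence_distance=0.1,
    init=trans_init,
    estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
    criteria=o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=100),
)
print(result.fitness)         # inlier fraction; near 1.0 with the centroid seed
print(result.transformation)  # 4x4 matrix recovering the translation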
@@ -244,42 +265,87 @@ class HandProcessor:
 
         return constrained
 
-    def _get_robot_params(self, refined_landmarks) -> tuple:
-        # Extract key landmarks
-        wrist = refined_landmarks[0] # Wrist landmark
-        thumb_tip = refined_landmarks[4] # Thumb tip
-        index_tip = refined_landmarks[8] # Index finger tip
+    def _get_robot_params(self, refined_landmarks):
+        # Extract keypoints
+        landmarks = np.array(refined_landmarks)
 
-        # Calculate end effector position (midpoint between thumb and index tips)
+        # Define indices for specific parts of the hand
+        thumb_indices = [1, 2, 3, 4]  # Thumb landmarks
+        index_indices = [5, 6, 7, 8]  # Index finger landmarks
+        thumb_tip_idx = 4
+        index_tip_idx = 8
+
+        # 1. Set target position as midpoint between thumb tip and index tip
+        thumb_tip = landmarks[thumb_tip_idx]
+        index_tip = landmarks[index_tip_idx]
         position = (thumb_tip + index_tip) / 2
 
-        # Calculate vectors for orientation
-        v1 = thumb_tip - wrist # Vector from wrist to thumb tip
-        v2 = index_tip - wrist # Vector from wrist to index tip
+        # 2. Calculate orientation
+        # Get all thumb and index finger points for plane fitting
+        thumb_points = landmarks[thumb_indices]
+        index_points = landmarks[index_indices]
+        finger_points = np.vstack([thumb_points, index_points])
 
-        # Calculate normal to hand plane
-        normal = np.cross(v1, v2)
-        if np.linalg.norm(normal) > 0:
-            normal = normal / np.linalg.norm(normal)
-        else:
-            # Default if vectors are collinear
-            normal = np.array([0, 0, 1])
+        # Fit plane to finger points
+        centroid = np.mean(finger_points, axis=0)
+        centered_points = finger_points - centroid
 
-        # Calculate main direction along thumb
-        direction = thumb_tip - wrist
-        if np.linalg.norm(direction) > 0:
-            direction = direction / np.linalg.norm(direction)
-        else:
-            # Default if thumb is at wrist (unlikely)
-            direction = np.array([1, 0, 0])
+        # Use SVD to find the normal to the best-fitting plane
+        u, s, vh = np.linalg.svd(centered_points)
+        # The normal is the last right singular vector
+        normal = vh[2, :]
+
+        # Ensure normal is a unit vector
+        normal = normal / np.linalg.norm(normal)
+
+        # Fit a principal axis through thumb points
+        thumb_centroid = np.mean(thumb_points, axis=0)
+        thumb_centered = thumb_points - thumb_centroid
+
+        # Use SVD again to find direction of maximum variance (principal axis)
+        u, s, vh = np.linalg.svd(thumb_centered)
+        principal_axis = vh[0, :]  # First singular vector
+
+        # Ensure principal axis is a unit vector
+        principal_axis = principal_axis / np.linalg.norm(principal_axis)
+
+        # Construct orthogonal vectors for orientation matrix
+        z_axis = normal  # Normal to the plane
+        x_axis = principal_axis  # Along the thumb
 
-        # Calculate gripper width
+        # Ensure x_axis is orthogonal to z_axis
+        x_axis = x_axis - np.dot(x_axis, z_axis) * z_axis
+        x_axis = x_axis / np.linalg.norm(x_axis)
+
+        # Compute y_axis as cross product to ensure orthogonality
+        y_axis = np.cross(z_axis, x_axis)
+
+        # Create rotation matrix
+        rotation_matrix = np.column_stack([x_axis, y_axis, z_axis])
+
+        # 3. Calculate gripper width
         gripper_width = np.linalg.norm(thumb_tip - index_tip)
 
-        return position, direction, normal, gripper_width
+        # Apply percentile threshold to mitigate slippage (as mentioned in the paper)
+        # Store gripper widths in a buffer to compute percentiles
+        if not hasattr(self, "gripper_width_buffer"):
+            self.gripper_width_buffer = []
+
+        self.gripper_width_buffer.append(gripper_width)
+        # Keep only the last 100 values
+        if len(self.gripper_width_buffer) > 100:
+            self.gripper_width_buffer.pop(0)
+
+        # Apply 20th percentile threshold
+        if len(self.gripper_width_buffer) > 5:  # Need enough samples
+            min_width = np.percentile(self.gripper_width_buffer, 20)
+            if gripper_width < min_width:
+                gripper_width = min_width
+
+        return position, rotation_matrix, gripper_width
 
     def record_video(self) -> str:
-        output_dir = os.path.join(os.getcwd(), 'recordings')
+        output_dir = os.path.join(os.getcwd(), "recordings")
         os.makedirs(output_dir, exist_ok=True)
 
         timestamp = time.strftime("%Y%m%d-%H%M%S")
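The orientation now comes from two SVDs plus an orthogonalization step: the plane normal is the least-variance singular vector over the stacked thumb and index points, the principal axis is the most-variance direction over the thumb alone, and a Gram-Schmidt step plus a cross product turn the three axes into an orthonormal rotation matrix. The gripper width is clamped to a rolling 20th percentile so brief tracking dips read as noise rather than a closing gripper. The same math on toy data:

import numpy as np

landmarks = np.random.rand(9, 3)   # stand-in for refined hand landmarks
thumb_points = landmarks[1:5]      # indices 1-4, as in the hunk above
index_points = landmarks[5:9]      # indices 5-8
finger_points = np.vstack([thumb_points, index_points])

# Plane normal: last right singular vector of the centered points
centered = finger_points - finger_points.mean(axis=0)
_, _, vh = np.linalg.svd(centered)
normal = vh[2] / np.linalg.norm(vh[2])

# Principal axis of the thumb: first right singular vector
thumb_centered = thumb_points - thumb_points.mean(axis=0)
_, _, vh = np.linalg.svd(thumb_centered)
principal_axis = vh[0] / np.linalg.norm(vh[0])

# Gram-Schmidt: make x orthogonal to z, then complete with a cross product
z_axis = normal
x_axis = principal_axis - np.dot(principal_axis, z_axis) * z_axis
x_axis /= np.linalg.norm(x_axis)
y_axis = np.cross(z_axis, x_axis)
rotation = np.column_stack([x_axis, y_axis, z_axis])
assert np.allclose(rotation.T @ rotation, np.eye(3), atol=1e-8)

# Percentile clamp for the gripper width (toy history, metres)
widths = [0.080, 0.074, 0.078, 0.021, 0.076, 0.072]
floor = np.percentile(widths, 20)
clamped = max(widths[-1], floor)  # dips below the 20th percentile are held up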
@@ -296,7 +362,7 @@ class HandProcessor:
         # Update camera intrinsics based on video dimensions
         self.camera_intrinsics = (width * 0.8, height * 0.8, width / 2, height / 2)
 
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         out = None
 
         recording = False
@@ -317,11 +383,19 @@ class HandProcessor:
             # Display recording indicator ONLY on the display frame
             if recording:
                 cv2.circle(display_frame, (30, 30), 15, (0, 0, 255), -1)
-                cv2.putText(display_frame, "RECORDING", (50, 40), cv2.FONT_HERSHEY_SIMPLEX,
-                            1, (0, 0, 255), 2, cv2.LINE_AA)
+                cv2.putText(
+                    display_frame,
+                    "RECORDING",
+                    (50, 40),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    1,
+                    (0, 0, 255),
+                    2,
+                    cv2.LINE_AA,
+                )
 
             # Show the display frame (with indicator if recording)
-            cv2.imshow('Video Recording', display_frame)
+            cv2.imshow("Video Recording", display_frame)
 
             # Write the original frame (without indicator) to video file if recording
             if recording and out is not None:
@@ -329,10 +403,7 @@ class HandProcessor:
 
             key = cv2.waitKey(1) & 0xFF
 
-            # NOTE: Haven't tested what happens if user records multiple videos in one session (probably overwrites?)
-            if key == ord('q'):
-                break
-            elif key == ord('r'):
+            if key == ord("r"):
                 recording = not recording
                 if recording:
                     print(f"Started recording to {video_filename}")
@@ -342,6 +413,7 @@ class HandProcessor:
                     out.release()
                     print(f"Stopped recording. Video saved to {video_filename}")
                     did_record = True
+                    break
 
         if out is not None:
             out.release()
@@ -373,12 +445,18 @@ class HandProcessor:
         constraints_output_path = f"{base_path}_constraints.mp4"
         robot_output_path = f"{base_path}_robot.mp4"
 
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        segmented_out = cv2.VideoWriter(segmented_output_path, fourcc, fps, (width, height))
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        segmented_out = cv2.VideoWriter(
+            segmented_output_path, fourcc, fps, (width, height)
+        )
         depth_out = cv2.VideoWriter(depth_output_path, fourcc, fps, (width, height))
         mesh_out = cv2.VideoWriter(mesh_output_path, fourcc, fps, (width, height))
-        reg_out = cv2.VideoWriter(registration_output_path, fourcc, fps, (640, 480)) # Fixed size
-        constraints_out = cv2.VideoWriter(constraints_output_path, fourcc, fps, (width, height))
+        reg_out = cv2.VideoWriter(
+            registration_output_path, fourcc, fps, (640, 480)
+        )  # Fixed size
+        constraints_out = cv2.VideoWriter(
+            constraints_output_path, fourcc, fps, (width, height)
+        )
         robot_out = cv2.VideoWriter(robot_output_path, fourcc, fps, (width, height))
 
         print(f"Processing video with {total_frames} frames...")
@@ -396,48 +474,60 @@ class HandProcessor:
             segmented_frame = frame.copy()
             depth_frame = np.zeros((height, width, 3), dtype=np.uint8)
             mesh_frame = frame.copy()
-            reg_frame = np.ones((480, 640, 3), dtype=np.uint8) * 255 # Fixed size, white background
+            reg_frame = (
+                np.ones((480, 640, 3), dtype=np.uint8) * 255
+            )  # Fixed size, white background
             constraints_frame = frame.copy()
             robot_frame = frame.copy()
 
             # Process if hand is detected
             if results.multi_hand_landmarks:
                 for hand_landmarks in results.multi_hand_landmarks:
-                    # Segment hand
                     hand_mask = self._create_mask(frame, hand_landmarks)
                     mask_overlay = frame.copy()
                     mask_overlay[hand_mask > 0] = [0, 0, 255]  # Red color for mask
                     segmented_frame = cv2.addWeighted(frame, 0.7, mask_overlay, 0.3, 0)
 
-                    # Depth estimation
                     depth_map, depth_vis = self._estimate_depth(frame)
                     depth_colored = cv2.applyColorMap(depth_vis, cv2.COLORMAP_JET)
                     depth_frame = depth_colored.copy()
 
-                    # Create hand mesh
-                    hand_vertices, hand_faces = self._create_mesh(hand_landmarks, (width, height))
-                    mesh_frame = visualize_mesh(frame, hand_vertices, hand_faces)
-
-                    # Create point cloud from depth
                     cloud = self._create_cloud(depth_map, hand_mask)
 
-                    # Perform ICP registration
+                    hand_vertices, hand_faces = self._create_mesh(
+                        hand_landmarks, (width, height)
+                    )
+                    mesh_frame = visualize_mesh(frame, hand_vertices, hand_faces)
+
                     icp_transform = self._icp_registration(cloud, hand_vertices)
-                    reg_frame = visualize_registration(cloud, hand_vertices, icp_transform)
+                    reg_frame = visualize_registration(
+                        cloud, hand_vertices, icp_transform
+                    )
 
-                    # Refine landmarks using ICP transformation
-                    refined_landmarks = self._refine_landmarks(hand_landmarks, icp_transform, (width, height))
+                    refined_landmarks = self._refine_landmarks(
+                        hand_landmarks, icp_transform, (width, height)
+                    )
 
-                    # Store pre-constraint landmarks for visualization
                     original_refined = refined_landmarks.copy()
 
-                    # Apply anatomical constraints
                     constrained_landmarks = self._apply_constraints(refined_landmarks)
-                    constraints_frame = visualize_constraints(frame, original_refined, constrained_landmarks, self.camera_intrinsics)
+                    constraints_frame = visualize_constraints(
+                        frame,
+                        original_refined,
+                        constrained_landmarks,
+                        self.camera_intrinsics,
+                    )
 
-                    # # Calculate robot parameters
-                    # position, direction, normal, gripper_width = self._get_robot_params(constrained_landmarks)
-                    # robot_frame = visualize_robot_params(frame, position, direction, normal, gripper_width)
+                    position, orientation, gripper_width = self._get_robot_params(
+                        constrained_landmarks
+                    )
+                    robot_frame = visualize_robot_params(
+                        frame,
+                        position,
+                        orientation,
+                        gripper_width,
+                        self.camera_intrinsics,
+                    )
 
             segmented_out.write(segmented_frame)
             depth_out.write(depth_frame)
@@ -450,12 +540,12 @@ class HandProcessor:
             display_size = (int(width * display_scale), int(height * display_scale))
             reg_display_size = (int(640 * display_scale), int(480 * display_scale))
 
-            cv2.imshow('Segmented', cv2.resize(segmented_frame, display_size))
-            cv2.imshow('Depth', cv2.resize(depth_frame, display_size))
-            cv2.imshow('Mesh', cv2.resize(mesh_frame, display_size))
-            cv2.imshow('Registration', cv2.resize(reg_frame, reg_display_size))
-            cv2.imshow('Constraints', cv2.resize(constraints_frame, display_size))
-            cv2.imshow('Robot Parameters', cv2.resize(robot_frame, display_size))
+            cv2.imshow("Segmented", cv2.resize(segmented_frame, display_size))
+            cv2.imshow("Depth", cv2.resize(depth_frame, display_size))
+            cv2.imshow("Mesh", cv2.resize(mesh_frame, display_size))
+            cv2.imshow("Registration", cv2.resize(reg_frame, reg_display_size))
+            cv2.imshow("Constraints", cv2.resize(constraints_frame, display_size))
+            cv2.imshow("Robot Parameters", cv2.resize(robot_frame, display_size))
 
             if cv2.waitKey(1) & 0xFF == 27:  # ESC to exit
                 break
@@ -483,5 +573,5 @@ class HandProcessor:
             "mesh": mesh_output_path,
             "registration": registration_output_path,
             "constraints": constraints_output_path,
-            "robot": robot_output_path
+            "robot": robot_output_path,
         }
@@ -8,7 +8,7 @@ def main():
 
     record_option = input("Record a new video? (y/n): ")
 
-    if record_option.lower() == 'y':
+    if record_option.lower() == "y":
         print("Starting video recording...")
         video_path = processor.record_video()
         if not video_path:
@@ -1,11 +1,9 @@
 import os
 
 import cv2
 import numpy as np
 import pybullet as p
-
-from utils.math import *
 from utils.handle_urdf import handle_urdf
-from scipy.spatial.transform import Rotation as R
 
 
 class RobotManager:
@@ -30,13 +28,13 @@ class RobotManager:
                 robot_urdf,
                 basePosition=[0, 0, 0],
                 useFixedBase=True,
-                flags=p.URDF_USE_SELF_COLLISION | p.URDF_USE_INERTIA_FROM_FILE
+                flags=p.URDF_USE_SELF_COLLISION | p.URDF_USE_INERTIA_FROM_FILE,
             )
         except p.error as e:
             print(f"PyBullet error when loading URDF: {e}")
             raise e
 
-        robot_name = p.getBodyInfo(robot_id)[1].decode('utf-8')
+        robot_name = p.getBodyInfo(robot_id)[1].decode("utf-8")
         print(f"Successfully loaded {robot_name} robot with ID: {robot_id}")
 
         return robot_id
@@ -46,11 +44,11 @@ class RobotManager:
         assert self.joint_count > 0, "Robot has no joints"
 
         # Keywords to look for in joint names to identify end effector
-        keywords = ['gripper', 'tool', 'tcp', 'end_effector', 'hand']
+        keywords = ["gripper", "tool", "tcp", "end_effector", "hand"]
 
         for i in range(self.joint_count):
             info = p.getJointInfo(self.robot_id, i)
-            joint_name = info[1].decode('utf-8').lower()
+            joint_name = info[1].decode("utf-8").lower()
 
             # Check if any keyword is in the joint name
             if any(keyword in joint_name for keyword in keywords):
@@ -60,7 +58,12 @@ class RobotManager:
         return self.joint_count - 1
 
     # TODO: Use inverse kinematics to set the robot pose
-    def set_robot_pose(self, position: np.ndarray, orientation_vectors: np.ndarray, gripper_width: float) -> None:
+    def set_robot_pose(
+        self,
+        position: np.ndarray,
+        orientation_vectors: np.ndarray,
+        gripper_width: float,
+    ) -> None:
         pass
 
     # Render the robot in some scene using some camera parameters
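set_robot_pose remains a stub; the TODO names inverse kinematics. A hedged sketch of one shape that implementation could take with PyBullet's built-in solver. The matrix-to-quaternion step reuses the scipy Rotation import this commit removed, and every name below is an assumption, not project code:

import numpy as np
import pybullet as p
from scipy.spatial.transform import Rotation as R


def set_robot_pose_sketch(robot_id, end_effector_index, position, orientation_matrix):
    # PyBullet expects quaternions as (x, y, z, w), which scipy's as_quat() emits
    quat = R.from_matrix(orientation_matrix).as_quat()
    joint_positions = p.calculateInverseKinematics(
        robot_id, end_effector_index, position, targetOrientation=quat
    )
    # IK returns one value per movable (non-fixed) joint, in joint order
    movable = [
        i
        for i in range(p.getNumJoints(robot_id))
        if p.getJointInfo(robot_id, i)[2] != p.JOINT_FIXED
    ]
    for joint_index, value in zip(movable, joint_positions):
        p.resetJointState(robot_id, joint_index, value)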
@@ -85,16 +88,13 @@ class RobotManager:
             yaw=cam_yaw,
             pitch=cam_pitch,
             roll=cam_roll,
-            upAxisIndex=2
+            upAxisIndex=2,
         )
 
         # Compute projection matrix
         aspect = self.img_width / self.img_height
         proj_matrix = p.computeProjectionMatrixFOV(
-            fov=60,
-            aspect=aspect,
-            nearVal=0.01,
-            farVal=100.0
+            fov=60, aspect=aspect, nearVal=0.01, farVal=100.0
         )
 
         # Render the scene
@@ -103,7 +103,7 @@ class RobotManager:
             height=self.img_height,
             viewMatrix=view_matrix,
             projectionMatrix=proj_matrix,
-            renderer=p.ER_BULLET_HARDWARE_OPENGL
+            renderer=p.ER_BULLET_HARDWARE_OPENGL,
         )
 
         # Extract RGB and depth
@@ -131,17 +131,19 @@ class RobotManager:
         return rgb.astype(np.uint8)
 
     def __del__(self) -> None:
-        if hasattr(self, 'physics_client'):
+        if hasattr(self, "physics_client"):
             try:
                 p.disconnect(self.physics_client)
             except:
                 pass
 
 
 if __name__ == "__main__":
     cwd = os.getcwd()
-    urdf_path = os.path.join(cwd, "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")
+    urdf_path = os.path.join(
+        cwd,
+        "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf",
+    )
     camera_intrinsics = (320, 240, 320, 240)  # Random intrinsics for example
 
     robot_vis = RobotManager(urdf_path, camera_intrinsics)
open_phantom/utils/calibrations.py (new file)

@@ -0,0 +1 @@
+# TODO: Implement camera calibration functions to extract camera extrinsics and intrinsics
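The new calibrations.py holds only a TODO. One possible starting point is OpenCV's standard chessboard calibration; the board size and image locations below are assumptions for illustration, not project code:

import glob

import cv2
import numpy as np

pattern = (9, 6)  # inner corners of a hypothetical calibration chessboard
objp = np.zeros((pattern[0] * pattern[1], 3), np.float32)
objp[:, :2] = np.mgrid[0 : pattern[0], 0 : pattern[1]].T.reshape(-1, 2)

obj_points, img_points = [], []
for path in glob.glob("calib/*.jpg"):  # hypothetical capture directory
    gray = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
    found, corners = cv2.findChessboardCorners(gray, pattern)
    if found:
        obj_points.append(objp)
        img_points.append(corners)

ret, camera_matrix, dist_coeffs, rvecs, tvecs = cv2.calibrateCamera(
    obj_points, img_points, gray.shape[::-1], None, None
)
print(camera_matrix)  # intrinsics: fx, fy on the diagonal, cx, cy in the last column
# rvecs/tvecs hold the per-view extrinsics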
@@ -2,7 +2,6 @@ import os
 import xml.dom.minidom as minidom
 import xml.etree.ElementTree as ET
 
-
 """
 Fixes a specific path that uses the ROS-style 'package://' prefix.
 The 'package://' prefix is used in URDF files to refer to files in the same package.
@@ -10,18 +9,20 @@ However, when we're not running in a ROS environment, the paths are not valid.
 This function tries to find the absolute path of the mesh file.
 If the mesh file is not found, the original path is used.
 """
+
+
 def fix_path(path: str, urdf_dir: str) -> str:
-    if path.startswith('package://'):
-        parts = path[len('package://'):].split('/', 1)
+    if path.startswith("package://"):
+        parts = path[len("package://") :].split("/", 1)
         if len(parts) == 2:
             package_name, rel_path = parts
 
             # Try potential locations for the mesh
             potential_paths = [
                 os.path.join(urdf_dir, rel_path),
-                os.path.join(urdf_dir, '../meshes', rel_path),
-                os.path.join(urdf_dir, f'../{package_name}', rel_path),
-                os.path.join(urdf_dir, '../..', rel_path)
+                os.path.join(urdf_dir, "../meshes", rel_path),
+                os.path.join(urdf_dir, f"../{package_name}", rel_path),
+                os.path.join(urdf_dir, "../..", rel_path),
             ]
 
             for possible_path in potential_paths:
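To make the resolution order concrete, a hypothetical call:

# fix_path("package://my_robot/meshes/base.stl", "/robots/my_robot/urdf")
# splits into package_name="my_robot", rel_path="meshes/base.stl" and probes, in order:
#   /robots/my_robot/urdf/meshes/base.stl
#   /robots/my_robot/urdf/../meshes/meshes/base.stl
#   /robots/my_robot/urdf/../my_robot/meshes/base.stl
#   /robots/my_robot/urdf/../../meshes/base.stl
# returning the first that exists on disk, or the original string otherwise.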
@@ -37,18 +38,20 @@ def fix_path(path: str, urdf_dir: str) -> str:
 Iterates through the URDF file and fixes the paths of all mesh files.
 The URDF file is parsed and the mesh paths are modified in-place.
 """
-def fix_mesh_paths(urdf_path:str, urdf_dir: str) -> str:
+
+
+def fix_mesh_paths(urdf_path: str, urdf_dir: str) -> str:
     root = ET.parse(urdf_path).getroot()
 
     try:
-        for mesh in root.findall('.//mesh'):
-            if 'filename' in mesh.attrib:
-                mesh.attrib['filename'] = fix_path(mesh.attrib['filename'], urdf_dir)
+        for mesh in root.findall(".//mesh"):
+            if "filename" in mesh.attrib:
+                mesh.attrib["filename"] = fix_path(mesh.attrib["filename"], urdf_dir)
     except Exception as e:
         print(f"Error fixing mesh paths: {e}")
         raise e
 
-    fixed_path = urdf_path.replace('.urdf', '_fixed.urdf')
+    fixed_path = urdf_path.replace(".urdf", "_fixed.urdf")
 
     return root, fixed_path
 
@@ -56,15 +59,19 @@ def fix_mesh_paths(urdf_path: str, urdf_dir: str) -> str:
 """
 Formats the XML tree to be human-readable and writes it to a file.
 """
+
+
 def format_xml(root: ET.Element, fixed_path: str) -> None:
-    xml_str = ET.tostring(root, encoding='utf-8')
+    xml_str = ET.tostring(root, encoding="utf-8")
     dom = minidom.parseString(xml_str)
 
-    with open(fixed_path, 'w', encoding='utf-8') as f:
+    with open(fixed_path, "w", encoding="utf-8") as f:
         # Write with nice indentation but remove extra whitespace
-        pretty_xml = dom.toprettyxml(indent='  ')
+        pretty_xml = dom.toprettyxml(indent="  ")
         # Remove extra blank lines that minidom sometimes adds
-        pretty_xml = '\n'.join([line for line in pretty_xml.split('\n') if line.strip()])
+        pretty_xml = "\n".join(
+            [line for line in pretty_xml.split("\n") if line.strip()]
+        )
         f.write(pretty_xml)
 
 
@@ -87,8 +94,11 @@ def handle_urdf(urdf_path: str) -> str:
         raise e
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # Example usage
     cwd = os.getcwd()
-    urdf_path = os.path.join(cwd, "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")
+    urdf_path = os.path.join(
+        cwd,
+        "open_phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf",
+    )
     handle_urdf(urdf_path)
@@ -11,13 +11,17 @@ def visualize_mesh(frame, hand_vertices, faces):
     for i in range(21):
         pt = tuple(map(int, hand_vertices[i][:2]))
         if 0 <= pt[0] < width and 0 <= pt[1] < height:
-            cv2.circle(mesh_vis, pt, 4, (255, 0, 0), -1) # Red circles for base vertices
+            cv2.circle(
+                mesh_vis, pt, 4, (255, 0, 0), -1
+            )  # Red circles for base vertices
 
     # Draw the interpolated vertices
     for i in range(21, len(hand_vertices)):
         pt = tuple(map(int, hand_vertices[i][:2]))
         if 0 <= pt[0] < width and 0 <= pt[1] < height:
-            cv2.circle(mesh_vis, pt, 2, (0, 255, 255), -1) # Yellow circles for interpolated vertices
+            cv2.circle(
+                mesh_vis, pt, 2, (0, 255, 255), -1
+            )  # Yellow circles for interpolated vertices
 
     # Draw the faces of the mesh
     for face in faces:
@@ -26,25 +30,53 @@ def visualize_mesh(frame, hand_vertices, faces):
         pt2 = tuple(map(int, hand_vertices[face[1]][:2]))
         pt3 = tuple(map(int, hand_vertices[face[2]][:2]))
 
-        if (0 <= pt1[0] < width and 0 <= pt1[1] < height and
-            0 <= pt2[0] < width and 0 <= pt2[1] < height and
-            0 <= pt3[0] < width and 0 <= pt3[1] < height):
-            cv2.line(mesh_vis, pt1, pt2, (0, 255, 0), 1) # Green lines for mesh edges
+        if (
+            0 <= pt1[0] < width
+            and 0 <= pt1[1] < height
+            and 0 <= pt2[0] < width
+            and 0 <= pt2[1] < height
+            and 0 <= pt3[0] < width
+            and 0 <= pt3[1] < height
+        ):
+            cv2.line(
+                mesh_vis, pt1, pt2, (0, 255, 0), 1
+            )  # Green lines for mesh edges
             cv2.line(mesh_vis, pt2, pt3, (0, 255, 0), 1)
             cv2.line(mesh_vis, pt3, pt1, (0, 255, 0), 1)
 
     # Add explanation text
-    cv2.putText(mesh_vis, "Red: Base vertices", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
-    cv2.putText(mesh_vis, "Yellow: Interpolated vertices", (10, 60),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
-    cv2.putText(mesh_vis, "Green: Mesh edges", (10, 90),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+    cv2.putText(
+        mesh_vis,
+        "Red: Base vertices",
+        (10, 30),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.6,
+        (255, 0, 0),
+        2,
+    )
+    cv2.putText(
+        mesh_vis,
+        "Yellow: Interpolated vertices",
+        (10, 60),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.6,
+        (0, 255, 255),
+        2,
+    )
+    cv2.putText(
+        mesh_vis,
+        "Green: Mesh edges",
+        (10, 90),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.6,
+        (0, 255, 0),
+        2,
+    )
 
     return mesh_vis
 
 
 def visualize_registration(cloud, vertices, icp_transform):
-    """Visualize the ICP registration process"""
     height, width = 480, 640  # Fixed size visualization
     reg_vis = np.ones((height, width, 3), dtype=np.uint8) * 255  # White background
 
@@ -90,25 +122,63 @@ def visualize_registration(cloud, vertices, icp_transform):
             cv2.circle(reg_vis, pt, 2, (0, 255, 0), -1)  # Green dots
 
     # Add transformation matrix display
-    cv2.putText(reg_vis, "ICP Transformation Matrix:", (10, height - 120),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
+    cv2.putText(
+        reg_vis,
+        "ICP Transformation Matrix:",
+        (10, height - 120),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.5,
+        (0, 0, 0),
+        1,
+    )
 
     for i in range(4):
         matrix_text = f"[{icp_transform[i][0]:.2f}, {icp_transform[i][1]:.2f}, {icp_transform[i][2]:.2f}, {icp_transform[i][3]:.2f}]"
-        cv2.putText(reg_vis, matrix_text, (10, height - 90 + i * 20),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)
+        cv2.putText(
+            reg_vis,
+            matrix_text,
+            (10, height - 90 + i * 20),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.4,
+            (0, 0, 0),
+            1,
+        )
 
     # Add legend
-    cv2.putText(reg_vis, "Red: Point Cloud", (width - 200, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
-    cv2.putText(reg_vis, "Blue: Original Mesh", (width - 200, 60),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
-    cv2.putText(reg_vis, "Green: Transformed Mesh", (width - 200, 90),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
+    cv2.putText(
+        reg_vis,
+        "Red: Point Cloud",
+        (width - 200, 30),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.5,
+        (0, 0, 255),
+        1,
+    )
+    cv2.putText(
+        reg_vis,
+        "Blue: Original Mesh",
+        (width - 200, 60),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.5,
+        (255, 0, 0),
+        1,
+    )
+    cv2.putText(
+        reg_vis,
+        "Green: Transformed Mesh",
+        (width - 200, 90),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.5,
+        (0, 255, 0),
+        1,
+    )
 
     return reg_vis
 
-def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camera_intrinsics):
+
+def visualize_constraints(
+    frame, refined_landmarks, constrained_landmarks, camera_intrinsics
+):
     """Visualize before and after applying anatomical constraints"""
     constraints_vis = frame.copy()
     height, width = frame.shape[:2]
@@ -129,7 +199,9 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
         if 0 <= u < width and 0 <= v < height:
             if i in highlighted_indices:
                 # Draw larger circles for thumb and index fingers (the constrained ones)
-                cv2.circle(constraints_vis, (u, v), 5, (0, 0, 255), -1) # Red circles
+                cv2.circle(
+                    constraints_vis, (u, v), 5, (0, 0, 255), -1
+                )  # Red circles
             else:
                 cv2.circle(constraints_vis, (u, v), 3, (0, 0, 255), -1)
 
@@ -146,9 +218,19 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
         end_u = int(end[0] * focal_x / end[2] + center_x)
         end_v = int(end[1] * focal_y / end[2] + center_y)
 
-        if (0 <= start_u < width and 0 <= start_v < height and
-            0 <= end_u < width and 0 <= end_v < height):
-            cv2.line(constraints_vis, (start_u, start_v), (end_u, end_v), (0, 0, 255), 2)
+        if (
+            0 <= start_u < width
+            and 0 <= start_v < height
+            and 0 <= end_u < width
+            and 0 <= end_v < height
+        ):
+            cv2.line(
+                constraints_vis,
+                (start_u, start_v),
+                (end_u, end_v),
+                (0, 0, 255),
+                2,
+            )
 
     # Draw constrained landmarks
     for i, landmark in enumerate(constrained_landmarks):
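Every visualizer in this file projects camera-frame points to pixels the same way; a minimal statement of that pinhole model, using the (focal_x, focal_y, center_x, center_y) tuple these functions take:

def project(point, intrinsics):
    # Pinhole camera: a camera-frame point (x, y, z) with z > 0 lands at
    # u = x * fx / z + cx,  v = y * fy / z + cy
    focal_x, focal_y, center_x, center_y = intrinsics
    x, y, z = point
    return int(x * focal_x / z + center_x), int(y * focal_y / z + center_y)


print(project((0.1, 0.05, 0.5), (320, 240, 320, 240)))  # -> (384, 264)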
@@ -160,7 +242,9 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
         if 0 <= u < width and 0 <= v < height:
             if i in highlighted_indices:
                 # Draw larger circles for thumb and index fingers
-                cv2.circle(constraints_vis, (u, v), 5, (0, 255, 0), -1) # Green circles
+                cv2.circle(
+                    constraints_vis, (u, v), 5, (0, 255, 0), -1
+                )  # Green circles
             else:
                 cv2.circle(constraints_vis, (u, v), 3, (0, 255, 0), -1)
 
@@ -177,24 +261,56 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
         end_u = int(end[0] * focal_x / end[2] + center_x)
         end_v = int(end[1] * focal_y / end[2] + center_y)
 
-        if (0 <= start_u < width and 0 <= start_v < height and
-            0 <= end_u < width and 0 <= end_v < height):
-            cv2.line(constraints_vis, (start_u, start_v), (end_u, end_v), (0, 255, 0), 2)
+        if (
+            0 <= start_u < width
+            and 0 <= start_v < height
+            and 0 <= end_u < width
+            and 0 <= end_v < height
+        ):
+            cv2.line(
+                constraints_vis,
+                (start_u, start_v),
+                (end_u, end_v),
+                (0, 255, 0),
+                2,
+            )
 
     # Add legend
-    cv2.putText(constraints_vis, "Red: Before constraints", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
-    cv2.putText(constraints_vis, "Green: After constraints", (10, 60),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+    cv2.putText(
+        constraints_vis,
+        "Red: Before constraints",
+        (10, 30),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.7,
+        (0, 0, 255),
+        2,
+    )
+    cv2.putText(
+        constraints_vis,
+        "Green: After constraints",
+        (10, 60),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.7,
+        (0, 255, 0),
+        2,
+    )
 
     return constraints_vis
 
-def visualize_robot_params(frame, position, direction, normal, gripper_width, camera_intrinsics):
-    """Visualize robot parameters (position, orientation, gripper width)"""
+
+def visualize_robot_params(
+    frame, position, orientation_matrix, gripper_width, camera_intrinsics
+):
+    """Visualize robot parameters (position, orientation matrix, gripper width)"""
     robot_vis = frame.copy()
     height, width = frame.shape[:2]
     focal_x, focal_y, center_x, center_y = camera_intrinsics
 
+    # Extract axes from orientation matrix
+    x_axis = orientation_matrix[:, 0]  # First column (principal axis)
+    y_axis = orientation_matrix[:, 1]  # Second column
+    z_axis = orientation_matrix[:, 2]  # Third column (normal)
+
     # Project position to image coordinates
     x, y, z = position
     if z > 0:
@@ -205,59 +321,113 @@ def visualize_robot_params(frame, position, direction, normal, gripper_width, ca
         # Draw end effector position
         cv2.circle(robot_vis, (u, v), 10, (255, 0, 0), -1)  # Blue circle
 
-        # Draw direction vector (X axis)
-        dx, dy, dz = direction
+        # Draw X axis (principal axis)
+        dx, dy, dz = x_axis
         scale = 50  # Scale for better visualization
         end_u = int((x + dx * scale) * focal_x / (z + dz * scale) + center_x)
         end_v = int((y + dy * scale) * focal_y / (z + dz * scale) + center_y)
 
         if 0 <= end_u < width and 0 <= end_v < height:
-            cv2.line(robot_vis, (u, v), (end_u, end_v), (0, 0, 255), 2) # Red line (X axis)
-            cv2.putText(robot_vis, "X", (end_u, end_v),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+            cv2.line(
+                robot_vis, (u, v), (end_u, end_v), (0, 0, 255), 2
+            )  # Red line (X axis)
+            cv2.putText(
+                robot_vis,
+                "X",
+                (end_u, end_v),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                (0, 0, 255),
+                2,
+            )
 
-        # Draw normal vector (Z axis)
-        nx, ny, nz = normal
+        # Draw Z axis (normal)
+        nx, ny, nz = z_axis
         end_u = int((x + nx * scale) * focal_x / (z + nz * scale) + center_x)
         end_v = int((y + ny * scale) * focal_y / (z + nz * scale) + center_y)
 
         if 0 <= end_u < width and 0 <= end_v < height:
-            cv2.line(robot_vis, (u, v), (end_u, end_v), (0, 255, 0), 2) # Green line (Z axis)
-            cv2.putText(robot_vis, "Z", (end_u, end_v),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+            cv2.line(
+                robot_vis, (u, v), (end_u, end_v), (0, 255, 0), 2
+            )  # Green line (Z axis)
+            cv2.putText(
+                robot_vis,
+                "Z",
+                (end_u, end_v),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                (0, 255, 0),
+                2,
+            )
 
-        # Calculate Y axis (cross product of Z and X for right-hand coordinate system)
-        y_axis = np.cross(normal, direction)
-        y_axis = y_axis / np.linalg.norm(y_axis)
+        # Draw Y axis
         yx, yy, yz = y_axis
 
         end_u = int((x + yx * scale) * focal_x / (z + yz * scale) + center_x)
         end_v = int((y + yy * scale) * focal_y / (z + yz * scale) + center_y)
 
         if 0 <= end_u < width and 0 <= end_v < height:
-            cv2.line(robot_vis, (u, v), (end_u, end_v), (255, 0, 0), 2) # Blue line (Y axis)
-            cv2.putText(robot_vis, "Y", (end_u, end_v),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+            cv2.line(
+                robot_vis, (u, v), (end_u, end_v), (255, 0, 0), 2
+            )  # Blue line (Y axis)
+            cv2.putText(
+                robot_vis,
+                "Y",
+                (end_u, end_v),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                (255, 0, 0),
+                2,
+            )
 
         # Visualize gripper width
         gripper_radius = int(gripper_width * 100)  # Scale for better visualization
-        cv2.circle(robot_vis, (u, v), gripper_radius, (0, 255, 255), 2) # Yellow circle
+        cv2.circle(
+            robot_vis, (u, v), gripper_radius, (0, 255, 255), 2
+        )  # Yellow circle
 
         # Add parameter values as text
         y_offset = height - 160
-        cv2.putText(robot_vis, f"Position: ({x:.2f}, {y:.2f}, {z:.2f})", (10, y_offset),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+        cv2.putText(
+            robot_vis,
+            f"Position: ({x:.2f}, {y:.2f}, {z:.2f})",
+            (10, y_offset),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (255, 255, 255),
+            1,
+        )
 
         y_offset += 30
-        cv2.putText(robot_vis, f"Direction: ({dx:.2f}, {dy:.2f}, {dz:.2f})", (10, y_offset),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
+        cv2.putText(
+            robot_vis,
+            f"X axis: ({x_axis[0]:.2f}, {x_axis[1]:.2f}, {x_axis[2]:.2f})",
+            (10, y_offset),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 0, 255),
+            1,
+        )
 
         y_offset += 30
-        cv2.putText(robot_vis, f"Normal: ({nx:.2f}, {ny:.2f}, {nz:.2f})", (10, y_offset),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
+        cv2.putText(
+            robot_vis,
+            f"Z axis: ({z_axis[0]:.2f}, {z_axis[1]:.2f}, {z_axis[2]:.2f})",
+            (10, y_offset),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 255, 0),
+            1,
+        )
 
         y_offset += 30
-        cv2.putText(robot_vis, f"Gripper Width: {gripper_width:.2f}", (10, y_offset),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
+        cv2.putText(
+            robot_vis,
+            f"Gripper Width: {gripper_width:.2f}",
+            (10, y_offset),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 255, 255),
+            1,
+        )
 
     return robot_vis