improve icp and robot params

Ethan Clark 2025-03-21 17:05:51 -07:00
parent aae4e91f54
commit b38527b5db
6 changed files with 690 additions and 417 deletions


@ -1,15 +1,17 @@
import os
import cv2
import time
import torch
import depth_pro
import mediapipe as mp
import numpy as np
import open3d as o3d
from PIL import Image
from robot_manager import RobotManager
from sam2.sam2_image_predictor import SAM2ImagePredictor
from tqdm import tqdm
from utils.visualizations import *
-# from diffusers import StableDiffusionInpaintPipeline
class HandProcessor:
@ -37,39 +39,38 @@ class HandProcessor:
        self.midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        self.transform = self.midas_transforms.dpt_transform
        # Load SAM2 model for hand segmentation
        print("Loading SAM2 model...")
        self.sam2_predictor = SAM2ImagePredictor.from_pretrained(
            "facebook/sam2-hiera-large"
        )

-    # Segment hand from image using MediaPipe Hands landmarks
-    def _create_mask(self, image: np.ndarray, landmarks: list, thickness: int = 5, padding: int = 10) -> np.ndarray:
-        height, width = image.shape[:2]
-        mask = np.zeros((height, width), dtype=np.uint8)
-        points = [(int(landmark.x * width), int(landmark.y * height)) for landmark in landmarks.landmark]
-        # Draw filled circles at each landmark
-        for point in points:
-            cv2.circle(mask, point, thickness, 255, -1)
-        # Connect all landmarks with thick lines
-        for connection in self.mp_hands.HAND_CONNECTIONS:
-            start_idx, end_idx = connection
-            cv2.line(mask, points[start_idx], points[end_idx], 255, thickness)
-        # Create palm by connecting base of fingers with wrist
-        palm_points = [points[0], points[1], points[5], points[9], points[13], points[17]]
-        cv2.fillPoly(mask, [np.array(palm_points)], 255)
-        # Create shape between fingers
-        finger_bases = [(1, 5), (5, 9), (9, 13), (13, 17)]
-        for base1, base2 in finger_bases:
-            triangle = np.array([points[0], points[base1], points[base2]])
-            cv2.fillPoly(mask, [triangle], 255)
-        # Dilate to smooth and expand slightly
-        kernel = np.ones((padding, padding), np.uint8)
-        mask = cv2.dilate(mask, kernel, iterations=1)
-        # Smooth the mask
-        mask = cv2.GaussianBlur(mask, (21, 21), 0)
-        _, mask = cv2.threshold(mask, 50, 255, cv2.THRESH_BINARY)
-        return mask

    def _create_mask(self, frame: np.ndarray, landmarks: list) -> np.ndarray:
        height, width = frame.shape[:2]
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Set image in SAM2 predictor
        with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
            self.sam2_predictor.set_image(frame_rgb)  # Set image for prediction

            # Convert landmarks to point prompts
            points = []
            for landmark in landmarks.landmark:
                x, y = int(landmark.x * width), int(landmark.y * height)
                points.append([x, y])
            input_points = np.array(points)

            # Predict mask with point prompts
            masks, _, _ = self.sam2_predictor.predict(
                point_coords=input_points,  # Pass the points as prompts
                point_labels=np.ones(len(input_points)),  # All points from hand are foreground
                multimask_output=False,  # Just get one mask
            )

            mask = masks[0].astype(np.uint8) * 255

        return mask
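If the SAM2 mask proves unstable from frame to frame, one possible refinement (not part of this commit; a sketch assuming the same SAM2ImagePredictor API used above) is to request several mask hypotheses and keep the highest-scoring one:

import numpy as np

def predict_best_mask(predictor, input_points: np.ndarray) -> np.ndarray:
    # Sketch: ask SAM2 for multiple candidate masks and keep the best-scoring one.
    # Assumes predictor.set_image(...) was already called for the current frame.
    masks, scores, _ = predictor.predict(
        point_coords=input_points,
        point_labels=np.ones(len(input_points)),
        multimask_output=True,  # return several hypotheses with confidence scores
    )
    best = int(np.argmax(scores))
    return masks[best].astype(np.uint8) * 255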
@ -77,7 +78,8 @@ class HandProcessor:
    Transform input image to match MiDaS model requirements
    Estimate depth map using MiDaS model
    """

    # TODO: Look into why screen goes black sometimes
    # TODO: Swap MiDaS for ml-depth-pro model
    def _estimate_depth(self, image: np.ndarray) -> tuple:
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
@ -116,7 +118,6 @@ class HandProcessor:
        v_coords = v_coords[valid_indices]
        z_values = z_values[valid_indices]

-        # NOTE: Arbitrary scaling factor for depth
        z_metric = z_values * 0.5

        # Back-project to 3D using camera intrinsics
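The new depth_pro import and the "Swap MiDaS for ml-depth-pro" TODO hint at where the arbitrary z_values * 0.5 scaling is headed; a rough sketch of metric depth with ml-depth-pro, based on that package's published usage (details here are assumptions, not code from this repo):

import depth_pro

# Sketch: ml-depth-pro predicts metric depth directly, which would replace
# the hand-tuned 0.5 scale applied to the MiDaS output above.
model, transform = depth_pro.create_model_and_transforms()
model.eval()

image, _, f_px = depth_pro.load_rgb("frame.jpg")  # hypothetical input path
prediction = model.infer(transform(image), f_px=f_px)

depth_m = prediction["depth"]            # depth map in meters
focal_px = prediction["focallength_px"]  # estimated focal length in pixels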
@ -127,32 +128,38 @@ class HandProcessor:
        return points
    # TODO: Look into better depth scaling
    def _create_mesh(self, landmarks: list, image_dims: tuple) -> np.ndarray:
        width, height = image_dims

        vertices = []
        for landmark in landmarks.landmark:
            vertices.append([landmark.x * width, landmark.y * height, landmark.z * width])

        # Define faces (triangles) connecting landmarks
        faces = [
            # Palm
            [0, 1, 5], [0, 5, 9], [0, 9, 13], [0, 13, 17],
            # Thumb
            [1, 2, 3], [2, 3, 4],
            # Index finger
            [5, 6, 7], [6, 7, 8],
            # Middle finger
            [9, 10, 11], [10, 11, 12],
            # Ring finger
            [13, 14, 15], [14, 15, 16],
            # Pinky
            [17, 18, 19], [18, 19, 20],
        ]

        dense_vertices = list(vertices)
@ -171,6 +178,7 @@ class HandProcessor:
        return np.array(dense_vertices), np.array(faces)

    # TODO: Improve ICP registration for better alignment
    def _icp_registration(self, cloud: np.ndarray, vertices: np.ndarray) -> np.ndarray:
        source = o3d.geometry.PointCloud()
        source.points = o3d.utility.Vector3dVector(vertices)
@ -178,28 +186,41 @@ class HandProcessor:
        target = o3d.geometry.PointCloud()
        target.points = o3d.utility.Vector3dVector(cloud)

        # Calculate centroids
        source_centroid = np.mean(vertices, axis=0)
        target_centroid = np.mean(cloud, axis=0)

        # Calculate initial translation to align centroids
        initial_translation = target_centroid - source_centroid

        # Create initial transformation matrix
        trans_init = np.eye(4)
        trans_init[:3, 3] = initial_translation

        result = o3d.pipelines.registration.registration_icp(
            source,
            target,
-            max_correspondence_distance=0.05,
            max_correspondence_distance=0.1,  # Increased distance
            init=trans_init,  # Initial transformation
            estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
            criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
                max_iteration=100
            ),  # Increased iterations
        )

        transformation = result.transformation

        return transformation
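To check whether the larger correspondence distance and iteration cap actually help, one option (not in this commit) is to look at the fitness and inlier RMSE that Open3D reports, either on the result above or by re-evaluating a transform with evaluate_registration; a minimal sketch:

import numpy as np
import open3d as o3d

def report_icp_quality(source_pts: np.ndarray, target_pts: np.ndarray,
                       transform: np.ndarray, max_dist: float = 0.1) -> None:
    # Hypothetical helper: wraps Open3D's evaluate_registration to print alignment quality.
    source = o3d.geometry.PointCloud()
    source.points = o3d.utility.Vector3dVector(source_pts)
    target = o3d.geometry.PointCloud()
    target.points = o3d.utility.Vector3dVector(target_pts)

    result = o3d.pipelines.registration.evaluate_registration(source, target, max_dist, transform)
    # fitness: fraction of source points with a correspondence within max_dist
    # inlier_rmse: RMSE over those correspondences
    print(f"fitness={result.fitness:.3f}, inlier_rmse={result.inlier_rmse:.4f}")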
    # TODO: Look into better depth scaling
    def _refine_landmarks(self, landmarks: list, transform: int, image_dims: tuple):
        width, height = image_dims

        refined_landmarks = []
        for landmark in landmarks.landmark:
            # TODO: Figure out better scaling factor
            point = np.array([landmark.x * width, landmark.y * height, landmark.z * width, 1.0])

            # Apply transformation to 3D point
            transformed = np.dot(transform, point)
@ -244,42 +265,87 @@ class HandProcessor:
        return constrained
-    def _get_robot_params(self, refined_landmarks) -> tuple:
-        # Extract key landmarks
-        wrist = refined_landmarks[0]  # Wrist landmark
-        thumb_tip = refined_landmarks[4]  # Thumb tip
-        index_tip = refined_landmarks[8]  # Index finger tip
-        # Calculate end effector position (midpoint between thumb and index tips)
-        position = (thumb_tip + index_tip) / 2
-        # Calculate vectors for orientation
-        v1 = thumb_tip - wrist  # Vector from wrist to thumb tip
-        v2 = index_tip - wrist  # Vector from wrist to index tip
-        # Calculate normal to hand plane
-        normal = np.cross(v1, v2)
-        if np.linalg.norm(normal) > 0:
-            normal = normal / np.linalg.norm(normal)
-        else:
-            # Default if vectors are collinear
-            normal = np.array([0, 0, 1])
-        # Calculate main direction along thumb
-        direction = thumb_tip - wrist
-        if np.linalg.norm(direction) > 0:
-            direction = direction / np.linalg.norm(direction)
-        else:
-            # Default if thumb is at wrist (unlikely)
-            direction = np.array([1, 0, 0])
-        # Calculate gripper width
-        gripper_width = np.linalg.norm(thumb_tip - index_tip)
-        return position, direction, normal, gripper_width

    def _get_robot_params(self, refined_landmarks):
        # Extract keypoints
        landmarks = np.array(refined_landmarks)

        # Define indices for specific parts of the hand
        thumb_indices = [1, 2, 3, 4]  # Thumb landmarks
        index_indices = [5, 6, 7, 8]  # Index finger landmarks
        thumb_tip_idx = 4
        index_tip_idx = 8

        # 1. Set target position as midpoint between thumb tip and index tip
        thumb_tip = landmarks[thumb_tip_idx]
        index_tip = landmarks[index_tip_idx]
        position = (thumb_tip + index_tip) / 2

        # 2. Calculate orientation
        # Get all thumb and index finger points for plane fitting
        thumb_points = landmarks[thumb_indices]
        index_points = landmarks[index_indices]
        finger_points = np.vstack([thumb_points, index_points])

        # Fit plane to finger points
        centroid = np.mean(finger_points, axis=0)
        centered_points = finger_points - centroid

        # Use SVD to find the normal to the best-fitting plane
        u, s, vh = np.linalg.svd(centered_points)
        # The normal is the last right singular vector
        normal = vh[2, :]
        # Ensure normal is a unit vector
        normal = normal / np.linalg.norm(normal)

        # Fit a principal axis through thumb points
        thumb_centroid = np.mean(thumb_points, axis=0)
        thumb_centered = thumb_points - thumb_centroid

        # Use SVD again to find direction of maximum variance (principal axis)
        u, s, vh = np.linalg.svd(thumb_centered)
        principal_axis = vh[0, :]  # First singular vector
        # Ensure principal axis is a unit vector
        principal_axis = principal_axis / np.linalg.norm(principal_axis)

        # Construct orthogonal vectors for orientation matrix
        z_axis = normal  # Normal to the plane
        x_axis = principal_axis  # Along the thumb

        # Ensure x_axis is orthogonal to z_axis
        x_axis = x_axis - np.dot(x_axis, z_axis) * z_axis
        x_axis = x_axis / np.linalg.norm(x_axis)

        # Compute y_axis as cross product to ensure orthogonality
        y_axis = np.cross(z_axis, x_axis)

        # Create rotation matrix
        rotation_matrix = np.column_stack([x_axis, y_axis, z_axis])

        # 3. Calculate gripper width
        gripper_width = np.linalg.norm(thumb_tip - index_tip)

        # Apply percentile threshold to mitigate slippage (as mentioned in the paper)
        # Store gripper widths in a buffer to compute percentiles
        if not hasattr(self, "gripper_width_buffer"):
            self.gripper_width_buffer = []
        self.gripper_width_buffer.append(gripper_width)

        # Keep only the last 100 values
        if len(self.gripper_width_buffer) > 100:
            self.gripper_width_buffer.pop(0)

        # Apply 20th percentile threshold
        if len(self.gripper_width_buffer) > 5:  # Need enough samples
            min_width = np.percentile(self.gripper_width_buffer, 20)
            if gripper_width < min_width:
                gripper_width = min_width

        return position, rotation_matrix, gripper_width
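Downstream, PyBullet's IK (see the set_robot_pose TODO in robot_manager) expects an orientation quaternion rather than a 3x3 matrix; a minimal conversion sketch using scipy (an assumed dependency here, since its import was actually dropped from robot_manager in this commit):

import numpy as np
from scipy.spatial.transform import Rotation as R  # assumed dependency

def rotation_matrix_to_quaternion(rotation_matrix: np.ndarray) -> np.ndarray:
    # Convert the column-stacked [x_axis, y_axis, z_axis] matrix from
    # _get_robot_params into an (x, y, z, w) quaternion, the convention
    # PyBullet uses for target orientations.
    return R.from_matrix(rotation_matrix).as_quat()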
    def record_video(self) -> str:
        output_dir = os.path.join(os.getcwd(), "recordings")
        os.makedirs(output_dir, exist_ok=True)

        timestamp = time.strftime("%Y%m%d-%H%M%S")
@ -296,7 +362,7 @@ class HandProcessor:
        # Update camera intrinsics based on video dimensions
        self.camera_intrinsics = (width * 0.8, height * 0.8, width / 2, height / 2)

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = None
        recording = False
@ -317,11 +383,19 @@ class HandProcessor:
            # Display recording indicator ONLY on the display frame
            if recording:
                cv2.circle(display_frame, (30, 30), 15, (0, 0, 255), -1)
                cv2.putText(display_frame, "RECORDING", (50, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

            # Show the display frame (with indicator if recording)
            cv2.imshow("Video Recording", display_frame)

            # Write the original frame (without indicator) to video file if recording
            if recording and out is not None:
@ -329,10 +403,7 @@ class HandProcessor:
            key = cv2.waitKey(1) & 0xFF

-            # NOTE: Haven't tested what happens if user records multiple videos in one session (probably overwrites?)
-            if key == ord('q'):
-                break
-            elif key == ord('r'):
            if key == ord("r"):
                recording = not recording
                if recording:
                    print(f"Started recording to {video_filename}")
@ -342,6 +413,7 @@ class HandProcessor:
                    out.release()
                    print(f"Stopped recording. Video saved to {video_filename}")
                    did_record = True
                    break
        if out is not None:
            out.release()
@ -373,12 +445,18 @@ class HandProcessor:
        constraints_output_path = f"{base_path}_constraints.mp4"
        robot_output_path = f"{base_path}_robot.mp4"

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        segmented_out = cv2.VideoWriter(segmented_output_path, fourcc, fps, (width, height))
        depth_out = cv2.VideoWriter(depth_output_path, fourcc, fps, (width, height))
        mesh_out = cv2.VideoWriter(mesh_output_path, fourcc, fps, (width, height))
        reg_out = cv2.VideoWriter(registration_output_path, fourcc, fps, (640, 480))  # Fixed size
        constraints_out = cv2.VideoWriter(constraints_output_path, fourcc, fps, (width, height))
        robot_out = cv2.VideoWriter(robot_output_path, fourcc, fps, (width, height))
print(f"Processing video with {total_frames} frames...") print(f"Processing video with {total_frames} frames...")
@ -396,48 +474,60 @@ class HandProcessor:
            segmented_frame = frame.copy()
            depth_frame = np.zeros((height, width, 3), dtype=np.uint8)
            mesh_frame = frame.copy()
            reg_frame = np.ones((480, 640, 3), dtype=np.uint8) * 255  # Fixed size, white background
            constraints_frame = frame.copy()
            robot_frame = frame.copy()

            # Process if hand is detected
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    hand_mask = self._create_mask(frame, hand_landmarks)
                    mask_overlay = frame.copy()
                    mask_overlay[hand_mask > 0] = [0, 0, 255]  # Red color for mask
                    segmented_frame = cv2.addWeighted(frame, 0.7, mask_overlay, 0.3, 0)

                    depth_map, depth_vis = self._estimate_depth(frame)
                    depth_colored = cv2.applyColorMap(depth_vis, cv2.COLORMAP_JET)
                    depth_frame = depth_colored.copy()

                    cloud = self._create_cloud(depth_map, hand_mask)

                    hand_vertices, hand_faces = self._create_mesh(hand_landmarks, (width, height))
                    mesh_frame = visualize_mesh(frame, hand_vertices, hand_faces)

                    icp_transform = self._icp_registration(cloud, hand_vertices)
                    reg_frame = visualize_registration(cloud, hand_vertices, icp_transform)

                    refined_landmarks = self._refine_landmarks(hand_landmarks, icp_transform, (width, height))
                    original_refined = refined_landmarks.copy()

                    constrained_landmarks = self._apply_constraints(refined_landmarks)
                    constraints_frame = visualize_constraints(frame, original_refined, constrained_landmarks, self.camera_intrinsics)

                    position, orientation, gripper_width = self._get_robot_params(constrained_landmarks)
                    robot_frame = visualize_robot_params(frame, position, orientation, gripper_width, self.camera_intrinsics)

            segmented_out.write(segmented_frame)
            depth_out.write(depth_frame)
@ -450,12 +540,12 @@ class HandProcessor:
            display_size = (int(width * display_scale), int(height * display_scale))
            reg_display_size = (int(640 * display_scale), int(480 * display_scale))

            cv2.imshow("Segmented", cv2.resize(segmented_frame, display_size))
            cv2.imshow("Depth", cv2.resize(depth_frame, display_size))
            cv2.imshow("Mesh", cv2.resize(mesh_frame, display_size))
            cv2.imshow("Registration", cv2.resize(reg_frame, reg_display_size))
            cv2.imshow("Constraints", cv2.resize(constraints_frame, display_size))
            cv2.imshow("Robot Parameters", cv2.resize(robot_frame, display_size))
            if cv2.waitKey(1) & 0xFF == 27:  # ESC to exit
                break
@ -483,5 +573,5 @@ class HandProcessor:
"mesh": mesh_output_path, "mesh": mesh_output_path,
"registration": registration_output_path, "registration": registration_output_path,
"constraints": constraints_output_path, "constraints": constraints_output_path,
"robot": robot_output_path "robot": robot_output_path,
} }


@ -8,7 +8,7 @@ def main():
    record_option = input("Record a new video? (y/n): ")

    if record_option.lower() == "y":
        print("Starting video recording...")
        video_path = processor.record_video()

        if not video_path:


@ -1,11 +1,9 @@
import os
import cv2
import numpy as np
import pybullet as p
-from utils.math import *
from utils.handle_urdf import handle_urdf
-from scipy.spatial.transform import Rotation as R
class RobotManager:
@ -30,13 +28,13 @@ class RobotManager:
                robot_urdf,
                basePosition=[0, 0, 0],
                useFixedBase=True,
                flags=p.URDF_USE_SELF_COLLISION | p.URDF_USE_INERTIA_FROM_FILE,
            )
        except p.error as e:
            print(f"PyBullet error when loading URDF: {e}")
            raise e

        robot_name = p.getBodyInfo(robot_id)[1].decode("utf-8")
        print(f"Successfully loaded {robot_name} robot with ID: {robot_id}")

        return robot_id
@ -46,11 +44,11 @@ class RobotManager:
        assert self.joint_count > 0, "Robot has no joints"

        # Keywords to look for in joint names to identify end effector
        keywords = ["gripper", "tool", "tcp", "end_effector", "hand"]

        for i in range(self.joint_count):
            info = p.getJointInfo(self.robot_id, i)
            joint_name = info[1].decode("utf-8").lower()

            # Check if any keyword is in the joint name
            if any(keyword in joint_name for keyword in keywords):
@ -60,7 +58,12 @@ class RobotManager:
        return self.joint_count - 1

    # TODO: Use inverse kinematics to set the robot pose
    def set_robot_pose(self, position: np.ndarray, orientation_vectors: np.ndarray, gripper_width: float) -> None:
        pass
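The TODO above leaves set_robot_pose as a stub; a minimal sketch of the intended inverse-kinematics step with PyBullet (joint handling and argument names are assumptions, not this repo's final design):

import numpy as np
import pybullet as p

def set_robot_pose_sketch(robot_id: int, ee_link: int, position: np.ndarray,
                          orientation_quat: np.ndarray) -> None:
    # Solve IK for the end-effector target, then drive each movable joint toward the solution.
    joint_targets = p.calculateInverseKinematics(
        robot_id, ee_link, position.tolist(), targetOrientation=list(orientation_quat)
    )
    movable = [j for j in range(p.getNumJoints(robot_id))
               if p.getJointInfo(robot_id, j)[2] != p.JOINT_FIXED]
    for joint_index, target in zip(movable, joint_targets):
        p.setJointMotorControl2(robot_id, joint_index, p.POSITION_CONTROL, targetPosition=target)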
    # Render the robot in some scene using some camera parameters
@ -85,16 +88,13 @@ class RobotManager:
            yaw=cam_yaw,
            pitch=cam_pitch,
            roll=cam_roll,
            upAxisIndex=2,
        )

        # Compute projection matrix
        aspect = self.img_width / self.img_height
        proj_matrix = p.computeProjectionMatrixFOV(fov=60, aspect=aspect, nearVal=0.01, farVal=100.0)
        # Render the scene
@ -103,7 +103,7 @@ class RobotManager:
            height=self.img_height,
            viewMatrix=view_matrix,
            projectionMatrix=proj_matrix,
            renderer=p.ER_BULLET_HARDWARE_OPENGL,
        )
        # Extract RGB and depth
@ -131,17 +131,19 @@ class RobotManager:
        return rgb.astype(np.uint8)

    def __del__(self) -> None:
        if hasattr(self, "physics_client"):
            try:
                p.disconnect(self.physics_client)
            except:
                pass
if __name__ == "__main__":
    cwd = os.getcwd()
    urdf_path = os.path.join(cwd, "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")

    camera_intrinsics = (320, 240, 320, 240)  # Random intrinsics for example
    robot_vis = RobotManager(urdf_path, camera_intrinsics)


@ -0,0 +1 @@
# TODO: Implement camera calibration functions to extract camera extrinsics and intrinsics
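Since the new calibration module holds only this TODO so far, a standard OpenCV chessboard calibration sketch (board size and image paths are placeholders, not project decisions) could be a starting point:

import glob
import cv2
import numpy as np

def calibrate_from_chessboard(image_glob: str, board_size=(9, 6)) -> tuple:
    # Sketch: estimate the camera matrix and distortion coefficients from chessboard images.
    objp = np.zeros((board_size[0] * board_size[1], 3), np.float32)
    objp[:, :2] = np.mgrid[0 : board_size[0], 0 : board_size[1]].T.reshape(-1, 2)

    obj_points, img_points = [], []
    gray = None
    for path in glob.glob(image_glob):
        gray = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
        found, corners = cv2.findChessboardCorners(gray, board_size)
        if found:
            obj_points.append(objp)
            img_points.append(corners)

    # mtx/dist are the intrinsics; rvecs/tvecs are the per-view extrinsics.
    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
        obj_points, img_points, gray.shape[::-1], None, None
    )
    return mtx, dist, rvecs, tvecs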


@ -2,7 +2,6 @@ import os
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET

"""
Fixes a specific path that uses the ROS-style 'package://' prefix.
The 'package://' prefix is used in URDF files to refer to files in the same package.
@ -10,18 +9,20 @@ However, when we're not running in a ROS environment, the paths are not valid.
This function tries to find the absolute path of the mesh file.
If the mesh file is not found, the original path is used.
"""


def fix_path(path: str, urdf_dir: str) -> str:
    if path.startswith("package://"):
        parts = path[len("package://") :].split("/", 1)
        if len(parts) == 2:
            package_name, rel_path = parts

            # Try potential locations for the mesh
            potential_paths = [
                os.path.join(urdf_dir, rel_path),
                os.path.join(urdf_dir, "../meshes", rel_path),
                os.path.join(urdf_dir, f"../{package_name}", rel_path),
                os.path.join(urdf_dir, "../..", rel_path),
            ]
            for possible_path in potential_paths:
@ -37,18 +38,20 @@ def fix_path(path: str, urdf_dir: str) -> str:
Iterates through the URDF file and fixes the paths of all mesh files.
The URDF file is parsed and the mesh paths are modified in-place.
"""


def fix_mesh_paths(urdf_path: str, urdf_dir: str) -> str:
    root = ET.parse(urdf_path).getroot()

    try:
        for mesh in root.findall(".//mesh"):
            if "filename" in mesh.attrib:
                mesh.attrib["filename"] = fix_path(mesh.attrib["filename"], urdf_dir)
    except Exception as e:
        print(f"Error fixing mesh paths: {e}")
        raise e

    fixed_path = urdf_path.replace(".urdf", "_fixed.urdf")

    return root, fixed_path
@ -56,15 +59,19 @@ def fix_mesh_paths(urdf_path:str, urdf_dir: str) -> str:
""" """
Formats the XML tree to be human-readable and writes it to a file. Formats the XML tree to be human-readable and writes it to a file.
""" """
def format_xml(root: ET.Element, fixed_path: str) -> None: def format_xml(root: ET.Element, fixed_path: str) -> None:
xml_str = ET.tostring(root, encoding='utf-8') xml_str = ET.tostring(root, encoding="utf-8")
dom = minidom.parseString(xml_str) dom = minidom.parseString(xml_str)
with open(fixed_path, 'w', encoding='utf-8') as f: with open(fixed_path, "w", encoding="utf-8") as f:
# Write with nice indentation but remove extra whitespace # Write with nice indentation but remove extra whitespace
pretty_xml = dom.toprettyxml(indent=' ') pretty_xml = dom.toprettyxml(indent=" ")
# Remove extra blank lines that minidom sometimes adds # Remove extra blank lines that minidom sometimes adds
pretty_xml = '\n'.join([line for line in pretty_xml.split('\n') if line.strip()]) pretty_xml = "\n".join(
[line for line in pretty_xml.split("\n") if line.strip()]
)
f.write(pretty_xml) f.write(pretty_xml)
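Putting the two helpers together, the presumable flow inside handle_urdf (its body sits outside this hunk, so this is an assumed sketch, not the file's actual code) is: fix the mesh paths, pretty-print the tree, and return the rewritten file:

import os

def handle_urdf_sketch(urdf_path: str) -> str:
    # Assumed flow: rewrite package:// mesh paths, then write a readable *_fixed.urdf.
    urdf_dir = os.path.dirname(urdf_path)
    root, fixed_path = fix_mesh_paths(urdf_path, urdf_dir)
    format_xml(root, fixed_path)
    return fixed_path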
@ -87,8 +94,11 @@ def handle_urdf(urdf_path: str) -> str:
        raise e


if __name__ == "__main__":
    # Example usage
    cwd = os.getcwd()
-    urdf_path = os.path.join(cwd, "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")
    urdf_path = os.path.join(cwd, "open_phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")
    handle_urdf(urdf_path)


@ -11,13 +11,17 @@ def visualize_mesh(frame, hand_vertices, faces):
    for i in range(21):
        pt = tuple(map(int, hand_vertices[i][:2]))
        if 0 <= pt[0] < width and 0 <= pt[1] < height:
            cv2.circle(mesh_vis, pt, 4, (255, 0, 0), -1)  # Red circles for base vertices

    # Draw the interpolated vertices
    for i in range(21, len(hand_vertices)):
        pt = tuple(map(int, hand_vertices[i][:2]))
        if 0 <= pt[0] < width and 0 <= pt[1] < height:
            cv2.circle(mesh_vis, pt, 2, (0, 255, 255), -1)  # Yellow circles for interpolated vertices

    # Draw the faces of the mesh
    for face in faces:
@ -26,25 +30,53 @@ def visualize_mesh(frame, hand_vertices, faces):
        pt2 = tuple(map(int, hand_vertices[face[1]][:2]))
        pt3 = tuple(map(int, hand_vertices[face[2]][:2]))

        if (0 <= pt1[0] < width and 0 <= pt1[1] < height and
            0 <= pt2[0] < width and 0 <= pt2[1] < height and
            0 <= pt3[0] < width and 0 <= pt3[1] < height):
            cv2.line(mesh_vis, pt1, pt2, (0, 255, 0), 1)  # Green lines for mesh edges
            cv2.line(mesh_vis, pt2, pt3, (0, 255, 0), 1)
            cv2.line(mesh_vis, pt3, pt1, (0, 255, 0), 1)

    # Add explanation text
    cv2.putText(mesh_vis, "Red: Base vertices", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
    cv2.putText(mesh_vis, "Yellow: Interpolated vertices", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
    cv2.putText(mesh_vis, "Green: Mesh edges", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return mesh_vis
def visualize_registration(cloud, vertices, icp_transform):
    """Visualize the ICP registration process"""
    height, width = 480, 640  # Fixed size visualization
    reg_vis = np.ones((height, width, 3), dtype=np.uint8) * 255  # White background
@ -90,25 +122,63 @@ def visualize_registration(cloud, vertices, icp_transform):
            cv2.circle(reg_vis, pt, 2, (0, 255, 0), -1)  # Green dots

    # Add transformation matrix display
    cv2.putText(reg_vis, "ICP Transformation Matrix:", (10, height - 120), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    for i in range(4):
        matrix_text = f"[{icp_transform[i][0]:.2f}, {icp_transform[i][1]:.2f}, {icp_transform[i][2]:.2f}, {icp_transform[i][3]:.2f}]"
        cv2.putText(reg_vis, matrix_text, (10, height - 90 + i * 20), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

    # Add legend
    cv2.putText(reg_vis, "Red: Point Cloud", (width - 200, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.putText(reg_vis, "Blue: Original Mesh", (width - 200, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
    cv2.putText(reg_vis, "Green: Transformed Mesh", (width - 200, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    return reg_vis
def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camera_intrinsics):
    """Visualize before and after applying anatomical constraints"""
    constraints_vis = frame.copy()
    height, width = frame.shape[:2]
@ -129,7 +199,9 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
        if 0 <= u < width and 0 <= v < height:
            if i in highlighted_indices:
                # Draw larger circles for thumb and index fingers (the constrained ones)
                cv2.circle(constraints_vis, (u, v), 5, (0, 0, 255), -1)  # Red circles
            else:
                cv2.circle(constraints_vis, (u, v), 3, (0, 0, 255), -1)
@ -146,9 +218,19 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
        end_u = int(end[0] * focal_x / end[2] + center_x)
        end_v = int(end[1] * focal_y / end[2] + center_y)

        if (0 <= start_u < width and 0 <= start_v < height and
            0 <= end_u < width and 0 <= end_v < height):
            cv2.line(constraints_vis, (start_u, start_v), (end_u, end_v), (0, 0, 255), 2)
    # Draw constrained landmarks
    for i, landmark in enumerate(constrained_landmarks):
@ -160,7 +242,9 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
        if 0 <= u < width and 0 <= v < height:
            if i in highlighted_indices:
                # Draw larger circles for thumb and index fingers
                cv2.circle(constraints_vis, (u, v), 5, (0, 255, 0), -1)  # Green circles
            else:
                cv2.circle(constraints_vis, (u, v), 3, (0, 255, 0), -1)
@ -177,24 +261,56 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
        end_u = int(end[0] * focal_x / end[2] + center_x)
        end_v = int(end[1] * focal_y / end[2] + center_y)

        if (0 <= start_u < width and 0 <= start_v < height and
            0 <= end_u < width and 0 <= end_v < height):
            cv2.line(constraints_vis, (start_u, start_v), (end_u, end_v), (0, 255, 0), 2)

    # Add legend
    cv2.putText(constraints_vis, "Red: Before constraints", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    cv2.putText(constraints_vis, "Green: After constraints", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    return constraints_vis
-def visualize_robot_params(frame, position, direction, normal, gripper_width, camera_intrinsics):
-    """Visualize robot parameters (position, orientation, gripper width)"""
def visualize_robot_params(frame, position, orientation_matrix, gripper_width, camera_intrinsics):
    """Visualize robot parameters (position, orientation matrix, gripper width)"""
    robot_vis = frame.copy()
    height, width = frame.shape[:2]
    focal_x, focal_y, center_x, center_y = camera_intrinsics

    # Extract axes from orientation matrix
    x_axis = orientation_matrix[:, 0]  # First column (principal axis)
    y_axis = orientation_matrix[:, 1]  # Second column
    z_axis = orientation_matrix[:, 2]  # Third column (normal)

    # Project position to image coordinates
    x, y, z = position
    if z > 0:
@ -205,59 +321,113 @@ def visualize_robot_params(frame, position, direction, normal, gripper_width, ca
        # Draw end effector position
        cv2.circle(robot_vis, (u, v), 10, (255, 0, 0), -1)  # Blue circle

        # Draw X axis (principal axis)
        dx, dy, dz = x_axis
        scale = 50  # Scale for better visualization
        end_u = int((x + dx * scale) * focal_x / (z + dz * scale) + center_x)
        end_v = int((y + dy * scale) * focal_y / (z + dz * scale) + center_y)
        if 0 <= end_u < width and 0 <= end_v < height:
            cv2.line(robot_vis, (u, v), (end_u, end_v), (0, 0, 255), 2)  # Red line (X axis)
            cv2.putText(robot_vis, "X", (end_u, end_v), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # Draw Z axis (normal)
        nx, ny, nz = z_axis
        end_u = int((x + nx * scale) * focal_x / (z + nz * scale) + center_x)
        end_v = int((y + ny * scale) * focal_y / (z + nz * scale) + center_y)
        if 0 <= end_u < width and 0 <= end_v < height:
            cv2.line(robot_vis, (u, v), (end_u, end_v), (0, 255, 0), 2)  # Green line (Z axis)
            cv2.putText(robot_vis, "Z", (end_u, end_v), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

-        # Calculate Y axis (cross product of Z and X for right-hand coordinate system)
-        y_axis = np.cross(normal, direction)
-        y_axis = y_axis / np.linalg.norm(y_axis)
        # Draw Y axis
        yx, yy, yz = y_axis
        end_u = int((x + yx * scale) * focal_x / (z + yz * scale) + center_x)
        end_v = int((y + yy * scale) * focal_y / (z + yz * scale) + center_y)
        if 0 <= end_u < width and 0 <= end_v < height:
            cv2.line(robot_vis, (u, v), (end_u, end_v), (255, 0, 0), 2)  # Blue line (Y axis)
            cv2.putText(robot_vis, "Y", (end_u, end_v), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Visualize gripper width
        gripper_radius = int(gripper_width * 100)  # Scale for better visualization
        cv2.circle(robot_vis, (u, v), gripper_radius, (0, 255, 255), 2)  # Yellow circle

        # Add parameter values as text
        y_offset = height - 160
        cv2.putText(robot_vis, f"Position: ({x:.2f}, {y:.2f}, {z:.2f})", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        y_offset += 30
-        cv2.putText(robot_vis, f"Direction: ({dx:.2f}, {dy:.2f}, {dz:.2f})", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
        cv2.putText(robot_vis, f"X axis: ({x_axis[0]:.2f}, {x_axis[1]:.2f}, {x_axis[2]:.2f})", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
        y_offset += 30
-        cv2.putText(robot_vis, f"Normal: ({nx:.2f}, {ny:.2f}, {nz:.2f})", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(robot_vis, f"Z axis: ({z_axis[0]:.2f}, {z_axis[1]:.2f}, {z_axis[2]:.2f})", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        y_offset += 30
        cv2.putText(robot_vis, f"Gripper Width: {gripper_width:.2f}", (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

    return robot_vis
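All of these overlays project 3D points with the same pinhole model (u = x*fx/z + cx, v = y*fy/z + cy); a small hypothetical helper could replace the repeated inline math:

import numpy as np

def project_point(point, camera_intrinsics):
    # Hypothetical helper: pinhole projection of a 3D point into pixel coordinates.
    focal_x, focal_y, center_x, center_y = camera_intrinsics
    x, y, z = point
    if z <= 0:
        return None  # behind the camera; caller should skip drawing
    return int(x * focal_x / z + center_x), int(y * focal_y / z + center_y)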