improve icp and robot params

Ethan Clark 2025-03-21 17:05:51 -07:00
parent aae4e91f54
commit b38527b5db
6 changed files with 690 additions and 417 deletions

View File

@ -1,15 +1,17 @@
import os
import cv2
import time
import torch
import cv2
import depth_pro
import mediapipe as mp
import numpy as np
import open3d as o3d
import mediapipe as mp
import torch
from PIL import Image
from robot_manager import RobotManager
from sam2.sam2_image_predictor import SAM2ImagePredictor
from tqdm import tqdm
from utils.visualizations import *
from robot_manager import RobotManager
# from diffusers import StableDiffusionInpaintPipeline
class HandProcessor:
@ -37,39 +39,38 @@ class HandProcessor:
self.midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
self.transform = self.midas_transforms.dpt_transform
# Segment hand from image using MediaPipe Hands landmarks
def _create_mask(self, image: np.ndarray, landmarks: list, thickness: int=5, padding:int=10) -> np.ndarray:
height, width = image.shape[:2]
mask = np.zeros((height, width), dtype=np.uint8)
# Load SAM2 model for hand segmentation
print("Loading SAM2 model...")
self.sam2_predictor = SAM2ImagePredictor.from_pretrained(
"facebook/sam2-hiera-large"
)
points = [(int(landmark.x * width), int(landmark.y * height)) for landmark in landmarks.landmark]
def _create_mask(self, frame: np.ndarray, landmarks: list) -> np.ndarray:
height, width = frame.shape[:2]
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Draw filled circles at each landmark
for point in points:
cv2.circle(mask, point, thickness, 255, -1)
# Set image in SAM2 predictor
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
self.sam2_predictor.set_image(frame_rgb) # Set image for prediction
# Connect all landmarks with thick lines
for connection in self.mp_hands.HAND_CONNECTIONS:
start_idx, end_idx = connection
cv2.line(mask, points[start_idx], points[end_idx], 255, thickness)
# Convert landmarks to point prompts
points = []
for landmark in landmarks.landmark:
x, y = int(landmark.x * width), int(landmark.y * height)
points.append([x, y])
# Create palm by connecting base of fingers with wrist
palm_points = [points[0], points[1], points[5], points[9], points[13], points[17]]
cv2.fillPoly(mask, [np.array(palm_points)], 255)
input_points = np.array(points)
# Create shape between fingers
finger_bases = [(1,5), (5,9), (9,13), (13,17)]
for base1, base2 in finger_bases:
triangle = np.array([points[0], points[base1], points[base2]])
cv2.fillPoly(mask, [triangle], 255)
# Predict mask with point prompts
masks, _, _ = self.sam2_predictor.predict(
point_coords=input_points, # Pass the points as prompts
point_labels=np.ones(
len(input_points)
), # All points from hand are foreground
multimask_output=False, # Just get one mask
)
# Dilate to smooth and expand slightly
kernel = np.ones((padding, padding), np.uint8)
mask = cv2.dilate(mask, kernel, iterations=1)
# Smooth the mask
mask = cv2.GaussianBlur(mask, (21, 21), 0)
_, mask = cv2.threshold(mask, 50, 255, cv2.THRESH_BINARY)
mask = masks[0].astype(np.uint8) * 255
return mask
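
For reference, the new SAM2 point-prompt flow above condenses to the standalone sketch below (assuming the facebook/sam2-hiera-large checkpoint and MediaPipe-style landmarks with normalized .x/.y coordinates; the function name is a placeholder, not part of this commit):

import cv2
import numpy as np
import torch
from sam2.sam2_image_predictor import SAM2ImagePredictor

predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")

def segment_hand(frame_bgr: np.ndarray, landmarks) -> np.ndarray:
    height, width = frame_bgr.shape[:2]
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    # Every MediaPipe landmark becomes a foreground point prompt.
    points = np.array(
        [[int(lm.x * width), int(lm.y * height)] for lm in landmarks.landmark]
    )
    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        predictor.set_image(frame_rgb)
        masks, _, _ = predictor.predict(
            point_coords=points,
            point_labels=np.ones(len(points)),  # all prompts are foreground
            multimask_output=False,  # single best mask
        )
    return masks[0].astype(np.uint8) * 255  # binary 0/255 mask
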
@ -77,7 +78,8 @@ class HandProcessor:
Transform input image to match MiDaS model requirements
Estimate depth map using MiDaS model
"""
# TODO: Look into why screen goes black sometimes
# TODO: Swap MiDaS for ml-depth-pro model
def _estimate_depth(self, image: np.ndarray) -> tuple:
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
@ -116,7 +118,6 @@ class HandProcessor:
v_coords = v_coords[valid_indices]
z_values = z_values[valid_indices]
# NOTE: Arbitrary scaling factor for depth
z_metric = z_values * 0.5
# Back-project to 3D using camera intrinsics
@ -127,32 +128,38 @@ class HandProcessor:
return points
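
The back-projection step above follows the standard pinhole model. A minimal sketch, assuming intrinsics are packed as (focal_x, focal_y, center_x, center_y) like self.camera_intrinsics elsewhere in this file, and that z_metric already holds the scaled depths (the full _create_cloud body is not shown in this hunk):

import numpy as np

def backproject(u_coords, v_coords, z_metric, intrinsics):
    focal_x, focal_y, center_x, center_y = intrinsics
    # Pinhole model: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy
    x = (u_coords - center_x) * z_metric / focal_x
    y = (v_coords - center_y) * z_metric / focal_y
    return np.column_stack([x, y, z_metric])  # (N, 3) point cloud
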
# TODO: Look into better Z scaling
# TODO: Look into better depth scaling
def _create_mesh(self, landmarks: list, image_dims: tuple) -> np.ndarray:
width, height = image_dims
vertices = []
for landmark in landmarks.landmark:
vertices.append([
landmark.x * width,
landmark.y * height,
landmark.z * width
])
vertices.append(
[landmark.x * width, landmark.y * height, landmark.z * width]
)
# Define faces (triangles) connecting landmarks
faces = [
# Palm
[0, 1, 5], [0, 5, 9], [0, 9, 13], [0, 13, 17],
[0, 1, 5],
[0, 5, 9],
[0, 9, 13],
[0, 13, 17],
# Thumb
[1, 2, 3], [2, 3, 4],
[1, 2, 3],
[2, 3, 4],
# Index finger
[5, 6, 7], [6, 7, 8],
[5, 6, 7],
[6, 7, 8],
# Middle finger
[9, 10, 11], [10, 11, 12],
[9, 10, 11],
[10, 11, 12],
# Ring finger
[13, 14, 15], [14, 15, 16],
[13, 14, 15],
[14, 15, 16],
# Pinky
[17, 18, 19], [18, 19, 20]
[17, 18, 19],
[18, 19, 20],
]
dense_vertices = list(vertices)
@ -166,11 +173,12 @@ class HandProcessor:
# Add 2 interpolated points between each connected pair
for t in [0.33, 0.66]:
interp_point = start_point * (1-t) + end_point * t
interp_point = start_point * (1 - t) + end_point * t
dense_vertices.append(interp_point.tolist())
return np.array(dense_vertices), np.array(faces)
# TODO: Improve ICP registration for better alignment
def _icp_registration(self, cloud: np.ndarray, vertices: np.ndarray) -> np.ndarray:
source = o3d.geometry.PointCloud()
source.points = o3d.utility.Vector3dVector(vertices)
@ -178,28 +186,41 @@ class HandProcessor:
target = o3d.geometry.PointCloud()
target.points = o3d.utility.Vector3dVector(cloud)
# Calculate centroids
source_centroid = np.mean(vertices, axis=0)
target_centroid = np.mean(cloud, axis=0)
# Calculate initial translation to align centroids
initial_translation = target_centroid - source_centroid
# Create initial transformation matrix
trans_init = np.eye(4)
trans_init[:3, 3] = initial_translation
result = o3d.pipelines.registration.registration_icp(
source,
target,
max_correspondence_distance=0.05,
max_correspondence_distance=0.1, # Increased distance
init=trans_init, # Initial transformation
estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
max_iteration=100
), # Increased iterations
)
transformation = result.transformation
return transformation
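
Taken together, this change seeds ICP with a centroid-aligning translation and loosens the correspondence distance to 0.1 with up to 100 iterations. A standalone sketch of the same Open3D call (function name is a placeholder):

import numpy as np
import open3d as o3d

def icp_align(vertices: np.ndarray, cloud: np.ndarray) -> np.ndarray:
    source = o3d.geometry.PointCloud()
    source.points = o3d.utility.Vector3dVector(vertices)
    target = o3d.geometry.PointCloud()
    target.points = o3d.utility.Vector3dVector(cloud)
    # Seed ICP by translating the mesh centroid onto the cloud centroid.
    trans_init = np.eye(4)
    trans_init[:3, 3] = cloud.mean(axis=0) - vertices.mean(axis=0)
    result = o3d.pipelines.registration.registration_icp(
        source,
        target,
        max_correspondence_distance=0.1,
        init=trans_init,
        estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
        criteria=o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=100),
    )
    return result.transformation  # 4x4 homogeneous transform
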
# TODO: Look into better depth scaling
def _refine_landmarks(self, landmarks: list, transform: np.ndarray, image_dims: tuple):
width, height = image_dims
refined_landmarks = []
for landmark in landmarks.landmark:
point = np.array([
landmark.x * width,
landmark.y * height,
landmark.z * width, # TODO: Figure out better scaling factor
1.0
])
point = np.array(
[landmark.x * width, landmark.y * height, landmark.z * width, 1.0]
)
# Apply transformation to 3D point
transformed = np.dot(transform, point)
@ -244,42 +265,87 @@ class HandProcessor:
return constrained
def _get_robot_params(self, refined_landmarks) -> tuple:
# Extract key landmarks
wrist = refined_landmarks[0] # Wrist landmark
thumb_tip = refined_landmarks[4] # Thumb tip
index_tip = refined_landmarks[8] # Index finger tip
def _get_robot_params(self, refined_landmarks):
# Extract keypoints
landmarks = np.array(refined_landmarks)
# Calculate end effector position (midpoint between thumb and index tips)
# Define indices for specific parts of the hand
thumb_indices = [1, 2, 3, 4] # Thumb landmarks
index_indices = [5, 6, 7, 8] # Index finger landmarks
thumb_tip_idx = 4
index_tip_idx = 8
# 1. Set target position as midpoint between thumb tip and index tip
thumb_tip = landmarks[thumb_tip_idx]
index_tip = landmarks[index_tip_idx]
position = (thumb_tip + index_tip) / 2
# Calculate vectors for orientation
v1 = thumb_tip - wrist # Vector from wrist to thumb tip
v2 = index_tip - wrist # Vector from wrist to index tip
# 2. Calculate orientation
# Get all thumb and index finger points for plane fitting
thumb_points = landmarks[thumb_indices]
index_points = landmarks[index_indices]
finger_points = np.vstack([thumb_points, index_points])
# Calculate normal to hand plane
normal = np.cross(v1, v2)
if np.linalg.norm(normal) > 0:
normal = normal / np.linalg.norm(normal)
else:
# Default if vectors are collinear
normal = np.array([0, 0, 1])
# Fit plane to finger points
centroid = np.mean(finger_points, axis=0)
centered_points = finger_points - centroid
# Calculate main direction along thumb
direction = thumb_tip - wrist
if np.linalg.norm(direction) > 0:
direction = direction / np.linalg.norm(direction)
else:
# Default if thumb is at wrist (unlikely)
direction = np.array([1, 0, 0])
# Use SVD to find the normal to the best-fitting plane
u, s, vh = np.linalg.svd(centered_points)
# The normal is the last right singular vector
normal = vh[2, :]
# Calculate gripper width
# Ensure normal is a unit vector
normal = normal / np.linalg.norm(normal)
# Fit a principal axis through thumb points
thumb_centroid = np.mean(thumb_points, axis=0)
thumb_centered = thumb_points - thumb_centroid
# Use SVD again to find direction of maximum variance (principal axis)
u, s, vh = np.linalg.svd(thumb_centered)
principal_axis = vh[0, :] # First singular vector
# Ensure principal axis is a unit vector
principal_axis = principal_axis / np.linalg.norm(principal_axis)
# Construct orthogonal vectors for orientation matrix
z_axis = normal # Normal to the plane
x_axis = principal_axis # Along the thumb
# Ensure x_axis is orthogonal to z_axis
x_axis = x_axis - np.dot(x_axis, z_axis) * z_axis
x_axis = x_axis / np.linalg.norm(x_axis)
# Compute y_axis as cross product to ensure orthogonality
y_axis = np.cross(z_axis, x_axis)
# Create rotation matrix
rotation_matrix = np.column_stack([x_axis, y_axis, z_axis])
# 3. Calculate gripper width
gripper_width = np.linalg.norm(thumb_tip - index_tip)
return position, direction, normal, gripper_width
# Apply percentile threshold to mitigate slippage (as mentioned in the paper)
# Store gripper widths in a buffer to compute percentiles
if not hasattr(self, "gripper_width_buffer"):
self.gripper_width_buffer = []
self.gripper_width_buffer.append(gripper_width)
# Keep only the last 100 values
if len(self.gripper_width_buffer) > 100:
self.gripper_width_buffer.pop(0)
# Apply 20th percentile threshold
if len(self.gripper_width_buffer) > 5: # Need enough samples
min_width = np.percentile(self.gripper_width_buffer, 20)
if gripper_width < min_width:
gripper_width = min_width
return position, rotation_matrix, gripper_width
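
Condensed, the new parameter extraction amounts to: gripper position at the thumb/index midpoint, z axis from an SVD plane fit over the thumb and index points, x axis from the thumb's principal direction re-orthogonalized against z, and a rolling 20th-percentile clamp on gripper width. A sketch of the same math, assuming landmarks is the (21, 3) array of refined hand landmarks (helper names are placeholders):

import numpy as np

def hand_to_gripper_pose(landmarks: np.ndarray):
    thumb, index = landmarks[1:5], landmarks[5:9]
    position = (landmarks[4] + landmarks[8]) / 2  # midpoint of the two tips
    # z axis: normal of the plane fit to thumb + index points via SVD.
    finger_pts = np.vstack([thumb, index])
    _, _, vh = np.linalg.svd(finger_pts - finger_pts.mean(axis=0))
    z_axis = vh[2] / np.linalg.norm(vh[2])
    # x axis: thumb principal direction, made orthogonal to z.
    _, _, vh_thumb = np.linalg.svd(thumb - thumb.mean(axis=0))
    x_axis = vh_thumb[0] - np.dot(vh_thumb[0], z_axis) * z_axis
    x_axis /= np.linalg.norm(x_axis)
    y_axis = np.cross(z_axis, x_axis)
    rotation = np.column_stack([x_axis, y_axis, z_axis])
    gripper_width = np.linalg.norm(landmarks[4] - landmarks[8])
    return position, rotation, gripper_width

def clamp_gripper_width(width: float, buffer: list, max_len: int = 100) -> float:
    # Rolling 20th-percentile floor to suppress momentary "slippage" closures.
    buffer.append(width)
    if len(buffer) > max_len:
        buffer.pop(0)
    if len(buffer) > 5:
        width = max(width, np.percentile(buffer, 20))
    return width
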
def record_video(self) -> str:
output_dir = os.path.join(os.getcwd(), 'recordings')
output_dir = os.path.join(os.getcwd(), "recordings")
os.makedirs(output_dir, exist_ok=True)
timestamp = time.strftime("%Y%m%d-%H%M%S")
@ -296,7 +362,7 @@ class HandProcessor:
# Update camera intrinsics based on video dimensions
self.camera_intrinsics = (width * 0.8, height * 0.8, width / 2, height / 2)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = None
recording = False
@ -317,11 +383,19 @@ class HandProcessor:
# Display recording indicator ONLY on the display frame
if recording:
cv2.circle(display_frame, (30, 30), 15, (0, 0, 255), -1)
cv2.putText(display_frame, "RECORDING", (50, 40), cv2.FONT_HERSHEY_SIMPLEX,
1, (0, 0, 255), 2, cv2.LINE_AA)
cv2.putText(
display_frame,
"RECORDING",
(50, 40),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 0, 255),
2,
cv2.LINE_AA,
)
# Show the display frame (with indicator if recording)
cv2.imshow('Video Recording', display_frame)
cv2.imshow("Video Recording", display_frame)
# Write the original frame (without indicator) to video file if recording
if recording and out is not None:
@ -329,10 +403,7 @@ class HandProcessor:
key = cv2.waitKey(1) & 0xFF
# NOTE: Haven't tested what happens if user records multiple videos in one session (probably overwrites?)
if key == ord('q'):
break
elif key == ord('r'):
if key == ord("r"):
recording = not recording
if recording:
print(f"Started recording to {video_filename}")
@ -342,6 +413,7 @@ class HandProcessor:
out.release()
print(f"Stopped recording. Video saved to {video_filename}")
did_record = True
break
if out is not None:
out.release()
@ -373,12 +445,18 @@ class HandProcessor:
constraints_output_path = f"{base_path}_constraints.mp4"
robot_output_path = f"{base_path}_robot.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
segmented_out = cv2.VideoWriter(segmented_output_path, fourcc, fps, (width, height))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
segmented_out = cv2.VideoWriter(
segmented_output_path, fourcc, fps, (width, height)
)
depth_out = cv2.VideoWriter(depth_output_path, fourcc, fps, (width, height))
mesh_out = cv2.VideoWriter(mesh_output_path, fourcc, fps, (width, height))
reg_out = cv2.VideoWriter(registration_output_path, fourcc, fps, (640, 480)) # Fixed size
constraints_out = cv2.VideoWriter(constraints_output_path, fourcc, fps, (width, height))
reg_out = cv2.VideoWriter(
registration_output_path, fourcc, fps, (640, 480)
) # Fixed size
constraints_out = cv2.VideoWriter(
constraints_output_path, fourcc, fps, (width, height)
)
robot_out = cv2.VideoWriter(robot_output_path, fourcc, fps, (width, height))
print(f"Processing video with {total_frames} frames...")
@ -396,48 +474,60 @@ class HandProcessor:
segmented_frame = frame.copy()
depth_frame = np.zeros((height, width, 3), dtype=np.uint8)
mesh_frame = frame.copy()
reg_frame = np.ones((480, 640, 3), dtype=np.uint8) * 255 # Fixed size, white background
reg_frame = (
np.ones((480, 640, 3), dtype=np.uint8) * 255
) # Fixed size, white background
constraints_frame = frame.copy()
robot_frame = frame.copy()
# Process if hand is detected
if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
# Segment hand
hand_mask = self._create_mask(frame, hand_landmarks)
mask_overlay = frame.copy()
mask_overlay[hand_mask > 0] = [0, 0, 255] # Red color for mask
segmented_frame = cv2.addWeighted(frame, 0.7, mask_overlay, 0.3, 0)
# Depth estimation
depth_map, depth_vis = self._estimate_depth(frame)
depth_colored = cv2.applyColorMap(depth_vis, cv2.COLORMAP_JET)
depth_frame = depth_colored.copy()
# Create hand mesh
hand_vertices, hand_faces = self._create_mesh(hand_landmarks, (width, height))
mesh_frame = visualize_mesh(frame, hand_vertices, hand_faces)
# Create point cloud from depth
cloud = self._create_cloud(depth_map, hand_mask)
# Perform ICP registration
hand_vertices, hand_faces = self._create_mesh(
hand_landmarks, (width, height)
)
mesh_frame = visualize_mesh(frame, hand_vertices, hand_faces)
icp_transform = self._icp_registration(cloud, hand_vertices)
reg_frame = visualize_registration(cloud, hand_vertices, icp_transform)
reg_frame = visualize_registration(
cloud, hand_vertices, icp_transform
)
# Refine landmarks using ICP transformation
refined_landmarks = self._refine_landmarks(hand_landmarks, icp_transform, (width, height))
refined_landmarks = self._refine_landmarks(
hand_landmarks, icp_transform, (width, height)
)
# Store pre-constraint landmarks for visualization
original_refined = refined_landmarks.copy()
# Apply anatomical constraints
constrained_landmarks = self._apply_constraints(refined_landmarks)
constraints_frame = visualize_constraints(frame, original_refined, constrained_landmarks, self.camera_intrinsics)
constraints_frame = visualize_constraints(
frame,
original_refined,
constrained_landmarks,
self.camera_intrinsics,
)
# # Calculate robot parameters
# position, direction, normal, gripper_width = self._get_robot_params(constrained_landmarks)
# robot_frame = visualize_robot_params(frame, position, direction, normal, gripper_width)
position, orientation, gripper_width = self._get_robot_params(
constrained_landmarks
)
robot_frame = visualize_robot_params(
frame,
position,
orientation,
gripper_width,
self.camera_intrinsics,
)
segmented_out.write(segmented_frame)
depth_out.write(depth_frame)
@ -450,12 +540,12 @@ class HandProcessor:
display_size = (int(width * display_scale), int(height * display_scale))
reg_display_size = (int(640 * display_scale), int(480 * display_scale))
cv2.imshow('Segmented', cv2.resize(segmented_frame, display_size))
cv2.imshow('Depth', cv2.resize(depth_frame, display_size))
cv2.imshow('Mesh', cv2.resize(mesh_frame, display_size))
cv2.imshow('Registration', cv2.resize(reg_frame, reg_display_size))
cv2.imshow('Constraints', cv2.resize(constraints_frame, display_size))
cv2.imshow('Robot Parameters', cv2.resize(robot_frame, display_size))
cv2.imshow("Segmented", cv2.resize(segmented_frame, display_size))
cv2.imshow("Depth", cv2.resize(depth_frame, display_size))
cv2.imshow("Mesh", cv2.resize(mesh_frame, display_size))
cv2.imshow("Registration", cv2.resize(reg_frame, reg_display_size))
cv2.imshow("Constraints", cv2.resize(constraints_frame, display_size))
cv2.imshow("Robot Parameters", cv2.resize(robot_frame, display_size))
if cv2.waitKey(1) & 0xFF == 27: # ESC to exit
break
@ -483,5 +573,5 @@ class HandProcessor:
"mesh": mesh_output_path,
"registration": registration_output_path,
"constraints": constraints_output_path,
"robot": robot_output_path
"robot": robot_output_path,
}

View File

@ -8,7 +8,7 @@ def main():
record_option = input("Record a new video? (y/n): ")
if record_option.lower() == 'y':
if record_option.lower() == "y":
print("Starting video recording...")
video_path = processor.record_video()
if not video_path:

View File

@ -1,11 +1,9 @@
import os
import cv2
import numpy as np
import pybullet as p
from utils.math import *
from utils.handle_urdf import handle_urdf
from scipy.spatial.transform import Rotation as R
class RobotManager:
@ -30,13 +28,13 @@ class RobotManager:
robot_urdf,
basePosition=[0, 0, 0],
useFixedBase=True,
flags=p.URDF_USE_SELF_COLLISION | p.URDF_USE_INERTIA_FROM_FILE
flags=p.URDF_USE_SELF_COLLISION | p.URDF_USE_INERTIA_FROM_FILE,
)
except p.error as e:
print(f"PyBullet error when loading URDF: {e}")
raise e
robot_name = p.getBodyInfo(robot_id)[1].decode('utf-8')
robot_name = p.getBodyInfo(robot_id)[1].decode("utf-8")
print(f"Successfully loaded {robot_name} robot with ID: {robot_id}")
return robot_id
@ -46,11 +44,11 @@ class RobotManager:
assert self.joint_count > 0, "Robot has no joints"
# Keywords to look for in joint names to identify end effector
keywords = ['gripper', 'tool', 'tcp', 'end_effector', 'hand']
keywords = ["gripper", "tool", "tcp", "end_effector", "hand"]
for i in range(self.joint_count):
info = p.getJointInfo(self.robot_id, i)
joint_name = info[1].decode('utf-8').lower()
joint_name = info[1].decode("utf-8").lower()
# Check if any keyword is in the joint name
if any(keyword in joint_name for keyword in keywords):
@ -60,7 +58,12 @@ class RobotManager:
return self.joint_count - 1
# TODO: Use inverse kinematics to set the robot pose
def set_robot_pose(self, position: np.ndarray, orientation_vectors: np.ndarray, gripper_width: float) -> None:
def set_robot_pose(
self,
position: np.ndarray,
orientation_vectors: np.ndarray,
gripper_width: float,
) -> None:
pass
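
One plausible way to fill this TODO is PyBullet's built-in IK solver. A hedged sketch, assuming the end-effector joint index found by the keyword search above and a 3x3 rotation matrix for the orientation (function name is a placeholder; the joint-index mapping below ignores fixed joints):

import numpy as np
import pybullet as p
from scipy.spatial.transform import Rotation as R

def solve_arm_pose(robot_id, end_effector_index, position, rotation_matrix):
    # PyBullet expects the target orientation as an xyzw quaternion.
    quat = R.from_matrix(rotation_matrix).as_quat()
    joint_targets = p.calculateInverseKinematics(
        robot_id,
        end_effector_index,
        targetPosition=position,
        targetOrientation=quat,
    )
    # calculateInverseKinematics returns values for movable joints only;
    # a robot with fixed joints would need an index remapping here.
    for joint_idx, target in enumerate(joint_targets):
        p.setJointMotorControl2(
            robot_id, joint_idx, p.POSITION_CONTROL, targetPosition=target
        )
    # Mapping gripper_width onto the gripper joint(s) is robot-specific and omitted.
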
# Render the robot in some scene using some camera parameters
@ -85,16 +88,13 @@ class RobotManager:
yaw=cam_yaw,
pitch=cam_pitch,
roll=cam_roll,
upAxisIndex=2
upAxisIndex=2,
)
# Compute projection matrix
aspect = self.img_width / self.img_height
proj_matrix = p.computeProjectionMatrixFOV(
fov=60,
aspect=aspect,
nearVal=0.01,
farVal=100.0
fov=60, aspect=aspect, nearVal=0.01, farVal=100.0
)
# Render the scene
@ -103,7 +103,7 @@ class RobotManager:
height=self.img_height,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=p.ER_BULLET_HARDWARE_OPENGL
renderer=p.ER_BULLET_HARDWARE_OPENGL,
)
# Extract RGB and depth
@ -131,18 +131,20 @@ class RobotManager:
return rgb.astype(np.uint8)
def __del__(self) -> None:
if hasattr(self, 'physics_client'):
if hasattr(self, "physics_client"):
try:
p.disconnect(self.physics_client)
except:
pass
if __name__ == "__main__":
cwd = os.getcwd()
urdf_path = os.path.join(cwd, "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")
camera_intrinsics = (320, 240, 320, 240) # Random intrinsics for example
urdf_path = os.path.join(
cwd,
"notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf",
)
camera_intrinsics = (320, 240, 320, 240) # Random intrinsics for example
robot_vis = RobotManager(urdf_path, camera_intrinsics)
rendered_image = robot_vis.render_robot()

View File

@ -0,0 +1 @@
# TODO: Implement camera calibration functions to extract camera extrinsics and intrinsics
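
If this ends up using the standard OpenCV chessboard flow, the skeleton would look roughly like the sketch below (board size, square size, and the function name are placeholders, not part of this commit):

import cv2
import numpy as np

def calibrate_from_chessboard(images, board_size=(9, 6), square_size=0.025):
    # 3D reference corners for one board view, on the z = 0 plane.
    objp = np.zeros((board_size[0] * board_size[1], 3), np.float32)
    objp[:, :2] = np.mgrid[0 : board_size[0], 0 : board_size[1]].T.reshape(-1, 2)
    objp *= square_size

    obj_points, img_points = [], []
    image_size = None
    for img in images:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image_size = gray.shape[::-1]
        found, corners = cv2.findChessboardCorners(gray, board_size)
        if found:
            obj_points.append(objp)
            img_points.append(corners)

    # Intrinsics (camera matrix + distortion) and per-view extrinsics (rvecs/tvecs).
    _, camera_matrix, dist_coeffs, rvecs, tvecs = cv2.calibrateCamera(
        obj_points, img_points, image_size, None, None
    )
    return camera_matrix, dist_coeffs, rvecs, tvecs
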

View File

@ -2,7 +2,6 @@ import os
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET
"""
Fixes a specific path that uses the ROS-style 'package://' prefix.
The 'package://' prefix is used in URDF files to refer to files in the same package.
@ -10,18 +9,20 @@ However, when we're not running in a ROS environment, the paths are not valid.
This function tries to find the absolute path of the mesh file.
If the mesh file is not found, the original path is used.
"""
def fix_path(path: str, urdf_dir: str) -> str:
if path.startswith('package://'):
parts = path[len('package://'):].split('/', 1)
if path.startswith("package://"):
parts = path[len("package://") :].split("/", 1)
if len(parts) == 2:
package_name, rel_path = parts
# Try potential locations for the mesh
potential_paths = [
os.path.join(urdf_dir, rel_path),
os.path.join(urdf_dir, '../meshes', rel_path),
os.path.join(urdf_dir, f'../{package_name}', rel_path),
os.path.join(urdf_dir, '../..', rel_path)
os.path.join(urdf_dir, "../meshes", rel_path),
os.path.join(urdf_dir, f"../{package_name}", rel_path),
os.path.join(urdf_dir, "../..", rel_path),
]
for possible_path in potential_paths:
@ -37,18 +38,20 @@ def fix_path(path: str, urdf_dir: str) -> str:
Iterates through the URDF file and fixes the paths of all mesh files.
The URDF file is parsed and the mesh paths are modified in-place.
"""
def fix_mesh_paths(urdf_path:str, urdf_dir: str) -> str:
def fix_mesh_paths(urdf_path: str, urdf_dir: str) -> str:
root = ET.parse(urdf_path).getroot()
try:
for mesh in root.findall('.//mesh'):
if 'filename' in mesh.attrib:
mesh.attrib['filename'] = fix_path(mesh.attrib['filename'], urdf_dir)
for mesh in root.findall(".//mesh"):
if "filename" in mesh.attrib:
mesh.attrib["filename"] = fix_path(mesh.attrib["filename"], urdf_dir)
except Exception as e:
print(f"Error fixing mesh paths: {e}")
raise e
fixed_path = urdf_path.replace('.urdf', '_fixed.urdf')
fixed_path = urdf_path.replace(".urdf", "_fixed.urdf")
return root, fixed_path
@ -56,15 +59,19 @@ def fix_mesh_paths(urdf_path:str, urdf_dir: str) -> str:
"""
Formats the XML tree to be human-readable and writes it to a file.
"""
def format_xml(root: ET.Element, fixed_path: str) -> None:
xml_str = ET.tostring(root, encoding='utf-8')
xml_str = ET.tostring(root, encoding="utf-8")
dom = minidom.parseString(xml_str)
with open(fixed_path, 'w', encoding='utf-8') as f:
with open(fixed_path, "w", encoding="utf-8") as f:
# Write with nice indentation but remove extra whitespace
pretty_xml = dom.toprettyxml(indent=' ')
pretty_xml = dom.toprettyxml(indent=" ")
# Remove extra blank lines that minidom sometimes adds
pretty_xml = '\n'.join([line for line in pretty_xml.split('\n') if line.strip()])
pretty_xml = "\n".join(
[line for line in pretty_xml.split("\n") if line.strip()]
)
f.write(pretty_xml)
@ -87,8 +94,11 @@ def handle_urdf(urdf_path: str) -> str:
raise e
if __name__ == '__main__':
if __name__ == "__main__":
# Example usage
cwd = os.getcwd()
urdf_path = os.path.join(cwd, "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf")
urdf_path = os.path.join(
cwd,
"open_phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf",
)
handle_urdf(urdf_path)

View File

@ -11,13 +11,17 @@ def visualize_mesh(frame, hand_vertices, faces):
for i in range(21):
pt = tuple(map(int, hand_vertices[i][:2]))
if 0 <= pt[0] < width and 0 <= pt[1] < height:
cv2.circle(mesh_vis, pt, 4, (255, 0, 0), -1) # Red circles for base vertices
cv2.circle(
mesh_vis, pt, 4, (255, 0, 0), -1
) # Red circles for base vertices
# Draw the interpolated vertices
for i in range(21, len(hand_vertices)):
pt = tuple(map(int, hand_vertices[i][:2]))
if 0 <= pt[0] < width and 0 <= pt[1] < height:
cv2.circle(mesh_vis, pt, 2, (0, 255, 255), -1) # Yellow circles for interpolated vertices
cv2.circle(
mesh_vis, pt, 2, (0, 255, 255), -1
) # Yellow circles for interpolated vertices
# Draw the faces of the mesh
for face in faces:
@ -26,25 +30,53 @@ def visualize_mesh(frame, hand_vertices, faces):
pt2 = tuple(map(int, hand_vertices[face[1]][:2]))
pt3 = tuple(map(int, hand_vertices[face[2]][:2]))
if (0 <= pt1[0] < width and 0 <= pt1[1] < height and
0 <= pt2[0] < width and 0 <= pt2[1] < height and
0 <= pt3[0] < width and 0 <= pt3[1] < height):
cv2.line(mesh_vis, pt1, pt2, (0, 255, 0), 1) # Green lines for mesh edges
if (
0 <= pt1[0] < width
and 0 <= pt1[1] < height
and 0 <= pt2[0] < width
and 0 <= pt2[1] < height
and 0 <= pt3[0] < width
and 0 <= pt3[1] < height
):
cv2.line(
mesh_vis, pt1, pt2, (0, 255, 0), 1
) # Green lines for mesh edges
cv2.line(mesh_vis, pt2, pt3, (0, 255, 0), 1)
cv2.line(mesh_vis, pt3, pt1, (0, 255, 0), 1)
# Add explanation text
cv2.putText(mesh_vis, "Red: Base vertices", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
cv2.putText(mesh_vis, "Yellow: Interpolated vertices", (10, 60),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
cv2.putText(mesh_vis, "Green: Mesh edges", (10, 90),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
cv2.putText(
mesh_vis,
"Red: Base vertices",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.6,
(255, 0, 0),
2,
)
cv2.putText(
mesh_vis,
"Yellow: Interpolated vertices",
(10, 60),
cv2.FONT_HERSHEY_SIMPLEX,
0.6,
(0, 255, 255),
2,
)
cv2.putText(
mesh_vis,
"Green: Mesh edges",
(10, 90),
cv2.FONT_HERSHEY_SIMPLEX,
0.6,
(0, 255, 0),
2,
)
return mesh_vis
def visualize_registration(cloud, vertices, icp_transform):
"""Visualize the ICP registration process"""
height, width = 480, 640 # Fixed size visualization
reg_vis = np.ones((height, width, 3), dtype=np.uint8) * 255 # White background
@ -90,25 +122,63 @@ def visualize_registration(cloud, vertices, icp_transform):
cv2.circle(reg_vis, pt, 2, (0, 255, 0), -1) # Green dots
# Add transformation matrix display
cv2.putText(reg_vis, "ICP Transformation Matrix:", (10, height - 120),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
cv2.putText(
reg_vis,
"ICP Transformation Matrix:",
(10, height - 120),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 0),
1,
)
for i in range(4):
matrix_text = f"[{icp_transform[i][0]:.2f}, {icp_transform[i][1]:.2f}, {icp_transform[i][2]:.2f}, {icp_transform[i][3]:.2f}]"
cv2.putText(reg_vis, matrix_text, (10, height - 90 + i * 20),
cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)
cv2.putText(
reg_vis,
matrix_text,
(10, height - 90 + i * 20),
cv2.FONT_HERSHEY_SIMPLEX,
0.4,
(0, 0, 0),
1,
)
# Add legend
cv2.putText(reg_vis, "Red: Point Cloud", (width - 200, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
cv2.putText(reg_vis, "Blue: Original Mesh", (width - 200, 60),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
cv2.putText(reg_vis, "Green: Transformed Mesh", (width - 200, 90),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.putText(
reg_vis,
"Red: Point Cloud",
(width - 200, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 255),
1,
)
cv2.putText(
reg_vis,
"Blue: Original Mesh",
(width - 200, 60),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 0, 0),
1,
)
cv2.putText(
reg_vis,
"Green: Transformed Mesh",
(width - 200, 90),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
1,
)
return reg_vis
def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camera_intrinsics):
def visualize_constraints(
frame, refined_landmarks, constrained_landmarks, camera_intrinsics
):
"""Visualize before and after applying anatomical constraints"""
constraints_vis = frame.copy()
height, width = frame.shape[:2]
@ -129,7 +199,9 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
if 0 <= u < width and 0 <= v < height:
if i in highlighted_indices:
# Draw larger circles for thumb and index fingers (the constrained ones)
cv2.circle(constraints_vis, (u, v), 5, (0, 0, 255), -1) # Red circles
cv2.circle(
constraints_vis, (u, v), 5, (0, 0, 255), -1
) # Red circles
else:
cv2.circle(constraints_vis, (u, v), 3, (0, 0, 255), -1)
@ -146,9 +218,19 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
end_u = int(end[0] * focal_x / end[2] + center_x)
end_v = int(end[1] * focal_y / end[2] + center_y)
if (0 <= start_u < width and 0 <= start_v < height and
0 <= end_u < width and 0 <= end_v < height):
cv2.line(constraints_vis, (start_u, start_v), (end_u, end_v), (0, 0, 255), 2)
if (
0 <= start_u < width
and 0 <= start_v < height
and 0 <= end_u < width
and 0 <= end_v < height
):
cv2.line(
constraints_vis,
(start_u, start_v),
(end_u, end_v),
(0, 0, 255),
2,
)
# Draw constrained landmarks
for i, landmark in enumerate(constrained_landmarks):
@ -160,7 +242,9 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
if 0 <= u < width and 0 <= v < height:
if i in highlighted_indices:
# Draw larger circles for thumb and index fingers
cv2.circle(constraints_vis, (u, v), 5, (0, 255, 0), -1) # Green circles
cv2.circle(
constraints_vis, (u, v), 5, (0, 255, 0), -1
) # Green circles
else:
cv2.circle(constraints_vis, (u, v), 3, (0, 255, 0), -1)
@ -177,24 +261,56 @@ def visualize_constraints(frame, refined_landmarks, constrained_landmarks, camer
end_u = int(end[0] * focal_x / end[2] + center_x)
end_v = int(end[1] * focal_y / end[2] + center_y)
if (0 <= start_u < width and 0 <= start_v < height and
0 <= end_u < width and 0 <= end_v < height):
cv2.line(constraints_vis, (start_u, start_v), (end_u, end_v), (0, 255, 0), 2)
if (
0 <= start_u < width
and 0 <= start_v < height
and 0 <= end_u < width
and 0 <= end_v < height
):
cv2.line(
constraints_vis,
(start_u, start_v),
(end_u, end_v),
(0, 255, 0),
2,
)
# Add legend
cv2.putText(constraints_vis, "Red: Before constraints", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
cv2.putText(constraints_vis, "Green: After constraints", (10, 60),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
cv2.putText(
constraints_vis,
"Red: Before constraints",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(0, 0, 255),
2,
)
cv2.putText(
constraints_vis,
"Green: After constraints",
(10, 60),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(0, 255, 0),
2,
)
return constraints_vis
def visualize_robot_params(frame, position, direction, normal, gripper_width, camera_intrinsics):
"""Visualize robot parameters (position, orientation, gripper width)"""
def visualize_robot_params(
frame, position, orientation_matrix, gripper_width, camera_intrinsics
):
"""Visualize robot parameters (position, orientation matrix, gripper width)"""
robot_vis = frame.copy()
height, width = frame.shape[:2]
focal_x, focal_y, center_x, center_y = camera_intrinsics
# Extract axes from orientation matrix
x_axis = orientation_matrix[:, 0] # First column (principal axis)
y_axis = orientation_matrix[:, 1] # Second column
z_axis = orientation_matrix[:, 2] # Third column (normal)
# Project position to image coordinates
x, y, z = position
if z > 0:
@ -205,59 +321,113 @@ def visualize_robot_params(frame, position, direction, normal, gripper_width, ca
# Draw end effector position
cv2.circle(robot_vis, (u, v), 10, (255, 0, 0), -1) # Blue circle
# Draw direction vector (X axis)
dx, dy, dz = direction
# Draw X axis (principal axis)
dx, dy, dz = x_axis
scale = 50 # Scale for better visualization
end_u = int((x + dx * scale) * focal_x / (z + dz * scale) + center_x)
end_v = int((y + dy * scale) * focal_y / (z + dz * scale) + center_y)
if 0 <= end_u < width and 0 <= end_v < height:
cv2.line(robot_vis, (u, v), (end_u, end_v), (0, 0, 255), 2) # Red line (X axis)
cv2.putText(robot_vis, "X", (end_u, end_v),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.line(
robot_vis, (u, v), (end_u, end_v), (0, 0, 255), 2
) # Red line (X axis)
cv2.putText(
robot_vis,
"X",
(end_u, end_v),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 255),
2,
)
# Draw normal vector (Z axis)
nx, ny, nz = normal
# Draw Z axis (normal)
nx, ny, nz = z_axis
end_u = int((x + nx * scale) * focal_x / (z + nz * scale) + center_x)
end_v = int((y + ny * scale) * focal_y / (z + nz * scale) + center_y)
if 0 <= end_u < width and 0 <= end_v < height:
cv2.line(robot_vis, (u, v), (end_u, end_v), (0, 255, 0), 2) # Green line (Z axis)
cv2.putText(robot_vis, "Z", (end_u, end_v),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.line(
robot_vis, (u, v), (end_u, end_v), (0, 255, 0), 2
) # Green line (Z axis)
cv2.putText(
robot_vis,
"Z",
(end_u, end_v),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
2,
)
# Calculate Y axis (cross product of Z and X for right-hand coordinate system)
y_axis = np.cross(normal, direction)
y_axis = y_axis / np.linalg.norm(y_axis)
# Draw Y axis
yx, yy, yz = y_axis
end_u = int((x + yx * scale) * focal_x / (z + yz * scale) + center_x)
end_v = int((y + yy * scale) * focal_y / (z + yz * scale) + center_y)
if 0 <= end_u < width and 0 <= end_v < height:
cv2.line(robot_vis, (u, v), (end_u, end_v), (255, 0, 0), 2) # Blue line (Y axis)
cv2.putText(robot_vis, "Y", (end_u, end_v),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
cv2.line(
robot_vis, (u, v), (end_u, end_v), (255, 0, 0), 2
) # Blue line (Y axis)
cv2.putText(
robot_vis,
"Y",
(end_u, end_v),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 0, 0),
2,
)
# Visualize gripper width
gripper_radius = int(gripper_width * 100) # Scale for better visualization
cv2.circle(robot_vis, (u, v), gripper_radius, (0, 255, 255), 2) # Yellow circle
cv2.circle(
robot_vis, (u, v), gripper_radius, (0, 255, 255), 2
) # Yellow circle
# Add parameter values as text
y_offset = height - 160
cv2.putText(robot_vis, f"Position: ({x:.2f}, {y:.2f}, {z:.2f})", (10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
cv2.putText(
robot_vis,
f"Position: ({x:.2f}, {y:.2f}, {z:.2f})",
(10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 255, 255),
1,
)
y_offset += 30
cv2.putText(robot_vis, f"Direction: ({dx:.2f}, {dy:.2f}, {dz:.2f})", (10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
cv2.putText(
robot_vis,
f"X axis: ({x_axis[0]:.2f}, {x_axis[1]:.2f}, {x_axis[2]:.2f})",
(10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 255),
1,
)
y_offset += 30
cv2.putText(robot_vis, f"Normal: ({nx:.2f}, {ny:.2f}, {nz:.2f})", (10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.putText(
robot_vis,
f"Z axis: ({z_axis[0]:.2f}, {z_axis[1]:.2f}, {z_axis[2]:.2f})",
(10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
1,
)
y_offset += 30
cv2.putText(robot_vis, f"Gripper Width: {gripper_width:.2f}", (10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
cv2.putText(
robot_vis,
f"Gripper Width: {gripper_width:.2f}",
(10, y_offset),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 255),
1,
)
return robot_vis