get IK to work

2025-04-03 10:32:19 +00:00 · 2025-03-25 16:44:19 -07:00 · 2025-03-25 16:44:19 -07:00 · e123ed2bfe
commit e123ed2bfe
parent c49a57a876
1 changed files with 133 additions and 82 deletions
--- a/open_phantom/robot_manager.py
+++ b/open_phantom/robot_manager.py
@ -4,83 +4,144 @@ import numpy as np
 import pybullet as p

 from utils.handle_urdf import handle_urdf
+from scipy.spatial.transform import Rotation


 class RobotManager:
    def __init__(self, urdf_path: str, camera_intrinsics: tuple) -> None:
-        self.physics_client = p.connect(p.DIRECT)
+        self.physics_client = p.connect(p.DIRECT)  # Headless mode

        robot_urdf = handle_urdf(urdf_path)
-        self.robot_id = self._load_robot(robot_urdf)

+        parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        absolute_urdf_path = os.path.join(parent_dir, robot_urdf)
+
+        self.robot_id = p.loadURDF(absolute_urdf_path, useFixedBase=True)
        self.joint_count = p.getNumJoints(self.robot_id)
-        self.end_effector_index = self._find_end_effector()
+        # TODO: Figure out a way to handle multiple end effectors
+        self.end_effector_index = self._find_gripper_joints()[-1]

-        self.fx, self.fy, self.cx, self.cy = camera_intrinsics
+        _, _, center_x, center_y = camera_intrinsics
        # Set up rendering parameters
-        self.img_width = int(self.cx * 2)
-        self.img_height = int(self.cy * 2)
+        self.img_width = int(center_x * 2)
+        self.img_height = int(center_y * 2)

-    # Load robot URDF into PyBullet
-    def _load_robot(self, robot_urdf: str) -> int:
-        try:
-            robot_id = p.loadURDF(
-                robot_urdf,
-                basePosition=[0, 0, 0],
-                useFixedBase=True,
-                flags=p.URDF_USE_SELF_COLLISION | p.URDF_USE_INERTIA_FROM_FILE,
-            )
-        except p.error as e:
-            print(f"PyBullet error when loading URDF: {e}")
-            raise e
+    def set_robot_pose(self, position, orientation, gripper_width):
+        # Convert orientation matrix to quaternion
+        r = Rotation.from_matrix(orientation)
+        quaternion = r.as_quat()

-        robot_name = p.getBodyInfo(robot_id)[1].decode("utf-8")
-        print(f"Successfully loaded {robot_name} robot with ID: {robot_id}")
+        # Get current joint positions as seed
+        joint_indices = list(range(self.joint_count))
+        joint_states = p.getJointStates(self.robot_id, joint_indices)
+        current_positions = [state[0] for state in joint_states]

-        return robot_id
+        # Exclude gripper joints for IK calculation
+        gripper_joints = self._find_gripper_joints()
+        ik_joint_indices = [j for j in joint_indices if j not in gripper_joints]

-    # NOTE: Only applicable if the robot has one end effector
-    def _find_end_effector(self) -> int:
-        assert self.joint_count > 0, "Robot has no joints"
+        solution = p.calculateInverseKinematics(
+            self.robot_id,
+            self.end_effector_index,
+            targetPosition=position,
+            targetOrientation=quaternion,
+            currentPositions=current_positions,
+            maxNumIterations=100,
+            residualThreshold=1e-5,
+        )

-        # Keywords to look for in joint names to identify end effector
-        keywords = ["gripper", "tool", "tcp", "end_effector", "hand"]
+        # Apply best solution
+        for i, joint_idx in enumerate(ik_joint_indices):
+            if i < len(solution):
+                p.resetJointState(self.robot_id, joint_idx, solution[i])
+
+        self._set_gripper_width(gripper_width)
+
+        return solution
+
+    def _find_gripper_joints(self) -> list:
+        gripper_joints = []
+        gripper_keywords = [
+            "gripper",
+            "tool",
+            "tcp",
+            "end_effector",
+            "hand",
+            "finger",
+            "claw",
+            "pinch",
+        ]

        for i in range(self.joint_count):
            info = p.getJointInfo(self.robot_id, i)
            joint_name = info[1].decode("utf-8").lower()

-            # Check if any keyword is in the joint name
-            if any(keyword in joint_name for keyword in keywords):
-                return i
+            if any(keyword in joint_name for keyword in gripper_keywords):
+                gripper_joints.append(i)

-        # If no specific end effector found, use the last joint in the chain
-        return self.joint_count - 1
+        return gripper_joints

-    # TODO: Use inverse kinematics to set the robot pose
-    def set_robot_pose(
-        self,
-        position: np.ndarray,
-        orientation_vectors: np.ndarray,
-        gripper_width: float,
-    ) -> None:
-        pass
+    def _set_gripper_width(self, width: float) -> None:
+        gripper_joints = self._find_gripper_joints()

-    # Render the robot in some scene using some camera parameters
-    def render_robot(self, bg_image=None, camera_params=None) -> np.ndarray:
+        assert gripper_joints, "No gripper joints found, cannot set gripper width"
+
+        # Clamp width to valid range
+        width = max(0.0, min(1.0, width))
+
+        # Get joint info to determine limits
+        for joint_idx in gripper_joints:
+            info = p.getJointInfo(self.robot_id, joint_idx)
+            lower_limit = info[8]  # Lower limit
+            upper_limit = info[9]  # Upper limit
+
+            # Calculate target position based on width
+            # For some grippers, smaller values mean close and larger values mean open
+            # For others, it's the opposite, so we need to check the joint info
+            if (
+                "left" in info[1].decode("utf-8").lower()
+                or "open" in info[1].decode("utf-8").lower()
+            ):
+                # For left/open joints (opening movement)
+                target_pos = lower_limit + width * (upper_limit - lower_limit)
+            else:
+                # For right/close joints (closing movement)
+                target_pos = upper_limit - width * (upper_limit - lower_limit)
+
+            p.setJointMotorControl2(
+                self.robot_id,
+                joint_idx,
+                p.POSITION_CONTROL,
+                targetPosition=target_pos,
+                force=100,  # Lower force for gripper to prevent damage
+            )
+
+    def render_robot(
+        self, inpainted_frame: np.ndarray, depth_map: np.ndarray, camera_params=None
+    ) -> np.ndarray:
        assert self.robot_id >= 0, "Robot not properly loaded"

+        # Resize depth map if needed
+        if depth_map.shape[:2] != (self.img_height, self.img_width):
+            depth_map = cv2.resize(depth_map, (self.img_width, self.img_height))
+
+        # Get current robot pose for camera targeting
+        link_state = p.getLinkState(self.robot_id, self.end_effector_index)
+        robot_pos = link_state[0]  # Position of the end effector
+
        # Set up camera parameters
        if camera_params is None:
-            # Default camera setup
-            cam_target = [0, 0, 0]
-            cam_distance = 1.0
+            cam_target = robot_pos
+            cam_distance = 0.3  # Closer view
            cam_yaw = 0
            cam_pitch = -30
            cam_roll = 0
        else:
            cam_target, cam_distance, cam_yaw, cam_pitch, cam_roll = camera_params

+        print(f"Robot position: {robot_pos}")
+        print(f"Camera target: {cam_target}, distance: {cam_distance}")
+
        # Compute view matrix
        view_matrix = p.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=cam_target,
@ -109,26 +170,38 @@ class RobotManager:
        # Extract RGB and depth
        rgb = np.reshape(img_arr[2], (self.img_height, self.img_width, 4))
        rgb = rgb[:, :, :3]  # Remove alpha channel
-        depth = np.reshape(img_arr[3], (self.img_height, self.img_width))
+        robot_depth = np.reshape(img_arr[3], (self.img_height, self.img_width))

-        # If background image is provided, composite
-        if bg_image is not None:
-            # Resize background if needed
-            bg_h, bg_w = bg_image.shape[:2]
-            if bg_w != self.img_width or bg_h != self.img_height:
-                bg_resized = cv2.resize(bg_image, (self.img_width, self.img_height))
-            else:
-                bg_resized = bg_image
+        # Save the raw robot rendering for debugging
+        cv2.imwrite("robot_debug_rgb.png", rgb)

-            # Create mask from depth
-            mask = (depth < 0.99).astype(np.float32)
-            mask = np.stack([mask, mask, mask], axis=2)
+        # Resize background if needed
+        frame_h, frame_w = inpainted_frame.shape[:2]
+        if frame_w != self.img_width or frame_h != self.img_height:
+            frame_resized = cv2.resize(
+                inpainted_frame, (self.img_width, self.img_height)
+            )
+        else:
+            frame_resized = inpainted_frame

-            # Composite
-            composite = bg_resized * (1 - mask) + rgb * mask
-            return composite.astype(np.uint8)
+        # Create basic robot mask (where robot pixels are visible)
+        robot_mask = (robot_depth < 0.99).astype(np.float32)

-        return rgb.astype(np.uint8)
+        # Save the robot mask for debugging
+        cv2.imwrite("robot_debug_mask.png", (robot_mask * 255).astype(np.uint8))
+
+        # Check if robot is visible at all
+        if np.sum(robot_mask) == 0:
+            print("WARNING: Robot is not visible in the rendered image!")
+            # If robot not visible, return the inpainted frame
+            return frame_resized
+
+        # More straightforward compositing without occlusion for testing
+        # Just overlay the robot on the frame where the robot mask is active
+        final_mask = np.stack([robot_mask, robot_mask, robot_mask], axis=2)
+        composite = frame_resized * (1 - final_mask) + rgb * final_mask
+
+        return composite.astype(np.uint8)

    def __del__(self) -> None:
        if hasattr(self, "physics_client"):
@ -136,25 +209,3 @@ class RobotManager:
                p.disconnect(self.physics_client)
            except:
                pass
-
-
-if __name__ == "__main__":
-    cwd = os.getcwd()
-    urdf_path = os.path.join(
-        cwd,
-        "notebook/phantom/urdf/SO_5DOF_ARM100_05d.SLDASM/urdf/SO_5DOF_ARM100_05d.SLDASM.urdf",
-    )
-    camera_intrinsics = (320, 240, 320, 240)  # Random intrinsics for example
-
-    robot_vis = RobotManager(urdf_path, camera_intrinsics)
-    rendered_image = robot_vis.render_robot()
-
-    # Option 1: Display the image using OpenCV
-    cv2.imshow("Robot Render", rendered_image)
-    cv2.waitKey(0)  # Wait for a key press
-    cv2.destroyAllWindows()
-
-    # Option 2: Save the image to a file
-    output_path = "robot_render.png"
-    cv2.imwrite(output_path, rendered_image)
-    print(f"Render saved to {output_path}")