515 lines
20 KiB
Python
515 lines
20 KiB
Python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||
#
|
||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
#
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
#
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
|
||
import base64
|
||
import json
|
||
import logging
|
||
from typing import Any
|
||
|
||
import cv2
|
||
import numpy as np
|
||
import torch
|
||
import zmq
|
||
|
||
from lerobot.common.constants import OBS_IMAGES, OBS_STATE
|
||
from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError, InvalidActionError
|
||
from lerobot.common.robots.config import RobotMode
|
||
|
||
from ..robot import Robot
|
||
from .config_lekiwi import LeKiwiClientConfig
|
||
|
||
|
||
# TODO(Steven): This doesn't need to inherit from Robot
|
||
# But we do it for now to offer a familiar API
|
||
# TODO(Steven): This doesn't need to take care of the
|
||
# mapping from teleop to motor commands, but given that
|
||
# we already have a middle-man (this class) we add it here
|
||
# Other options include:
|
||
# 1. Adding it to the Telop implementation for lekiwi
|
||
# (meaning each robot will need a teleop imple) or
|
||
# 2. Adding it into the robot implementation
|
||
# (meaning the policy might be needed to be train
|
||
# over the teleop action space)
|
||
class LeKiwiClient(Robot):
|
||
config_class = LeKiwiClientConfig
|
||
name = "lekiwi_client"
|
||
|
||
def __init__(self, config: LeKiwiClientConfig):
|
||
super().__init__(config)
|
||
self.config = config
|
||
self.id = config.id
|
||
self.robot_type = config.type
|
||
self.robot_mode = config.robot_mode
|
||
|
||
self.remote_ip = config.remote_ip
|
||
self.port_zmq_cmd = config.port_zmq_cmd
|
||
self.port_zmq_observations = config.port_zmq_observations
|
||
|
||
self.teleop_keys = config.teleop_keys
|
||
|
||
self.zmq_context = None
|
||
self.zmq_cmd_socket = None
|
||
self.zmq_observation_socket = None
|
||
|
||
self.last_frames = {}
|
||
self.last_present_speed = {"x_cmd": 0.0, "y_cmd": 0.0, "theta_cmd": 0.0}
|
||
|
||
# TODO(Steven): Move everything to 32 instead
|
||
self.last_remote_arm_state = {}
|
||
|
||
# Define three speed levels and a current index
|
||
self.speed_levels = [
|
||
{"xy": 0.1, "theta": 30}, # slow
|
||
{"xy": 0.2, "theta": 60}, # medium
|
||
{"xy": 0.3, "theta": 90}, # fast
|
||
]
|
||
self.speed_index = 0 # Start at slow
|
||
|
||
self._is_connected = False
|
||
self.logs = {}
|
||
|
||
@property
|
||
def state_feature(self) -> dict:
|
||
# TODO(Steven): Get this from the data fetched?
|
||
# TODO(Steven): Motor names are unknown for the Daemon
|
||
# Or assume its size/metadata?
|
||
return {
|
||
"dtype": "float64",
|
||
"shape": (9,),
|
||
"names": {
|
||
"motors": [
|
||
"arm_shoulder_pan",
|
||
"arm_shoulder_lift",
|
||
"arm_elbow_flex",
|
||
"arm_wrist_flex",
|
||
"arm_wrist_roll",
|
||
"arm_gripper",
|
||
"base_left_wheel",
|
||
"base_right_wheel",
|
||
"base_back_wheel",
|
||
]
|
||
},
|
||
}
|
||
|
||
@property
|
||
def action_feature(self) -> dict:
|
||
return self.state_feature
|
||
|
||
@property
|
||
def camera_features(self) -> dict[str, dict]:
|
||
# TODO(Steven): Get this from the data fetched?
|
||
# TODO(Steven): Motor names are unknown for the Daemon
|
||
# Or assume its size/metadata?
|
||
# TODO(Steven): Check consistency of image sizes
|
||
cam_ft = {
|
||
"front": {
|
||
"shape": (480, 640, 3),
|
||
"names": ["height", "width", "channels"],
|
||
"info": None,
|
||
},
|
||
"wrist": {
|
||
"shape": (480, 640, 3),
|
||
"names": ["height", "width", "channels"],
|
||
"info": None,
|
||
},
|
||
}
|
||
return cam_ft
|
||
|
||
@property
|
||
def is_connected(self) -> bool:
|
||
return self._is_connected
|
||
|
||
@property
|
||
def is_calibrated(self) -> bool:
|
||
pass
|
||
|
||
def connect(self) -> None:
|
||
"""Establishes ZMQ sockets with the remote mobile robot"""
|
||
|
||
if self._is_connected:
|
||
raise DeviceAlreadyConnectedError(
|
||
"LeKiwi Daemon is already connected. Do not run `robot.connect()` twice."
|
||
)
|
||
|
||
self.zmq_context = zmq.Context()
|
||
self.zmq_cmd_socket = self.zmq_context.socket(zmq.PUSH)
|
||
zmq_cmd_locator = f"tcp://{self.remote_ip}:{self.port_zmq_cmd}"
|
||
self.zmq_cmd_socket.connect(zmq_cmd_locator)
|
||
self.zmq_cmd_socket.setsockopt(zmq.CONFLATE, 1)
|
||
|
||
self.zmq_observation_socket = self.zmq_context.socket(zmq.PULL)
|
||
zmq_observations_locator = f"tcp://{self.remote_ip}:{self.port_zmq_observations}"
|
||
self.zmq_observation_socket.connect(zmq_observations_locator)
|
||
self.zmq_observation_socket.setsockopt(zmq.CONFLATE, 1)
|
||
|
||
self._is_connected = True
|
||
|
||
def calibrate(self) -> None:
|
||
# TODO(Steven): Nothing to calibrate.
|
||
# Consider triggering calibrate() on the remote mobile robot?
|
||
# Although this would require a more complex comms schema
|
||
logging.warning("LeKiwiClient has nothing to calibrate.")
|
||
return
|
||
|
||
# Consider moving these static functions out of the class
|
||
# Copied from robot_lekiwi MobileManipulator class
|
||
@staticmethod
|
||
def _degps_to_raw(degps: float) -> int:
|
||
steps_per_deg = 4096.0 / 360.0
|
||
speed_in_steps = abs(degps) * steps_per_deg
|
||
speed_int = int(round(speed_in_steps))
|
||
if speed_int > 0x7FFF:
|
||
speed_int = 0x7FFF
|
||
if degps < 0:
|
||
return speed_int | 0x8000
|
||
else:
|
||
return speed_int & 0x7FFF
|
||
|
||
# Copied from robot_lekiwi MobileManipulator class
|
||
@staticmethod
|
||
def _raw_to_degps(raw_speed: int) -> float:
|
||
steps_per_deg = 4096.0 / 360.0
|
||
magnitude = raw_speed & 0x7FFF
|
||
degps = magnitude / steps_per_deg
|
||
if raw_speed & 0x8000:
|
||
degps = -degps
|
||
return degps
|
||
|
||
# Copied from robot_lekiwi MobileManipulator class
|
||
def _body_to_wheel_raw(
|
||
self,
|
||
x_cmd: float,
|
||
y_cmd: float,
|
||
theta_cmd: float,
|
||
wheel_radius: float = 0.05,
|
||
base_radius: float = 0.125,
|
||
max_raw: int = 3000,
|
||
) -> dict:
|
||
"""
|
||
Convert desired body-frame velocities into wheel raw commands.
|
||
|
||
Parameters:
|
||
x_cmd : Linear velocity in x (m/s).
|
||
y_cmd : Linear velocity in y (m/s).
|
||
theta_cmd : Rotational velocity (deg/s).
|
||
wheel_radius: Radius of each wheel (meters).
|
||
base_radius : Distance from the center of rotation to each wheel (meters).
|
||
max_raw : Maximum allowed raw command (ticks) per wheel.
|
||
|
||
Returns:
|
||
A dictionary with wheel raw commands:
|
||
{"left_wheel": value, "back_wheel": value, "right_wheel": value}.
|
||
|
||
Notes:
|
||
- Internally, the method converts theta_cmd to rad/s for the kinematics.
|
||
- The raw command is computed from the wheels angular speed in deg/s
|
||
using _degps_to_raw(). If any command exceeds max_raw, all commands
|
||
are scaled down proportionally.
|
||
"""
|
||
# Convert rotational velocity from deg/s to rad/s.
|
||
theta_rad = theta_cmd * (np.pi / 180.0)
|
||
# Create the body velocity vector [x, y, theta_rad].
|
||
velocity_vector = np.array([x_cmd, y_cmd, theta_rad])
|
||
|
||
# Define the wheel mounting angles with a -90° offset.
|
||
angles = np.radians(np.array([240, 120, 0]) - 90)
|
||
# Build the kinematic matrix: each row maps body velocities to a wheel’s linear speed.
|
||
# The third column (base_radius) accounts for the effect of rotation.
|
||
m = np.array([[np.cos(a), np.sin(a), base_radius] for a in angles])
|
||
|
||
# Compute each wheel’s linear speed (m/s) and then its angular speed (rad/s).
|
||
wheel_linear_speeds = m.dot(velocity_vector)
|
||
wheel_angular_speeds = wheel_linear_speeds / wheel_radius
|
||
|
||
# Convert wheel angular speeds from rad/s to deg/s.
|
||
wheel_degps = wheel_angular_speeds * (180.0 / np.pi)
|
||
|
||
# Scaling
|
||
steps_per_deg = 4096.0 / 360.0
|
||
raw_floats = [abs(degps) * steps_per_deg for degps in wheel_degps]
|
||
max_raw_computed = max(raw_floats)
|
||
if max_raw_computed > max_raw:
|
||
scale = max_raw / max_raw_computed
|
||
wheel_degps = wheel_degps * scale
|
||
|
||
# Convert each wheel’s angular speed (deg/s) to a raw integer.
|
||
wheel_raw = [LeKiwiClient._degps_to_raw(deg) for deg in wheel_degps]
|
||
|
||
# TODO(Steven): remove hard-coded names
|
||
return {"left_wheel": wheel_raw[0], "back_wheel": wheel_raw[1], "right_wheel": wheel_raw[2]}
|
||
|
||
# Copied from robot_lekiwi MobileManipulator class
|
||
def _wheel_raw_to_body(
|
||
self, wheel_raw: dict[str, Any], wheel_radius: float = 0.05, base_radius: float = 0.125
|
||
) -> dict[str, Any]:
|
||
"""
|
||
Convert wheel raw command feedback back into body-frame velocities.
|
||
|
||
Parameters:
|
||
wheel_raw : Vector with raw wheel commands ("left_wheel", "back_wheel", "right_wheel").
|
||
wheel_radius: Radius of each wheel (meters).
|
||
base_radius : Distance from the robot center to each wheel (meters).
|
||
|
||
Returns:
|
||
A tuple (x_cmd, y_cmd, theta_cmd) where:
|
||
x_cmd : Linear velocity in x (m/s).
|
||
y_cmd : Linear velocity in y (m/s).
|
||
theta_cmd : Rotational velocity in deg/s.
|
||
"""
|
||
|
||
# TODO(Steven): No check is done for dict keys
|
||
# Convert each raw command back to an angular speed in deg/s.
|
||
wheel_degps = np.array([LeKiwiClient._raw_to_degps(int(v)) for _, v in wheel_raw.items()])
|
||
# Convert from deg/s to rad/s.
|
||
wheel_radps = wheel_degps * (np.pi / 180.0)
|
||
# Compute each wheel’s linear speed (m/s) from its angular speed.
|
||
wheel_linear_speeds = wheel_radps * wheel_radius
|
||
|
||
# Define the wheel mounting angles with a -90° offset.
|
||
angles = np.radians(np.array([240, 120, 0]) - 90)
|
||
m = np.array([[np.cos(a), np.sin(a), base_radius] for a in angles])
|
||
|
||
# Solve the inverse kinematics: body_velocity = M⁻¹ · wheel_linear_speeds.
|
||
m_inv = np.linalg.inv(m)
|
||
velocity_vector = m_inv.dot(wheel_linear_speeds)
|
||
x_cmd, y_cmd, theta_rad = velocity_vector
|
||
theta_cmd = theta_rad * (180.0 / np.pi)
|
||
return {"x_cmd": x_cmd, "y_cmd": y_cmd, "theta_cmd": theta_cmd}
|
||
|
||
# TODO(Steven): This is flaky, for example, if we received a state but failed decoding the image, we will not update any value
|
||
# TODO(Steven): All this function needs to be refactored
|
||
def _get_data(self):
|
||
# Copied from robot_lekiwi.py
|
||
"""Polls the video socket for up to 15 ms. If data arrives, decode only
|
||
the *latest* message, returning frames, speed, and arm state. If
|
||
nothing arrives for any field, use the last known values."""
|
||
|
||
frames = {}
|
||
present_speed = {}
|
||
|
||
# TODO(Steven): Size is being assumed, is this safe?
|
||
remote_arm_state_tensor = {}
|
||
|
||
# Poll up to 15 ms
|
||
poller = zmq.Poller()
|
||
poller.register(self.zmq_observation_socket, zmq.POLLIN)
|
||
socks = dict(poller.poll(15))
|
||
if self.zmq_observation_socket not in socks or socks[self.zmq_observation_socket] != zmq.POLLIN:
|
||
# No new data arrived → reuse ALL old data
|
||
# TODO(Steven): This might return empty variables at init
|
||
return (self.last_frames, self.last_present_speed, self.last_remote_arm_state)
|
||
|
||
# Drain all messages, keep only the last
|
||
last_msg = None
|
||
# TODO(Steven): There's probably a way to do this without while True
|
||
# TODO(Steven): Even consider changing to PUB/SUB
|
||
while True:
|
||
try:
|
||
obs_string = self.zmq_observation_socket.recv_string(zmq.NOBLOCK)
|
||
last_msg = obs_string
|
||
except zmq.Again:
|
||
break
|
||
|
||
if not last_msg:
|
||
# No new message → also reuse old
|
||
return (self.last_frames, self.last_present_speed, self.last_remote_arm_state)
|
||
|
||
# Decode only the final message
|
||
try:
|
||
observation = json.loads(last_msg)
|
||
|
||
# TODO(Steven): Consider getting directly the item with observation[OBS_STATE]
|
||
state_observation = observation[OBS_STATE]
|
||
image_observation = observation[OBS_IMAGES]
|
||
|
||
# Convert images
|
||
for cam_name, image_b64 in image_observation.items():
|
||
if image_b64:
|
||
jpg_data = base64.b64decode(image_b64)
|
||
np_arr = np.frombuffer(jpg_data, dtype=np.uint8)
|
||
frame_candidate = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
|
||
if frame_candidate is not None:
|
||
frames[cam_name] = frame_candidate
|
||
|
||
# TODO(Steven): Should we really ignore the arm state if the image is None?
|
||
# If remote_arm_state is None and frames is None there is no message then use the previous message
|
||
if state_observation is not None and frames is not None:
|
||
self.last_frames = frames
|
||
|
||
# TODO(Steven): Do we really need the casting here?
|
||
# TODO(Steven): hard-coded name of expected keys, not good
|
||
remote_arm_state_tensor = {k: v for k, v in state_observation.items() if k.startswith("arm")}
|
||
self.last_remote_arm_state = remote_arm_state_tensor
|
||
|
||
present_speed = {k: v for k, v in state_observation.items() if k.startswith("base")}
|
||
self.last_present_speed = present_speed
|
||
else:
|
||
frames = self.last_frames
|
||
remote_arm_state_tensor = self.last_remote_arm_state
|
||
present_speed = self.last_present_speed
|
||
|
||
except Exception as e:
|
||
print(f"[DEBUG] Error decoding video message: {e}")
|
||
# If decode fails, fall back to old data
|
||
return (self.last_frames, self.last_present_speed, self.last_remote_arm_state)
|
||
return frames, present_speed, remote_arm_state_tensor
|
||
|
||
# TODO(Steven): The returned space is different from the get_observation of LeKiwi
|
||
# This returns body-frames velocities instead of wheel pos/speeds
|
||
def get_observation(self) -> dict[str, Any]:
|
||
"""
|
||
Capture observations from the remote robot: current follower arm positions,
|
||
present wheel speeds (converted to body-frame velocities: x, y, theta),
|
||
and a camera frame. Receives over ZMQ, translate to body-frame vel
|
||
"""
|
||
if not self._is_connected:
|
||
raise DeviceNotConnectedError("LeKiwiClient is not connected. You need to run `robot.connect()`.")
|
||
|
||
# TODO(Steven): remove hard-coded cam name
|
||
# This is needed at init for when there's no comms
|
||
obs_dict = {
|
||
OBS_IMAGES: {"wrist": np.zeros(shape=(480, 640, 3)), "front": np.zeros(shape=(640, 480, 3))}
|
||
}
|
||
|
||
frames, present_speed, remote_arm_state_tensor = self._get_data()
|
||
body_state = self._wheel_raw_to_body(present_speed)
|
||
# TODO(Steven): out isdict[str,Any] and we multiply by 1000.0. This should be more explicit and specify the expected type instead of Any
|
||
body_state_mm = {k: v * 1000.0 for k, v in body_state.items()} # Convert x,y to mm/s
|
||
|
||
obs_dict[OBS_STATE] = {**remote_arm_state_tensor, **body_state_mm}
|
||
|
||
# Loop over each configured camera
|
||
for cam_name, frame in frames.items():
|
||
if frame is None:
|
||
# TODO(Steven): Daemon doesn't know camera dimensions
|
||
logging.warning("Frame is None")
|
||
frame = np.zeros((480, 640, 3), dtype=np.uint8)
|
||
obs_dict[OBS_IMAGES][cam_name] = torch.from_numpy(frame)
|
||
|
||
return obs_dict
|
||
|
||
def _from_keyboard_to_wheel_action(self, pressed_keys: np.ndarray):
|
||
# Speed control
|
||
if self.teleop_keys["speed_up"] in pressed_keys:
|
||
self.speed_index = min(self.speed_index + 1, 2)
|
||
if self.teleop_keys["speed_down"] in pressed_keys:
|
||
self.speed_index = max(self.speed_index - 1, 0)
|
||
speed_setting = self.speed_levels[self.speed_index]
|
||
xy_speed = speed_setting["xy"] # e.g. 0.1, 0.25, or 0.4
|
||
theta_speed = speed_setting["theta"] # e.g. 30, 60, or 90
|
||
|
||
x_cmd = 0.0 # m/s forward/backward
|
||
y_cmd = 0.0 # m/s lateral
|
||
theta_cmd = 0.0 # deg/s rotation
|
||
|
||
if self.teleop_keys["forward"] in pressed_keys:
|
||
x_cmd += xy_speed
|
||
if self.teleop_keys["backward"] in pressed_keys:
|
||
x_cmd -= xy_speed
|
||
if self.teleop_keys["left"] in pressed_keys:
|
||
y_cmd += xy_speed
|
||
if self.teleop_keys["right"] in pressed_keys:
|
||
y_cmd -= xy_speed
|
||
if self.teleop_keys["rotate_left"] in pressed_keys:
|
||
theta_cmd += theta_speed
|
||
if self.teleop_keys["rotate_right"] in pressed_keys:
|
||
theta_cmd -= theta_speed
|
||
|
||
return self._body_to_wheel_raw(x_cmd, y_cmd, theta_cmd)
|
||
|
||
def configure(self):
|
||
pass
|
||
|
||
# TODO(Steven): This assumes this call is always called from a keyboard teleop command
|
||
# TODO(Steven): Doing this mapping in here adds latecy between send_action and movement from the user perspective.
|
||
# t0: get teleop_cmd
|
||
# t1: send_action(teleop_cmd)
|
||
# t2: mapping teleop_cmd -> motor_cmd
|
||
# t3: execute motor_md
|
||
# This mapping for other robots/teleop devices might be slower. Doing this in the teleop will make this explicit
|
||
# t0': get teleop_cmd
|
||
# t1': mapping teleop_cmd -> motor_cmd
|
||
# t2': send_action(motor_cmd)
|
||
# t3': execute motor_cmd
|
||
# t3'-t2' << t3-t1
|
||
def send_action(self, action: dict[str, Any]) -> dict[str, Any]:
|
||
"""Command lekiwi to move to a target joint configuration. Translates to motor space + sends over ZMQ
|
||
|
||
Args:
|
||
action (np.ndarray): array containing the goal positions for the motors.
|
||
|
||
Raises:
|
||
RobotDeviceNotConnectedError: if robot is not connected.
|
||
|
||
Returns:
|
||
np.ndarray: the action sent to the motors, potentially clipped.
|
||
"""
|
||
if not self._is_connected:
|
||
raise DeviceNotConnectedError(
|
||
"ManipulatorRobot is not connected. You need to run `robot.connect()`."
|
||
)
|
||
|
||
if self.robot_mode is RobotMode.AUTO:
|
||
# TODO(Steven): Not yet implemented. The policy outputs might need a different conversion
|
||
raise Exception
|
||
|
||
goal_pos = {}
|
||
# TODO(Steven): This assumes teleop mode is always used with keyboard. Tomorrow we could teleop with another device ... ?
|
||
if self.robot_mode is RobotMode.TELEOP:
|
||
# TODO(Steven): I don't like this
|
||
|
||
motors_name = self.state_feature.get("names").get("motors")
|
||
|
||
common_keys = [
|
||
key for key in action if key in (motor.replace("arm_", "") for motor in motors_name)
|
||
]
|
||
|
||
if len(common_keys) < 6:
|
||
logging.error("Action should include at least the states of the leader arm")
|
||
raise InvalidActionError
|
||
|
||
arm_actions = {"arm_" + arm_motor: action[arm_motor] for arm_motor in common_keys}
|
||
goal_pos = arm_actions
|
||
if len(action) > 6:
|
||
keyboard_keys = set(action.keys()) - set(common_keys)
|
||
wheel_actions = {
|
||
"base_" + k: v for k, v in self._from_keyboard_to_wheel_action(keyboard_keys).items()
|
||
}
|
||
goal_pos = {**arm_actions, **wheel_actions}
|
||
|
||
self.zmq_cmd_socket.send_string(json.dumps(goal_pos)) # action is in motor space
|
||
|
||
return goal_pos
|
||
|
||
def print_logs(self):
|
||
# TODO(Steven): Refactor logger
|
||
pass
|
||
|
||
def disconnect(self):
|
||
"""Cleans ZMQ comms"""
|
||
|
||
if not self._is_connected:
|
||
raise DeviceNotConnectedError(
|
||
"LeKiwi is not connected. You need to run `robot.connect()` before disconnecting."
|
||
)
|
||
# TODO(Steven): Consider sending a stop to the remote mobile robot. Although this would need a moore complex comms schema
|
||
self.zmq_observation_socket.close()
|
||
self.zmq_cmd_socket.close()
|
||
self.zmq_context.term()
|
||
self._is_connected = False
|
||
|
||
def __del__(self):
|
||
if getattr(self, "is_connected", False):
|
||
self.disconnect()
|