parkour/onboard_codes/go1/go1_visual_embedding.py


import os
import os.path as osp
import numpy as np
import torch
import json
from functools import partial
from collections import OrderedDict
from a1_real import UnitreeA1Real, resize2d
from rsl_rl import modules
import rospy
from unitree_legged_msgs.msg import Float32MultiArrayStamped
from sensor_msgs.msg import Image
import ros_numpy
import pyrealsense2 as rs
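
# Load the trained policy from `logdir`, extract its visual encoder, and compile it with
# TorchScript so the depth embedding can be computed standalone on the onboard computer.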
def get_encoder_script(logdir):
    with open(osp.join(logdir, "config.json"), "r") as f:
        config_dict = json.load(f, object_pairs_hook= OrderedDict)
    model_device = torch.device("cuda")

    unitree_real_env = UnitreeA1Real(
        robot_namespace= "a112138",
        cfg= config_dict,
        forward_depth_topic= "", # this env only computes parameters to build the model
        forward_depth_embedding_dims= None,
        model_device= model_device,
    )

    model = getattr(modules, config_dict["runner"]["policy_class_name"])(
        num_actor_obs= unitree_real_env.num_obs,
        num_critic_obs= unitree_real_env.num_privileged_obs,
        num_actions= 12,
        obs_segments= unitree_real_env.obs_segments,
        privileged_obs_segments= unitree_real_env.privileged_obs_segments,
        **config_dict["policy"],
    )

    # load the latest checkpoint in logdir (model_*.pt, sorted by iteration number)
    model_names = [i for i in os.listdir(logdir) if i.startswith("model_")]
    model_names.sort(key= lambda x: int(x.split("_")[-1].split(".")[0]))
    state_dict = torch.load(osp.join(logdir, model_names[-1]), map_location= "cpu")
    model.load_state_dict(state_dict["model_state_dict"])
    model.to(model_device)
    model.eval()

    visual_encoder = model.visual_encoder
    script = torch.jit.script(visual_encoder)
    return script, model_device
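
# Build the depth preprocessing function (crop, far-clip, normalize, resize) from the
# training config and the command-line crop arguments, approximating the simulator pipeline.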
def get_input_filter(args):
    """ This filter is different from the one used in the simulator, but tries to close the gap. """
    with open(osp.join(args.logdir, "config.json"), "r") as f:
        config_dict = json.load(f, object_pairs_hook= OrderedDict)
    image_resolution = config_dict["sensor"]["forward_camera"].get(
        "output_resolution",
        config_dict["sensor"]["forward_camera"]["resolution"],
    )
    depth_range = config_dict["sensor"]["forward_camera"].get(
        "depth_range",
        [0.0, 3.0],
    )
    depth_range = (depth_range[0] * 1000, depth_range[1] * 1000) # [m] -> [mm]
    crop_top, crop_bottom, crop_left, crop_right = args.crop_top, args.crop_bottom, args.crop_left, args.crop_right
    crop_far = args.crop_far * 1000

    def input_filter(depth_image: torch.Tensor,
            crop_top: int,
            crop_bottom: int,
            crop_left: int,
            crop_right: int,
            crop_far: float,
            depth_min: int,
            depth_max: int,
            output_height: int,
            output_width: int,
        ):
        """ depth_image must have shape [1, 1, H, W] """
        depth_image = depth_image[:, :,
            crop_top: -crop_bottom-1,
            crop_left: -crop_right-1,
        ]
        depth_image[depth_image > crop_far] = depth_max
        # clip to [depth_min, depth_max] and scale by the depth range (yields [0, 1] when depth_min is 0)
        depth_image = torch.clip(
            depth_image,
            depth_min,
            depth_max,
        ) / (depth_max - depth_min)
        depth_image = resize2d(depth_image, (output_height, output_width))
        return depth_image
    # input_filter = torch.jit.script(input_filter)

    return partial(input_filter,
        crop_top= crop_top,
        crop_bottom= crop_bottom,
        crop_left= crop_left,
        crop_right= crop_right,
        crop_far= crop_far,
        depth_min= depth_range[0],
        depth_max= depth_range[1],
        output_height= image_resolution[0],
        output_width= image_resolution[1],
    ), depth_range
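
# Start a RealSense pipeline streaming depth (and optionally color), and return it together
# with a post-processing chain of hole-filling, spatial, and temporal filters.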
def get_started_pipeline(
        height= 480,
        width= 640,
        fps= 30,
        enable_rgb= False,
    ):
    # By default, rgb is not used.
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, width, height, rs.format.z16, fps)
    if enable_rgb:
        config.enable_stream(rs.stream.color, width, height, rs.format.rgb8, fps)
    profile = pipeline.start(config)

    # build the sensor filter
    hole_filling_filter = rs.hole_filling_filter(2)
    spatial_filter = rs.spatial_filter()
    spatial_filter.set_option(rs.option.filter_magnitude, 5)
    spatial_filter.set_option(rs.option.filter_smooth_alpha, 0.75)
    spatial_filter.set_option(rs.option.filter_smooth_delta, 1)
    spatial_filter.set_option(rs.option.holes_fill, 4)
    temporal_filter = rs.temporal_filter()
    temporal_filter.set_option(rs.option.filter_smooth_alpha, 0.75)
    temporal_filter.set_option(rs.option.filter_smooth_delta, 1)
    # decimation_filter = rs.decimation_filter()
    # decimation_filter.set_option(rs.option.filter_magnitude, 2)

    def filter_func(frame):
        frame = hole_filling_filter.process(frame)
        frame = spatial_filter.process(frame)
        frame = temporal_filter.process(frame)
        # frame = decimation_filter.process(frame)
        return frame

    return pipeline, filter_func
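
# Main entry point: initialize ROS and the camera, then repeatedly filter and preprocess
# depth frames, encode them with the scripted visual encoder, and publish the embedding.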
def main(args):
    rospy.init_node("a1_legged_gym_jetson")
    input_filter, depth_range = get_input_filter(args)
    model_script, model_device = get_encoder_script(args.logdir)
    with open(osp.join(args.logdir, "config.json"), "r") as f:
        config_dict = json.load(f, object_pairs_hook= OrderedDict)
    if config_dict.get("sensor", dict()).get("forward_camera", dict()).get("refresh_duration", None) is not None:
        refresh_duration = config_dict["sensor"]["forward_camera"]["refresh_duration"]
        ros_rate = rospy.Rate(1.0 / refresh_duration)
        rospy.loginfo("Using refresh duration {}s".format(refresh_duration))
    else:
        ros_rate = rospy.Rate(args.fps)
    rs_pipeline, rs_filters = get_started_pipeline(
        height= args.height,
        width= args.width,
        fps= args.fps,
        enable_rgb= args.enable_rgb,
    )
    # gyro_pipeline = rs.pipeline()
    # gyro_config = rs.config()
    # gyro_config.enable_stream(rs.stream.gyro, rs.format.motion_xyz32f, 200)
    # gyro_profile = gyro_pipeline.start(gyro_config)
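
    # Publishers: the visual embedding is always published; depth, network-input, and rgb
    # debug images are only published when --enable_vis (and --enable_rgb) are set.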
    embedding_publisher = rospy.Publisher(
        args.namespace + "/visual_embedding",
        Float32MultiArrayStamped,
        queue_size= 1,
    )
    if args.enable_vis:
        depth_image_publisher = rospy.Publisher(
            args.namespace + "/camera/depth/image_rect_raw",
            Image,
            queue_size= 1,
        )
        network_input_publisher = rospy.Publisher(
            args.namespace + "/camera/depth/network_input_raw",
            Image,
            queue_size= 1,
        )
    if args.enable_rgb:
        rgb_image_publisher = rospy.Publisher(
            args.namespace + "/camera/color/image_raw",
            Image,
            queue_size= 1,
        )
    rospy.loginfo("Depth range is clipped to [{}, {}] and normalized".format(depth_range[0], depth_range[1]))
    rospy.loginfo("ROS, model, realsense have been initialized.")
    if args.enable_vis:
        rospy.loginfo("Visualization enabled, sending depth{} images".format(", rgb" if args.enable_rgb else ""))
    try:
        embedding_msg = Float32MultiArrayStamped()
        embedding_msg.header.frame_id = args.namespace + "/camera_depth_optical_frame"
        frame_got = False
        while not rospy.is_shutdown():
            # Wait for the depth image
            frames = rs_pipeline.wait_for_frames(int(
                config_dict["sensor"]["forward_camera"]["latency_range"][1]
                * 1000)) # ms
            embedding_msg.header.stamp = rospy.Time.now()
            depth_frame = frames.get_depth_frame()
            if not depth_frame:
                continue
            if not frame_got:
                frame_got = True
                rospy.loginfo("Realsense frame received. Sending embeddings...")
            if args.enable_rgb:
                color_frame = frames.get_color_frame()
                # Use this branch to log the time when the image is acquired
                if args.enable_vis and color_frame is not None:
                    color_frame = np.asanyarray(color_frame.get_data())
                    rgb_image_msg = ros_numpy.msgify(Image, color_frame, encoding= "rgb8")
                    rgb_image_msg.header.stamp = rospy.Time.now()
                    rgb_image_msg.header.frame_id = args.namespace + "/camera_color_optical_frame"
                    rgb_image_publisher.publish(rgb_image_msg)

            # Process the depth image and publish
            depth_frame = rs_filters(depth_frame)
            depth_image_ = np.asanyarray(depth_frame.get_data())
            depth_image = torch.from_numpy(depth_image_.astype(np.float32)).unsqueeze(0).unsqueeze(0).to(model_device)
            depth_image = input_filter(depth_image)
            with torch.no_grad():
                depth_embedding = model_script(depth_image).reshape(-1).cpu().numpy()
            embedding_msg.header.seq += 1
            embedding_msg.data = depth_embedding.tolist()
            embedding_publisher.publish(embedding_msg)

            # Publish the acquired image if needed
            if args.enable_vis:
                depth_image_msg = ros_numpy.msgify(Image, depth_image_, encoding= "16UC1")
                depth_image_msg.header.stamp = rospy.Time.now()
                depth_image_msg.header.frame_id = args.namespace + "/camera_depth_optical_frame"
                depth_image_publisher.publish(depth_image_msg)
                # de-normalize the network input back to millimeters for visualization
                network_input_np = (
                    depth_image.detach().cpu().numpy()[0, 0] * (depth_range[1] - depth_range[0])
                    + depth_range[0]
                ).astype(np.uint16)
                network_input_msg = ros_numpy.msgify(Image, network_input_np, encoding= "16UC1")
                network_input_msg.header.stamp = rospy.Time.now()
                network_input_msg.header.frame_id = args.namespace + "/camera_depth_optical_frame"
                network_input_publisher.publish(network_input_msg)
            ros_rate.sleep()
    finally:
        rs_pipeline.stop()

if __name__ == "__main__":
    """ This script is designed to load the model and process the realsense image directly
    from the realsense SDK, without the realsense ROS wrapper.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--namespace",
        type= str,
        default= "/a112138",
    )
    parser.add_argument("--logdir",
        type= str,
        help= "The log directory of the trained model",
    )
    parser.add_argument("--height",
        type= int,
        default= 270,
        help= "The height of the realsense image",
    )
    parser.add_argument("--width",
        type= int,
        default= 480,
        help= "The width of the realsense image",
    )
    parser.add_argument("--fps",
        type= int,
        default= 30,
        help= "The fps of the realsense image",
    )
    parser.add_argument("--crop_left",
        type= int,
        default= 60,
        help= "number of pixels to crop from the original pyrealsense readings",
    )
    parser.add_argument("--crop_right",
        type= int,
        default= 46,
        help= "number of pixels to crop from the original pyrealsense readings",
    )
    parser.add_argument("--crop_top",
        type= int,
        default= 0,
        help= "number of pixels to crop from the original pyrealsense readings",
    )
    parser.add_argument("--crop_bottom",
        type= int,
        default= 0,
        help= "number of pixels to crop from the original pyrealsense readings",
    )
    parser.add_argument("--crop_far",
        type= float,
        default= 3.0,
        help= "aside from the far limit in the config, set all depth readings larger than this value to 3.0 in the un-normalized network input",
    )
    parser.add_argument("--enable_rgb",
        action= "store_true",
        help= "Whether to enable the rgb image",
    )
    parser.add_argument("--enable_vis",
        action= "store_true",
        help= "Whether to publish the realsense images",
    )

    args = parser.parse_args()
    main(args)