From 89b00cc09358fc352b4531e1ddcd1ef0c62d5a61 Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Fri, 28 Mar 2025 00:36:48 +0300 Subject: [PATCH 1/4] add cuda decoding (torchcodec) --- lerobot/common/datasets/lerobot_dataset.py | 5 +++-- lerobot/scripts/train.py | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 6ef955dd..6e8c003f 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -736,8 +736,9 @@ class LeRobotDataset(torch.utils.data.Dataset): if len(self.meta.video_keys) > 0: current_ts = item["timestamp"].item() query_timestamps = self._get_query_timestamps(current_ts, query_indices) - video_frames = self._query_videos(query_timestamps, ep_idx) - item = {**video_frames, **item} + if self.video_backend != "torchcodec-gpu": + video_frames = self._query_videos(query_timestamps, ep_idx) + item = {**video_frames, **item} if self.image_transforms is not None: image_keys = self.meta.camera_keys diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index f2b1e29e..35c959ba 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -51,7 +51,9 @@ from lerobot.common.utils.wandb_utils import WandBLogger from lerobot.configs import parser from lerobot.configs.train import TrainPipelineConfig from lerobot.scripts.eval import eval_policy - +from lerobot.common.datasets.video_utils import ( + decode_video_frames_torchcodec +) def update_policy( train_metrics: MetricsTracker, @@ -203,6 +205,23 @@ def train(cfg: TrainPipelineConfig): for _ in range(step, cfg.steps): start_time = time.perf_counter() batch = next(dl_iter) + + if dataset.video_backend == "torchcodec-gpu": + # add cuda decoding + for vid_key, timestamps_list in batch['query_timestamps'].items(): + frames_list = [] + # convert list of scalar tensors to a tensor of shape [T] + query_ts = torch.stack(timestamps_list).T # shape: [T] + for i in range(query_ts.shape[0]): + ep_idx = batch['episode_index'][i] + timestamps = query_ts[i].tolist() + video_path = dataset.root / dataset.meta.get_video_file_path(ep_idx, vid_key) + #TODO: (jadechoghari) make sure user has cuda and num_wokers == 0 + frames = decode_video_frames_torchcodec( + video_path, timestamps, dataset.tolerance_s, device="cuda" + ) + frames_list.append(frames.squeeze(0)) + batch[vid_key] = torch.stack(frames_list) train_tracker.dataloading_s = time.perf_counter() - start_time for key in batch: From db157451c345309a1526c5757be79005106f09a7 Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Fri, 28 Mar 2025 13:27:25 +0300 Subject: [PATCH 2/4] add assert cuda --- lerobot/scripts/train.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 35c959ba..06941824 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -207,16 +207,20 @@ def train(cfg: TrainPipelineConfig): batch = next(dl_iter) if dataset.video_backend == "torchcodec-gpu": + # make sure we have a cuda device + assert torch.cuda.is_available(), ( + "CUDA device not available. Please run on a machine with a GPU " + "to enable CUDA decoding when using `video_backend='torchcodec-gpu'`." + ) # add cuda decoding for vid_key, timestamps_list in batch['query_timestamps'].items(): frames_list = [] - # convert list of scalar tensors to a tensor of shape [T] - query_ts = torch.stack(timestamps_list).T # shape: [T] + # convert list of scalar tensors to a tensor of shape [T, B] + query_ts = torch.stack(timestamps_list).T # convert to shape: [B, T] for i in range(query_ts.shape[0]): ep_idx = batch['episode_index'][i] timestamps = query_ts[i].tolist() video_path = dataset.root / dataset.meta.get_video_file_path(ep_idx, vid_key) - #TODO: (jadechoghari) make sure user has cuda and num_wokers == 0 frames = decode_video_frames_torchcodec( video_path, timestamps, dataset.tolerance_s, device="cuda" ) From a1548d514159a7a5869ae0e1b9c9f91e19e236e8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Mar 2025 10:48:06 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lerobot/scripts/train.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 06941824..a439e811 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -27,6 +27,7 @@ from torch.optim import Optimizer from lerobot.common.datasets.factory import make_dataset from lerobot.common.datasets.sampler import EpisodeAwareSampler from lerobot.common.datasets.utils import cycle +from lerobot.common.datasets.video_utils import decode_video_frames_torchcodec from lerobot.common.envs.factory import make_env from lerobot.common.optim.factory import make_optimizer_and_scheduler from lerobot.common.policies.factory import make_policy @@ -51,9 +52,7 @@ from lerobot.common.utils.wandb_utils import WandBLogger from lerobot.configs import parser from lerobot.configs.train import TrainPipelineConfig from lerobot.scripts.eval import eval_policy -from lerobot.common.datasets.video_utils import ( - decode_video_frames_torchcodec -) + def update_policy( train_metrics: MetricsTracker, @@ -213,12 +212,12 @@ def train(cfg: TrainPipelineConfig): "to enable CUDA decoding when using `video_backend='torchcodec-gpu'`." ) # add cuda decoding - for vid_key, timestamps_list in batch['query_timestamps'].items(): + for vid_key, timestamps_list in batch["query_timestamps"].items(): frames_list = [] # convert list of scalar tensors to a tensor of shape [T, B] query_ts = torch.stack(timestamps_list).T # convert to shape: [B, T] for i in range(query_ts.shape[0]): - ep_idx = batch['episode_index'][i] + ep_idx = batch["episode_index"][i] timestamps = query_ts[i].tolist() video_path = dataset.root / dataset.meta.get_video_file_path(ep_idx, vid_key) frames = decode_video_frames_torchcodec( From 7e681bcc0f320001ebc35f15997dcdffe12803e3 Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Fri, 28 Mar 2025 20:54:26 +0300 Subject: [PATCH 4/4] quick fix --- lerobot/common/datasets/lerobot_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 6e8c003f..f7a2900f 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -739,6 +739,8 @@ class LeRobotDataset(torch.utils.data.Dataset): if self.video_backend != "torchcodec-gpu": video_frames = self._query_videos(query_timestamps, ep_idx) item = {**video_frames, **item} + else: + item["query_timestamps"] = query_timestamps if self.image_transforms is not None: image_keys = self.meta.camera_keys