Update lerobot/common/datasets/video_utils.py

Co-authored-by: Remi <re.cadene@gmail.com>
This commit is contained in:
Jade Choghari 2025-03-04 13:27:34 +03:00 committed by GitHub
parent c03b0db8aa
commit e1732b4954
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 10 additions and 1 deletions

View File

@@ -161,7 +161,16 @@ def decode_video_frames_torchcodec(
device: str = "cpu", device: str = "cpu",
log_loaded_timestamps: bool = False, log_loaded_timestamps: bool = False,
) -> torch.Tensor: ) -> torch.Tensor:
"""Loads frames associated with the requested timestamps of a video using torchcodec.""" """Loads frames associated with the requested timestamps of a video using torchcodec.
Note: Setting device="cuda" outside the main process, e.g. in data loader workers, will lead to CUDA initialization errors.
Note: Videos benefit from inter-frame compression. Instead of storing every frame individually,
the encoder stores a reference frame (a key frame) and encodes subsequent frames as differences relative to
that key frame. As a consequence, to access a requested frame, we need to decode the preceding key frame
and all subsequent frames up to the requested frame. The number of key frames in a video
can be adjusted during encoding to trade off decoding time against video size in bytes.
"""
video_path = str(video_path) video_path = str(video_path)
# initialize video decoder # initialize video decoder
decoder = VideoDecoder(video_path, device=device) decoder = VideoDecoder(video_path, device=device)