diff --git a/lerobot/common/datasets/push_dataset_to_hub/compute_stats.py b/lerobot/common/datasets/push_dataset_to_hub/compute_stats.py
index b03a17d9..9417d1dd 100644
--- a/lerobot/common/datasets/push_dataset_to_hub/compute_stats.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/compute_stats.py
@@ -19,7 +19,7 @@ def get_stats_einops_patterns(dataset: LeRobotDataset | datasets.Dataset):
 
     dataloader = torch.utils.data.DataLoader(
         dataset,
-        num_workers=0,
+        num_workers=1,  # do not set to 0 when using `load_from_videos`
         batch_size=2,
         shuffle=False,
     )
diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py
index 7524e881..2b82b3cb 100644
--- a/lerobot/common/datasets/video_utils.py
+++ b/lerobot/common/datasets/video_utils.py
@@ -13,6 +13,11 @@ from datasets.features.features import register_feature
 
 def load_from_videos(
     item: dict[str, torch.Tensor], video_frame_keys: list[str], videos_dir: Path, tolerance_s: float
 ):
+    """Note: when using data workers (e.g. a DataLoader with num_workers > 0), do not call this function
+    in the main process (e.g. via a second DataLoader with num_workers=0), as it will result in a segmentation fault.
+    This probably happens because a memory reference to the video loader is created in the main process and a
+    subprocess then fails to access it.
+    """
     # since video path already contains "videos" (e.g. videos_dir="data/videos", path="videos/episode_0.mp4")
     data_dir = videos_dir.parent
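
For context, a minimal usage sketch (not part of the patch) of the pattern the new comment in compute_stats.py enforces: iterate a video-backed dataset through a DataLoader with num_workers >= 1 so that frame decoding via `load_from_videos` happens in a worker subprocess rather than the main process. The `LeRobotDataset` import path and the "lerobot/pusht" repo id below are assumptions used only for illustration.

    import torch
    from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

    # assumed example: a video-backed dataset identified by a Hub repo id
    dataset = LeRobotDataset("lerobot/pusht")

    dataloader = torch.utils.data.DataLoader(
        dataset,
        num_workers=1,  # keep >= 1: decoding video frames in the main process can segfault
        batch_size=2,
        shuffle=False,
    )

    for batch in dataloader:
        # batch is a dict of tensors, including decoded video frames
        break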