[skip ci] fix(audio save): separate raw audio files from encoded audio files for simplified deletion

This commit is contained in:
CarolinePascal 2025-04-16 11:37:55 +02:00
parent 9f3a424498
commit 61202761f5
No known key found for this signature in database
2 changed files with 4 additions and 6 deletions

View File

@@ -1066,11 +1066,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
shutil.rmtree(self.root / "images")
# delete raw audio files
-raw_audio_files = list(self.root.rglob("*.wav"))
-for raw_audio_file in raw_audio_files:
-raw_audio_file.unlink()
-if len(list(raw_audio_file.parent.iterdir())) == 0:
-raw_audio_file.parent.rmdir()
+img_dir = self.root / "raw_audio"
+if img_dir.is_dir():
+shutil.rmtree(self.root / "raw_audio")
if not episode_data: # Reset the buffer
self.episode_buffer = self.create_episode_buffer()

View File

@@ -56,7 +56,7 @@ TASKS_PATH = "meta/tasks.jsonl"
DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4"
DEFAULT_PARQUET_PATH = "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet"
DEFAULT_IMAGE_PATH = "images/{image_key}/episode_{episode_index:06d}/frame_{frame_index:06d}.png"
-DEFAULT_RAW_AUDIO_PATH = "audio/{audio_key}/episode_{episode_index:06d}.wav"
+DEFAULT_RAW_AUDIO_PATH = "raw_audio/{audio_key}/episode_{episode_index:06d}.wav"
DEFAULT_COMPRESSED_AUDIO_PATH = "audio/chunk-{episode_chunk:03d}/{audio_key}/episode_{episode_index:06d}.m4a"
DEFAULT_AUDIO_CHUNK_DURATION = 0.5 # seconds