Add complementary info in the replay buffer

- Added complementary info in the add method - Added complementary info in the sample method
2025-03-31 17:36:35 +02:00 · 2025-03-31 17:36:35 +02:00 · 007fee9230
parent 4a1c26d9ee
commit 007fee9230
1 changed files with 37 additions and 0 deletions
--- a/lerobot/scripts/server/buffer.py
+++ b/lerobot/scripts/server/buffer.py
@ -244,6 +244,11 @@ class ReplayBuffer:
        self.dones = torch.empty((self.capacity,), dtype=torch.bool, device=self.storage_device)
        self.truncateds = torch.empty((self.capacity,), dtype=torch.bool, device=self.storage_device)
        # Initialize complementary_info storage
        self.complementary_info_keys = []
        self.complementary_info_storage = {}
        self.initialized = True
    def __len__(self):
@ -277,6 +282,30 @@ class ReplayBuffer:
        self.dones[self.position] = done
        self.truncateds[self.position] = truncated
        # Store complementary info if provided
        if complementary_info is not None:
            # Initialize storage for new keys on first encounter
            for key, value in complementary_info.items():
                if key not in self.complementary_info_keys:
                    self.complementary_info_keys.append(key)
                    if isinstance(value, torch.Tensor):
                        shape = value.shape if value.ndim > 0 else (1,)
                        self.complementary_info_storage[key] = torch.zeros(
                            (self.capacity, *shape), 
                            dtype=value.dtype, 
                            device=self.storage_device
                        )
                # Store the value
                if key in self.complementary_info_storage:
                    if isinstance(value, torch.Tensor):
                        self.complementary_info_storage[key][self.position] = value
                    else:
                        # For non-tensor values (like grasp_penalty)
                        self.complementary_info_storage[key][self.position] = torch.tensor(
                            value, device=self.storage_device
                    )
        self.position = (self.position + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)
@ -335,6 +364,13 @@ class ReplayBuffer:
        batch_dones = self.dones[idx].to(self.device).float()
        batch_truncateds = self.truncateds[idx].to(self.device).float()
        # Add complementary_info to batch if it exists
        batch_complementary_info = {}
        if hasattr(self, 'complementary_info_keys') and self.complementary_info_keys:
            for key in self.complementary_info_keys:
                if key in self.complementary_info_storage:
                    batch_complementary_info[key] = self.complementary_info_storage[key][idx].to(self.device)
        return BatchTransition(
            state=batch_state,
            action=batch_actions,
@ -342,6 +378,7 @@ class ReplayBuffer:
            next_state=batch_next_state,
            done=batch_dones,
            truncated=batch_truncateds,
            complementary_info=batch_complementary_info if batch_complementary_info else None,
        )
    @classmethod