Commit b8450e2
Merge pull request #91 from UT-Austin-RPL/retro
clipped sliding window attention w/ sink, clamped linear adv filter
2 parents c766f60 + 6a13c13

4 files changed: 83 additions & 3 deletions
amago/agent.py

Lines changed: 31 additions & 2 deletions
@@ -64,6 +64,37 @@ def binary_filter(adv: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
     return adv > threshold


+@gin.configurable
+def leaky_relu_filter(
+    adv: torch.Tensor,
+    beta: float = 2.0,
+    tau: float = 1e-2,
+    neg_slope: float = 0.05,
+    target_f0: float = 1e-2,
+    clip_weights_low: Optional[float] = 1e-7,
+    clip_weights_high: Optional[float] = 10.0,
+) -> torch.Tensor:
+    """Weights policy regression data using a leaky ReLU ramp with f(0) = target_f0.
+
+    Args:
+        adv: Tensor of advantages (Batch, Length, Gammas, 1).
+
+    Keyword Args:
+        beta: Positive scale controlling the slope of the main ramp.
+        tau: Advantage hinge location for switching from the leak to the main slope.
+        neg_slope: Slope for advantages below tau.
+        target_f0: Desired weight at adv=0 (before clipping).
+        clip_weights_low: If provided, clip output weights below this value. Defaults to 1e-7.
+        clip_weights_high: If provided, clip output weights above this value. Defaults to 10.0.
+    """
+    bias = target_f0 + neg_slope * tau / beta
+    x = (adv - tau) / beta
+    weights = bias + F.leaky_relu(x, negative_slope=neg_slope)
+    if clip_weights_low is not None or clip_weights_high is not None:
+        weights = torch.clamp(weights, min=clip_weights_low, max=clip_weights_high)
+    return weights
+
+
 @gin.configurable
 def exp_filter(
     adv: torch.Tensor,
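This is the "clamped linear adv filter" from the commit title: a leaky-ReLU ramp over advantages, with the bias solved so that the weight at adv=0 comes out to exactly target_f0, and the final weights clamped to a positive range. A minimal standalone sketch of that identity using the defaults above (the test advantages are made up):

import torch
import torch.nn.functional as F

# same defaults as the diff above
beta, tau, neg_slope, target_f0 = 2.0, 1e-2, 0.05, 1e-2

adv = torch.tensor([-1.0, 0.0, 1.0])  # made-up advantages
bias = target_f0 + neg_slope * tau / beta
weights = bias + F.leaky_relu((adv - tau) / beta, negative_slope=neg_slope)
weights = torch.clamp(weights, min=1e-7, max=10.0)

# at adv=0 the leak contributes -neg_slope * tau / beta, which the bias cancels
assert torch.isclose(weights[1], torch.tensor(target_f0))
print(weights)  # tensor([1.0000e-07, 1.0000e-02, 5.0525e-01])

Negative advantages keep a small positive weight (the clamp floor) rather than being zeroed out, while advantages above tau ramp up linearly.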
@@ -289,8 +320,6 @@ def __init__(
         self.critics = critic_type(**ac_kwargs, num_critics=num_critics)
         self.target_critics = critic_type(**ac_kwargs, num_critics=num_critics)
         self.maximized_critics = critic_type(**ac_kwargs, num_critics=num_critics)
-        if self.multibinary:
-            ac_kwargs["cont_dist_kind"] = "multibinary"
         self.actor = actor_type(**ac_kwargs)
         self.target_actor = actor_type(**ac_kwargs)
         # full weight copy to targets

amago/cli_utils.py

Lines changed: 1 addition & 1 deletion
@@ -464,7 +464,6 @@ def make_experiment_learn_only(experiment: amago.Experiment) -> amago.Experiment
     experiment.parallel_actors = 1
     experiment.always_save_latest = True
     experiment.always_load_latest = False
-    experiment.has_dset_edit_rights = True
     return experiment


@@ -488,6 +487,7 @@ def make_experiment_collect_only(experiment: amago.Experiment) -> amago.Experiment
     experiment.epochs = max(experiment.epochs, 1_000_000)
     # do not delete anything from the collection process
     experiment.has_dset_edit_rights = False
+    experiment.init_dsets()
     return experiment

amago/nets/traj_encoders.py

Lines changed: 1 addition & 0 deletions
@@ -532,6 +532,7 @@ def reset_hidden_state(
         hidden_state.reset(idxs=dones)
         return hidden_state

+    @torch.compile
     def forward(
         self,
         seq: torch.Tensor,
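The only change here decorates the trajectory encoder's forward with @torch.compile, which traces and compiles the method lazily on its first call. A self-contained sketch of the same decorator pattern (TinyEncoder is a made-up stand-in, not the AMAGO module):

import torch
import torch.nn as nn

class TinyEncoder(nn.Module):  # hypothetical stand-in for the trajectory encoder
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(8, 8)

    @torch.compile  # compiled on the first forward call
    def forward(self, seq: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.proj(seq))

out = TinyEncoder()(torch.randn(2, 4, 8))  # first call triggers compilation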

amago/nets/transformer.py

Lines changed: 50 additions & 0 deletions
@@ -347,6 +347,56 @@ def sliding_window_mask_mod(b, h, q_idx, kv_idx):
         )


+@gin.configurable
+class ClippedSlidingSinkAttention(FlexAttention):
+    """
+    Sliding-window attention with optional attention sink and logit clipping.
+    """
+
+    def __init__(
+        self,
+        causal: bool,
+        dropout: float,
+        window_size: int = gin.REQUIRED,
+        logit_clip: float = 0.0,
+        sink_size: int = 0,
+        sink_bias: float = 0.0,
+    ):
+        assert window_size > 0, "window_size must be > 0"
+        self.window_size = int(window_size)
+        self.logit_clip = float(logit_clip) if logit_clip is not None else 0.0
+        self.sink_size = int(sink_size)
+        self.sink_bias = float(sink_bias)
+
+        has_sink = self.sink_size > 0
+        has_sink_bias = has_sink and (self.sink_bias != 0.0)
+        clip_active = self.logit_clip > 0.0
+
+        def sliding_window_with_sink_mask_mod(
+            b: int, h: int, q_idx: int, kv_idx: int
+        ) -> bool:
+            dq = q_idx - kv_idx
+            in_window = (dq >= 0) & (dq <= self.window_size)
+            in_sink = (kv_idx < self.sink_size) if has_sink else False
+            return in_window | in_sink
+
+        def score_with_sink_and_clip(
+            score: torch.Tensor, b: int, h: int, q_idx: int, kv_idx: int
+        ) -> torch.Tensor:
+            if has_sink_bias:
+                score = torch.where(kv_idx < self.sink_size, score + self.sink_bias, score)
+            if clip_active:
+                score = torch.clamp(score, -self.logit_clip, self.logit_clip)
+            return score
+
+        super().__init__(
+            score_mod=score_with_sink_and_clip,
+            mask_mod=sliding_window_with_sink_mask_mod,
+            causal=causal,
+            dropout=dropout,
+        )
+
+
 @gin.configurable
 class SigmaReparam(nn.Linear):
     """SigmaReparam nn.Linear alternative.
