diff --git a/examples/pufferl_clifford_curriculum.py b/examples/pufferl_clifford_curriculum.py new file mode 100644 index 0000000000..38f593e8ba --- /dev/null +++ b/examples/pufferl_clifford_curriculum.py @@ -0,0 +1,495 @@ +#!/usr/bin/env python3 + +import argparse +import math +import os +import sys +from collections import defaultdict + +import numpy as np +import torch + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if REPO_ROOT not in sys.path: + sys.path.insert(0, REPO_ROOT) + +from pufferlib.ocean.clifford import Clifford +from pufferlib.pytorch import layer_init + +N_QUBITS = 4 +START_DIFFICULTY = 1.0 +MAX_DIFFICULTY = 1000.0 +SUCCESS_THRESHOLD = 0.90 +DIFFICULTY_STEP = 0.25 +SINGLE_QUBIT_COST = 0.01 +GOAL_BONUS = 0.0 +REWARD_MODE = "hamming_left" +HAMMING_LEFT_SCALE = 0.5 + +NUM_ENVS = 2048 +TOTAL_TIMESTEPS = 50_000_000 +LEARNING_RATE = 2.5e-4 +HIDDEN_SIZE = 256 +HIDDEN_LAYERS = 2 +N_STEPS = 128 +MINIBATCH_SIZE = 8192 +UPDATE_EPOCHS = 4 +GAMMA = 0.99 +GAE_LAMBDA = 0.95 +ENT_COEF = 0.0 +CLIP_COEF = 0.15 +VF_CLIP_COEF = 0.2 +TARGET_KL = 0.0 + +EVAL_FREQ_STEPS = 98_304 +PROGRESSION_EVAL_EPISODES = 50 +GREEDY_EVAL_DIFFICULTY_CUTOFF = 20.0 +MIN_ADVANCE_EVALS = 1 +EMA_SR_ALPHA = 0.9 +MAX_STEPS_SLACK = 64 +MIN_MAX_STEPS = 32 +FORCE_MAX_STEPS_AFTER_DIFFICULTY = 12.0 +FORCE_MAX_STEPS_VALUE = 1000 + +TRAIN_CONFIG_DEFAULTS = { + "env": "puffer_clifford", + "torch_deterministic": True, + "cpu_offload": False, + "optimizer": "muon", + "precision": "float32", + "anneal_lr": True, + "min_lr_ratio": 0.0, + "vf_coef": 2.0, + "max_grad_norm": 1.5, + "adam_beta1": 0.95, + "adam_beta2": 0.999, + "adam_eps": 1e-12, + "data_dir": "experiments", + "checkpoint_interval": 200, + "max_minibatch_size": 32768, + "compile": False, + "use_rnn": False, + "vtrace_rho_clip": 1.0, + "vtrace_c_clip": 1.0, + "prio_alpha": 0.8, + "prio_beta0": 0.2, +} + +PANEL_STAT_KEYS = { + "curriculum/difficulty", + "curriculum/rollout_success_rate", + 
"curriculum/decision_success_rate", + "curriculum/ema_success_rate", + "curriculum/mean_reward", + "curriculum/mean_cz", + "curriculum/evals_since_advance", + "curriculum/consecutive_above_threshold", + "curriculum/ent_coef", + "perf", + "score", + "episode_return", + "episode_length", + "mean_cz", + "success_rate", +} + + +def default_device(): + return "cuda" if torch.cuda.is_available() else "cpu" + + +class Policy(torch.nn.Module): + def __init__(self, env, hidden_size=HIDDEN_SIZE, hidden_layers=HIDDEN_LAYERS): + super().__init__() + obs_size = int(np.prod(env.single_observation_space.shape)) + num_actions = int(env.single_action_space.n) + hidden_layers = max(int(hidden_layers), 1) + + backbone = [torch.nn.LayerNorm(obs_size)] + in_features = obs_size + for _ in range(hidden_layers): + backbone.append(layer_init(torch.nn.Linear(in_features, int(hidden_size)))) + backbone.append(torch.nn.GELU()) + in_features = int(hidden_size) + self.backbone = torch.nn.Sequential(*backbone) + self.action_head = torch.nn.Linear(in_features, num_actions) + self.value_head = torch.nn.Linear(in_features, 1) + layer_init(self.action_head, std=0.01) + layer_init(self.value_head, std=1.0) + + def forward_eval(self, observations, state=None): + hidden = self.backbone(observations.float().view(observations.shape[0], -1)) + logits = self.action_head(hidden) + values = self.value_head(hidden) + return logits, values + + def forward(self, observations, state=None): + return self.forward_eval(observations, state) + +def make_train_config(args): + return { + **TRAIN_CONFIG_DEFAULTS, + "total_timesteps": int(args.total_timesteps), + "learning_rate": float(args.learning_rate), + "batch_size": int(args.num_envs) * N_STEPS, + "bptt_horizon": N_STEPS, + "minibatch_size": max(MINIBATCH_SIZE, N_STEPS), + "update_epochs": UPDATE_EPOCHS, + "device": args.device, + "seed": int(args.seed), + "gamma": GAMMA, + "gae_lambda": GAE_LAMBDA, + "ent_coef": float(args.ent_coef), + "clip_coef": CLIP_COEF, + 
"vf_clip_coef": VF_CLIP_COEF, + "target_kl": TARGET_KL, + } + + +def make_env(num_envs, difficulty, max_steps, seed): + return Clifford( + num_envs=int(num_envs), + n_qubits=N_QUBITS, + difficulty=float(difficulty), + max_steps=int(max_steps), + single_qubit_cost=SINGLE_QUBIT_COST, + goal_bonus=GOAL_BONUS, + reward_mode=REWARD_MODE, + hamming_left_scale=HAMMING_LEFT_SCALE, + use_reset_pool=True, + seed=int(seed), + ) + + +def run_greedy_eval(policy, env, device, episodes, seed): + completed = 0 + successes = 0 + rewards = [] + completed_cz_counts = [] + + obs, _ = env.reset(seed=seed) + episode_rewards = np.zeros(env.num_agents, dtype=np.float32) + episode_cz_counts = np.zeros(env.num_agents, dtype=np.int32) + + policy_was_training = policy.training + policy.eval() + + with torch.no_grad(): + while completed < episodes: + obs_t = torch.as_tensor(obs, device=device) + logits, _values = policy.forward_eval(obs_t) + actions = torch.argmax(logits, dim=-1).cpu().numpy().astype(np.int32, copy=False) + obs, step_rewards, terminals, truncations, _info = env.step(actions) + episode_rewards += step_rewards + for idx, action in enumerate(actions): + if env._actions[int(action)][0] == "cz": + episode_cz_counts[idx] += 1 + + done_mask = np.logical_or(terminals, truncations) + done_indices = np.flatnonzero(done_mask) + for idx in done_indices: + completed += 1 + rewards.append(float(episode_rewards[idx])) + completed_cz_counts.append(int(episode_cz_counts[idx])) + if terminals[idx]: + successes += 1 + episode_rewards[idx] = 0.0 + episode_cz_counts[idx] = 0 + if completed >= episodes: + break + + if policy_was_training: + policy.train() + + return { + "success_rate": successes / max(completed, 1), + "mean_reward": float(np.mean(rewards)) if rewards else 0.0, + "mean_cz": float(np.mean(completed_cz_counts)) if completed_cz_counts else 0.0, + } + + +def _metric_values(rollout_stats, key): + values = rollout_stats.get(key, []) + if isinstance(values, np.ndarray): + values = 
values.tolist() + if isinstance(values, (list, tuple)): + return [float(v) for v in values] + if values is None: + return [] + return [float(values)] + + +def merge_rollout_stats(rollout_stats, extra_metrics): + merged = defaultdict(list) + for source in (rollout_stats, extra_metrics): + if not source: + continue + for key, value in source.items(): + if isinstance(value, np.ndarray): + value = value.tolist() + if isinstance(value, (list, tuple)): + merged[key].extend(value) + else: + merged[key].append(value) + return merged + + +def rollout_progression_metrics(rollout_stats): + if not rollout_stats: + return None + episode_count = sum(_metric_values(rollout_stats, "episode_count")) + if episode_count <= 0.0: + return None + + success_count = sum(_metric_values(rollout_stats, "success_count")) + reward_sum = sum(_metric_values(rollout_stats, "episode_return_sum")) + episode_cz_sum = sum(_metric_values(rollout_stats, "episode_cz_sum")) + return { + "success_rate": success_count / episode_count, + "mean_reward": reward_sum / episode_count, + "mean_cz": episode_cz_sum / episode_count, + } + + +def compute_max_steps(difficulty): + max_steps = int(math.ceil(max(float(difficulty), 0.0) - 1e-12)) + MAX_STEPS_SLACK + if ( + FORCE_MAX_STEPS_AFTER_DIFFICULTY is not None + and FORCE_MAX_STEPS_VALUE > 0 + and difficulty >= FORCE_MAX_STEPS_AFTER_DIFFICULTY + ): + max_steps = FORCE_MAX_STEPS_VALUE + return max(max_steps, MIN_MAX_STEPS, 1) + + +def update_curriculum_stats(trainer, values): + for key, value in values.items(): + trainer.stats[key] = value + trainer.last_stats[key] = value + + +def curriculum_stats( + difficulty, + rollout_success_rate, + decision_success_rate, + ema_success_rate, + mean_reward, + mean_cz, + evals_since_advance, + consecutive_above_threshold, + ent_coef, +): + return { + "curriculum/difficulty": float(difficulty), + "curriculum/mean_reward": float(mean_reward), + "curriculum/decision_success_rate": float(decision_success_rate), + 
"curriculum/ema_success_rate": float(ema_success_rate), + "curriculum/rollout_success_rate": float(rollout_success_rate), + "curriculum/mean_cz": float(mean_cz), + "curriculum/evals_since_advance": float(evals_since_advance), + "curriculum/consecutive_above_threshold": float(consecutive_above_threshold), + "curriculum/ent_coef": float(ent_coef), + } + + +def prune_panel_stats(stats): + if not stats: + return + for key in list(stats.keys()): + if key not in PANEL_STAT_KEYS: + del stats[key] + + +def install_panel_filter(trainer): + original_print_dashboard = trainer.print_dashboard + + def filtered_print_dashboard(*args, **kwargs): + if trainer.stats: + trainer.raw_stats = dict(trainer.stats) + elif trainer.last_stats: + trainer.raw_stats = dict(trainer.last_stats) + prune_panel_stats(trainer.stats) + prune_panel_stats(trainer.last_stats) + return original_print_dashboard(*args, **kwargs) + + trainer.print_dashboard = filtered_print_dashboard + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Minimal Ocean Clifford trainer focused on the current working 4-qubit regime" + ) + parser.add_argument("--device", default=default_device()) + parser.add_argument("--num-envs", type=int, default=NUM_ENVS) + parser.add_argument("--total-timesteps", type=int, default=TOTAL_TIMESTEPS) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE) + parser.add_argument("--ent-coef", type=float, default=ENT_COEF) + parser.add_argument("--difficulty-step", type=float, default=DIFFICULTY_STEP) + return parser.parse_args() + + +def validate_args(args): + if args.num_envs <= 0: + raise ValueError("num_envs must be positive") + if args.total_timesteps < int(args.num_envs) * N_STEPS: + raise ValueError(f"total_timesteps must be at least {int(args.num_envs) * N_STEPS}") + if args.learning_rate <= 0.0: + raise ValueError("learning_rate must be positive") + if args.ent_coef < 0.0: + raise 
ValueError("ent_coef must be non-negative") + if args.difficulty_step <= 0.0: + raise ValueError("difficulty_step must be positive") + + +def main(): + args = parse_args() + validate_args(args) + from pufferlib import pufferl + + current_difficulty = START_DIFFICULTY + current_max_steps = compute_max_steps(current_difficulty) + + train_env = make_env(args.num_envs, current_difficulty, current_max_steps, args.seed) + eval_env = make_env( + min(PROGRESSION_EVAL_EPISODES, int(args.num_envs)), + current_difficulty, + current_max_steps, + args.seed + 10_000, + ) + + policy = Policy(train_env).to(args.device) + trainer = pufferl.PuffeRL(make_train_config(args), train_env, policy) + install_panel_filter(trainer) + + last_progress_step = 0 + ema_success_rate = 0.0 + ema_initialized = False + consecutive_above_threshold = 0 + evals_since_advance = 0 + last_rollout_success_rate = 0.0 + last_decision_success_rate = 0.0 + last_mean_reward = 0.0 + last_mean_cz = 0.0 + + update_curriculum_stats( + trainer, + curriculum_stats( + current_difficulty, + last_rollout_success_rate, + last_decision_success_rate, + ema_success_rate, + last_mean_reward, + last_mean_cz, + evals_since_advance, + consecutive_above_threshold, + trainer.config["ent_coef"], + ), + ) + trainer.print_dashboard(clear=True) + + try: + while trainer.epoch < trainer.total_epochs: + update_curriculum_stats( + trainer, + curriculum_stats( + current_difficulty, + last_rollout_success_rate, + last_decision_success_rate, + ema_success_rate, + last_mean_reward, + last_mean_cz, + evals_since_advance, + consecutive_above_threshold, + trainer.config["ent_coef"], + ), + ) + trainer.evaluate() + trainer.train() + + if trainer.global_step - last_progress_step < EVAL_FREQ_STEPS: + continue + last_progress_step = trainer.global_step + + rollout_stats = merge_rollout_stats(getattr(trainer, "raw_stats", None), train_env.flush_logs()) + rollout_metrics = rollout_progression_metrics(rollout_stats) + if rollout_metrics is None: + 
continue + + decision_metrics = rollout_metrics + if current_difficulty < GREEDY_EVAL_DIFFICULTY_CUTOFF: + eval_env.set_difficulty(current_difficulty) + eval_env.set_max_steps(current_max_steps) + decision_metrics = run_greedy_eval( + policy=policy, + env=eval_env, + device=args.device, + episodes=PROGRESSION_EVAL_EPISODES, + seed=args.seed + trainer.epoch, + ) + + raw_success_rate = float(rollout_metrics["success_rate"]) + if not ema_initialized: + ema_success_rate = raw_success_rate + ema_initialized = True + else: + ema_success_rate = ( + float(EMA_SR_ALPHA) * ema_success_rate + + (1.0 - float(EMA_SR_ALPHA)) * raw_success_rate + ) + + decision_success_rate = float(decision_metrics["success_rate"]) + advancement_success_rate = ( + decision_success_rate + if current_difficulty < GREEDY_EVAL_DIFFICULTY_CUTOFF + else ema_success_rate + ) + + if advancement_success_rate >= SUCCESS_THRESHOLD: + consecutive_above_threshold += 1 + else: + consecutive_above_threshold = 0 + + if ( + consecutive_above_threshold >= MIN_ADVANCE_EVALS + and current_difficulty < MAX_DIFFICULTY + ): + current_difficulty = min(MAX_DIFFICULTY, current_difficulty + args.difficulty_step) + current_max_steps = compute_max_steps(current_difficulty) + train_env.set_difficulty(current_difficulty) + train_env.set_max_steps(current_max_steps) + eval_env.set_difficulty(current_difficulty) + eval_env.set_max_steps(current_max_steps) + consecutive_above_threshold = 0 + evals_since_advance = 0 + else: + evals_since_advance += 1 + + last_rollout_success_rate = raw_success_rate + last_decision_success_rate = decision_success_rate + last_mean_reward = float(rollout_metrics["mean_reward"]) + last_mean_cz = float(rollout_metrics["mean_cz"]) + + update_curriculum_stats( + trainer, + curriculum_stats( + current_difficulty, + last_rollout_success_rate, + last_decision_success_rate, + ema_success_rate, + last_mean_reward, + last_mean_cz, + evals_since_advance, + consecutive_above_threshold, + 
trainer.config["ent_coef"], + ), + ) + + trainer.print_dashboard() + finally: + trainer.close() + eval_env.close() + + +if __name__ == "__main__": + main() diff --git a/pufferlib/ocean/clifford/__init__.py b/pufferlib/ocean/clifford/__init__.py new file mode 100644 index 0000000000..52850904ee --- /dev/null +++ b/pufferlib/ocean/clifford/__init__.py @@ -0,0 +1,11 @@ +from .reference import ReferenceCliffordEnv + +try: + from .clifford import Clifford +except ImportError: + Clifford = None + +__all__ = [ + "Clifford", + "ReferenceCliffordEnv", +] diff --git a/pufferlib/ocean/clifford/binding.c b/pufferlib/ocean/clifford/binding.c new file mode 100644 index 0000000000..8f7b6718ba --- /dev/null +++ b/pufferlib/ocean/clifford/binding.c @@ -0,0 +1,411 @@ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +#include +#include +#include + +#include "clifford.h" + +static int require_contiguous_array(PyObject* obj, int typenum, int ndim, const char* name, PyArrayObject** out) { + if (!PyObject_TypeCheck(obj, &PyArray_Type)) { + PyErr_Format(PyExc_TypeError, "%s must be a NumPy array", name); + return 0; + } + PyArrayObject* arr = (PyArrayObject*)obj; + if (!PyArray_ISCONTIGUOUS(arr)) { + PyErr_Format(PyExc_ValueError, "%s must be contiguous", name); + return 0; + } + if (PyArray_TYPE(arr) != typenum) { + PyErr_Format(PyExc_TypeError, "%s has unexpected dtype", name); + return 0; + } + if (ndim >= 0 && PyArray_NDIM(arr) != ndim) { + PyErr_Format(PyExc_ValueError, "%s must have ndim=%d", name, ndim); + return 0; + } + *out = arr; + return 1; +} + +static PyObject* require_kwarg(PyObject* kwargs, const char* name) { + PyObject* obj = PyDict_GetItemString(kwargs, name); + if (obj == NULL) { + PyErr_Format(PyExc_TypeError, "missing required keyword argument '%s'", name); + } + return obj; +} + +static CliffordVecEnv* unpack_handle(PyObject* args) { + PyObject* handle_obj = PyTuple_GetItem(args, 0); + if (!PyLong_Check(handle_obj)) { + PyErr_SetString(PyExc_TypeError, "handle 
must be an integer"); + return NULL; + } + CliffordVecEnv* vec = (CliffordVecEnv*)PyLong_AsVoidPtr(handle_obj); + if (vec == NULL) { + PyErr_SetString(PyExc_ValueError, "invalid native Clifford handle"); + return NULL; + } + return vec; +} + +static PyObject* py_vec_init(PyObject* self, PyObject* args, PyObject* kwargs) { + (void)self; + if (PyTuple_Size(args) != 7) { + PyErr_SetString(PyExc_TypeError, "vec_init requires 7 positional arguments"); + return NULL; + } + + PyArrayObject *obs_arr = NULL, *act_arr = NULL, *rew_arr = NULL, *term_arr = NULL, *trunc_arr = NULL; + if (!require_contiguous_array(PyTuple_GetItem(args, 0), NPY_UINT8, 2, "observations", &obs_arr)) return NULL; + if (!require_contiguous_array(PyTuple_GetItem(args, 1), NPY_INT32, 1, "actions", &act_arr)) return NULL; + if (!require_contiguous_array(PyTuple_GetItem(args, 2), NPY_FLOAT32, 1, "rewards", &rew_arr)) return NULL; + if (!require_contiguous_array(PyTuple_GetItem(args, 3), NPY_BOOL, 1, "terminals", &term_arr)) return NULL; + if (!require_contiguous_array(PyTuple_GetItem(args, 4), NPY_BOOL, 1, "truncations", &trunc_arr)) return NULL; + + int num_envs = (int)PyLong_AsLong(PyTuple_GetItem(args, 5)); + int seed = (int)PyLong_AsLong(PyTuple_GetItem(args, 6)); + if (PyErr_Occurred()) { + return NULL; + } + if (num_envs <= 0) { + PyErr_SetString(PyExc_ValueError, "num_envs must be positive"); + return NULL; + } + + PyArrayObject *gate_kinds = NULL, *action_q0 = NULL, *action_q1 = NULL; + if (!require_contiguous_array(require_kwarg(kwargs, "gate_kinds"), NPY_INT32, 1, "gate_kinds", &gate_kinds)) return NULL; + if (!require_contiguous_array(require_kwarg(kwargs, "action_q0"), NPY_INT32, 1, "action_q0", &action_q0)) return NULL; + if (!require_contiguous_array(require_kwarg(kwargs, "action_q1"), NPY_INT32, 1, "action_q1", &action_q1)) return NULL; + + int num_actions = (int)PyArray_DIM(gate_kinds, 0); + if (num_actions <= 0 || PyArray_DIM(action_q0, 0) != num_actions || PyArray_DIM(action_q1, 0) != 
num_actions) { + PyErr_SetString(PyExc_ValueError, "action arrays must have the same positive length"); + return NULL; + } + + int n_qubits = (int)PyLong_AsLong(require_kwarg(kwargs, "n_qubits")); + double difficulty = PyFloat_AsDouble(require_kwarg(kwargs, "difficulty")); + int max_steps = (int)PyLong_AsLong(require_kwarg(kwargs, "max_steps")); + float single_qubit_cost = (float)PyFloat_AsDouble(require_kwarg(kwargs, "single_qubit_cost")); + float goal_bonus = (float)PyFloat_AsDouble(require_kwarg(kwargs, "goal_bonus")); + int reward_mode = (int)PyLong_AsLong(require_kwarg(kwargs, "reward_mode")); + float hamming_left_scale = (float)PyFloat_AsDouble(require_kwarg(kwargs, "hamming_left_scale")); + PyObject* reset_pool_enabled_obj = PyDict_GetItemString(kwargs, "use_reset_pool"); + int reset_pool_enabled = reset_pool_enabled_obj == NULL ? 1 : (int)PyObject_IsTrue(reset_pool_enabled_obj); + if (PyErr_Occurred()) { + return NULL; + } + if (n_qubits <= 0 || 2 * n_qubits > 64) { + PyErr_SetString(PyExc_ValueError, "native Clifford env requires 0 < n_qubits and 2*n_qubits <= 64"); + return NULL; + } + if (max_steps <= 0) { + PyErr_SetString(PyExc_ValueError, "max_steps must be positive"); + return NULL; + } + if (reward_mode != REWARD_GATE_COST && reward_mode != REWARD_HAMMING_LEFT) { + PyErr_SetString( + PyExc_ValueError, + "reward_mode must be gate_cost or hamming_left" + ); + return NULL; + } + if (hamming_left_scale < 0.0f) { + PyErr_SetString(PyExc_ValueError, "hamming_left_scale must be non-negative"); + return NULL; + } + + const int dim = 2 * n_qubits; + const int obs_size = dim * dim; + if (PyArray_DIM(obs_arr, 0) != num_envs || PyArray_DIM(obs_arr, 1) != obs_size) { + PyErr_SetString(PyExc_ValueError, "observations must have shape (num_envs, (2*n_qubits)^2)"); + return NULL; + } + if (PyArray_DIM(act_arr, 0) != num_envs || PyArray_DIM(rew_arr, 0) != num_envs || + PyArray_DIM(term_arr, 0) != num_envs || PyArray_DIM(trunc_arr, 0) != num_envs) { + 
PyErr_SetString(PyExc_ValueError, "action and buffer arrays must have length num_envs"); + return NULL; + } + + CliffordVecEnv* vec = (CliffordVecEnv*)calloc(1, sizeof(CliffordVecEnv)); + if (vec == NULL) { + return PyErr_NoMemory(); + } + vec->num_envs = num_envs; + vec->n_qubits = n_qubits; + vec->dim = dim; + vec->obs_size = obs_size; + set_difficulty_level(vec, difficulty); + vec->max_steps = max_steps; + vec->num_actions = num_actions; + vec->reset_pool_enabled = reset_pool_enabled; + vec->single_qubit_cost = single_qubit_cost; + vec->goal_bonus = goal_bonus; + vec->reward_mode = reward_mode; + vec->hamming_left_scale = hamming_left_scale; + init_observation_tables(); + + vec->actions = (CliffordAction*)calloc((size_t)num_actions, sizeof(CliffordAction)); + vec->envs = (CliffordEnv*)calloc((size_t)num_envs, sizeof(CliffordEnv)); + if (vec->actions == NULL || vec->envs == NULL) { + free(vec->actions); + free(vec->envs); + free(vec); + return PyErr_NoMemory(); + } + + int* gate_kind_ptr = (int*)PyArray_DATA(gate_kinds); + int* q0_ptr = (int*)PyArray_DATA(action_q0); + int* q1_ptr = (int*)PyArray_DATA(action_q1); + for (int i = 0; i < num_actions; ++i) { + vec->actions[i].gate_kind = gate_kind_ptr[i]; + vec->actions[i].q0 = q0_ptr[i]; + vec->actions[i].q1 = q1_ptr[i]; + } + for (int col = 0; col < vec->dim; ++col) { + vec->identity_cols[col] = 1ULL << col; + } + rebuild_reset_pool(vec, (uint64_t)(unsigned int)seed); + + unsigned char* obs_ptr = (unsigned char*)PyArray_DATA(obs_arr); + int* act_ptr = (int*)PyArray_DATA(act_arr); + float* rew_ptr = (float*)PyArray_DATA(rew_arr); + unsigned char* term_ptr = (unsigned char*)PyArray_DATA(term_arr); + unsigned char* trunc_ptr = (unsigned char*)PyArray_DATA(trunc_arr); + uint64_t seed_state = (uint64_t)(unsigned int)seed; + for (int env_idx = 0; env_idx < num_envs; ++env_idx) { + CliffordEnv* env = &vec->envs[env_idx]; + env->cols = (uint64_t*)calloc((size_t)vec->dim, sizeof(uint64_t)); + if (env->cols == NULL) { + for 
(int j = 0; j < env_idx; ++j) { + free(vec->envs[j].cols); + } + free(vec->reset_pool_cols); + free(vec->actions); + free(vec->envs); + free(vec); + return PyErr_NoMemory(); + } + env->observations = obs_ptr + (env_idx * vec->obs_size); + env->actions = act_ptr + env_idx; + env->rewards = rew_ptr + env_idx; + env->terminals = term_ptr + env_idx; + env->truncations = trunc_ptr + env_idx; + rng_seed(&env->rng, splitmix64_next(&seed_state) ^ (uint64_t)(env_idx + 1)); + reset_single(vec, env); + } + return PyLong_FromVoidPtr(vec); +} + +static PyObject* py_vec_reset(PyObject* self, PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 2) { + PyErr_SetString(PyExc_TypeError, "vec_reset requires handle and seed"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + int seed = (int)PyLong_AsLong(PyTuple_GetItem(args, 1)); + if (PyErr_Occurred()) { + return NULL; + } + if (seed >= 0) { + rebuild_reset_pool(vec, (uint64_t)(unsigned int)seed); + } + uint64_t seed_state = (uint64_t)(unsigned int)seed; + for (int env_idx = 0; env_idx < vec->num_envs; ++env_idx) { + if (seed >= 0) { + rng_seed(&vec->envs[env_idx].rng, splitmix64_next(&seed_state) ^ (uint64_t)(env_idx + 1)); + } + vec->envs[env_idx].terminals[0] = 0; + vec->envs[env_idx].truncations[0] = 0; + vec->envs[env_idx].rewards[0] = 0.0f; + reset_single(vec, &vec->envs[env_idx]); + } + Py_RETURN_NONE; +} + +static PyObject* py_vec_step(PyObject* self, PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_step requires handle"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + for (int env_idx = 0; env_idx < vec->num_envs; ++env_idx) { + CliffordEnv* env = &vec->envs[env_idx]; + env->terminals[0] = 0; + env->truncations[0] = 0; + env->rewards[0] = 0.0f; + step_single(vec, env); + } + Py_RETURN_NONE; +} + +static PyObject* py_vec_close(PyObject* self, 
PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_close requires handle"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + for (int env_idx = 0; env_idx < vec->num_envs; ++env_idx) { + free(vec->envs[env_idx].cols); + } + free(vec->reset_pool_cols); + free(vec->actions); + free(vec->envs); + free(vec); + Py_RETURN_NONE; +} + +static PyObject* py_vec_set_difficulty(PyObject* self, PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 2) { + PyErr_SetString(PyExc_TypeError, "vec_set_difficulty requires handle and difficulty"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + double difficulty = PyFloat_AsDouble(PyTuple_GetItem(args, 1)); + if (PyErr_Occurred()) { + return NULL; + } + set_difficulty_level(vec, difficulty); + rebuild_reset_pool(vec, rng_next_u64(&vec->pool_rng)); + Py_RETURN_NONE; +} + +static PyObject* py_vec_set_max_steps(PyObject* self, PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 2) { + PyErr_SetString(PyExc_TypeError, "vec_set_max_steps requires handle and max_steps"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + vec->max_steps = (int)PyLong_AsLong(PyTuple_GetItem(args, 1)); + if (PyErr_Occurred()) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject* py_vec_set_matrix(PyObject* self, PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 3) { + PyErr_SetString(PyExc_TypeError, "vec_set_matrix requires handle, env_index, matrix"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + int env_index = (int)PyLong_AsLong(PyTuple_GetItem(args, 1)); + if (PyErr_Occurred()) { + return NULL; + } + if (env_index < 0 || env_index >= vec->num_envs) { + PyErr_SetString(PyExc_IndexError, "env_index out of range"); + return NULL; + } + 
PyArrayObject* matrix = NULL; + if (!require_contiguous_array(PyTuple_GetItem(args, 2), NPY_UINT8, 2, "matrix", &matrix)) { + return NULL; + } + if (!set_matrix_from_dense( + vec, + &vec->envs[env_index], + (unsigned char*)PyArray_DATA(matrix), + (int)PyArray_DIM(matrix, 0), + (int)PyArray_DIM(matrix, 1))) { + PyErr_SetString(PyExc_ValueError, "matrix shape does not match native Clifford env"); + return NULL; + } + vec->envs[env_index].rewards[0] = 0.0f; + vec->envs[env_index].terminals[0] = 0; + vec->envs[env_index].truncations[0] = 0; + Py_RETURN_NONE; +} + +static PyObject* py_vec_log(PyObject* self, PyObject* args) { + (void)self; + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_log requires handle"); + return NULL; + } + CliffordVecEnv* vec = unpack_handle(args); + if (vec == NULL) { + return NULL; + } + PyObject* out = PyDict_New(); + if (vec->log.n <= 0.0f) { + return out; + } + const float inv_n = 1.0f / vec->log.n; + PyDict_SetItemString(out, "perf", PyFloat_FromDouble(vec->log.perf * inv_n)); + PyDict_SetItemString(out, "score", PyFloat_FromDouble(vec->log.score * inv_n)); + PyDict_SetItemString(out, "episode_return", PyFloat_FromDouble(vec->log.episode_return * inv_n)); + PyDict_SetItemString(out, "episode_length", PyFloat_FromDouble(vec->log.episode_length * inv_n)); + PyDict_SetItemString(out, "mean_cz", PyFloat_FromDouble(vec->log.episode_cz_sum * inv_n)); + PyDict_SetItemString(out, "success_rate", PyFloat_FromDouble(vec->log.success_rate * inv_n)); + PyDict_SetItemString(out, "n", PyFloat_FromDouble(vec->log.n)); + PyDict_SetItemString(out, "episode_count", PyFloat_FromDouble(vec->log.n)); + PyDict_SetItemString(out, "episode_return_sum", PyFloat_FromDouble(vec->log.episode_return)); + PyDict_SetItemString(out, "episode_length_sum", PyFloat_FromDouble(vec->log.episode_length)); + PyDict_SetItemString(out, "episode_cz_sum", PyFloat_FromDouble(vec->log.episode_cz_sum)); + PyDict_SetItemString(out, "episode_cz_max", 
PyFloat_FromDouble(vec->log.episode_cz_max)); + PyDict_SetItemString(out, "success_count", PyFloat_FromDouble(vec->log.success_count)); + PyDict_SetItemString(out, "success_step_sum", PyFloat_FromDouble(vec->log.success_step_sum)); + PyDict_SetItemString(out, "success_step_sq_sum", PyFloat_FromDouble(vec->log.success_step_sq_sum)); + PyDict_SetItemString( + out, + "mean_success_cz", + PyFloat_FromDouble(vec->log.success_count > 0.0f ? (vec->log.success_cz_sum / vec->log.success_count) : 0.0) + ); + PyDict_SetItemString(out, "success_cz_sum", PyFloat_FromDouble(vec->log.success_cz_sum)); + PyDict_SetItemString(out, "success_cz_max", PyFloat_FromDouble(vec->log.success_cz_max)); + PyDict_SetItemString(out, "success_step_min", PyFloat_FromDouble(vec->log.success_step_min)); + PyDict_SetItemString(out, "success_step_max", PyFloat_FromDouble(vec->log.success_step_max)); + memset(&vec->log, 0, sizeof(CliffordLog)); + return out; +} + +static PyMethodDef MODULE_METHODS[] = { + {"vec_init", (PyCFunction)py_vec_init, METH_VARARGS | METH_KEYWORDS, "Create a native Clifford vec env"}, + {"vec_reset", py_vec_reset, METH_VARARGS, "Reset the native Clifford vec env"}, + {"vec_step", py_vec_step, METH_VARARGS, "Step the native Clifford vec env"}, + {"vec_close", py_vec_close, METH_VARARGS, "Close the native Clifford vec env"}, + {"vec_log", py_vec_log, METH_VARARGS, "Aggregate native Clifford logs"}, + {"vec_set_difficulty", py_vec_set_difficulty, METH_VARARGS, "Update native Clifford difficulty"}, + {"vec_set_max_steps", py_vec_set_max_steps, METH_VARARGS, "Update native Clifford max_steps"}, + {"vec_set_matrix", py_vec_set_matrix, METH_VARARGS, "Set the matrix for one native Clifford env"}, + {NULL, NULL, 0, NULL}, +}; + +static struct PyModuleDef MODULE_DEF = { + PyModuleDef_HEAD_INIT, + "binding", + "Native Clifford environment binding", + -1, + MODULE_METHODS, +}; + +PyMODINIT_FUNC PyInit_binding(void) { + import_array(); + return PyModule_Create(&MODULE_DEF); +} diff --git 
a/pufferlib/ocean/clifford/clifford.h b/pufferlib/ocean/clifford/clifford.h new file mode 100644 index 0000000000..6ea4324e0f --- /dev/null +++ b/pufferlib/ocean/clifford/clifford.h @@ -0,0 +1,579 @@ +#ifndef PUFFERLIB_OCEAN_CLIFFORD_CLIFFORD_H +#define PUFFERLIB_OCEAN_CLIFFORD_CLIFFORD_H + +#include +#include +#include +#include + +#define GATE_H 0 +#define GATE_S 1 +#define GATE_V 2 +#define GATE_HS 3 +#define GATE_HV 4 +#define GATE_CZ 5 + +#define REWARD_GATE_COST 0 +#define REWARD_HAMMING_LEFT 1 + +typedef struct { + int gate_kind; + int q0; + int q1; +} CliffordAction; + +typedef struct { + float perf; + float score; + float episode_return; + float episode_length; + float episode_cz_sum; + float episode_cz_max; + float success_rate; + float n; + float success_count; + float success_step_sum; + float success_step_sq_sum; + float success_cz_sum; + float success_cz_max; + float success_step_min; + float success_step_max; +} CliffordLog; + +typedef struct { + uint64_t state; + double next_gaussian; + int has_gaussian; +} XorShift64; + +typedef struct { + uint64_t* cols; + unsigned char* observations; + int* actions; + float* rewards; + unsigned char* terminals; + unsigned char* truncations; + float episode_return; + int episode_length; + int episode_cz_count; + int steps; + int episode_max_steps; + XorShift64 rng; +} CliffordEnv; + +typedef struct { + CliffordEnv* envs; + int num_envs; + int n_qubits; + int dim; + int obs_size; + int difficulty; + float difficulty_fraction; + int max_steps; + int num_actions; + CliffordAction* actions; + int reset_pool_enabled; + uint64_t* reset_pool_cols; + int reset_pool_size; + int reset_pool_walk_steps; + int reset_tail_steps; + int reset_refresh_stride; + int reset_refresh_credit; + XorShift64 pool_rng; + uint64_t identity_cols[64]; + float single_qubit_cost; + float goal_bonus; + int reward_mode; + float hamming_left_scale; + CliffordLog log; +} CliffordVecEnv; + +static uint64_t BYTE_TO_BYTES64[256]; +static uint64_t 
ROWMASK_ACCUM[8][256];
static int OBS_TABLES_READY = 0;

/* SplitMix64 step; seed-expansion helper (not referenced elsewhere in this
 * header — presumably used by the binding; verify against callers). */
static inline uint64_t splitmix64_next(uint64_t* state) {
    uint64_t z = (*state += 0x9E3779B97F4A7C15ULL);
    z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL;
    z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL;
    return z ^ (z >> 31);
}

/* Seed the PRNG; xorshift state must be nonzero, so 0 maps to a constant. */
static inline void rng_seed(XorShift64* rng, uint64_t seed) {
    if (seed == 0) {
        seed = 0x123456789ABCDEFULL;
    }
    rng->state = seed;
    rng->next_gaussian = 0.0;
    rng->has_gaussian = 0;
}

/* xorshift64* generator (shifts 12/25/27, Vigna's multiplier). */
static inline uint64_t rng_next_u64(XorShift64* rng) {
    uint64_t x = rng->state;
    x ^= x >> 12;
    x ^= x << 25;
    x ^= x >> 27;
    rng->state = x;
    return x * 2685821657736338717ULL;
}

/* Uniform draw in [0, upper) via modulo (tiny bias is acceptable here). */
static inline int rng_below(XorShift64* rng, int upper) {
    if (upper <= 1) {
        return 0;
    }
    return (int)(rng_next_u64(rng) % (uint64_t)upper);
}

/* Population count with a portable fallback (Kernighan's trick). */
static inline int popcount_u64(uint64_t value) {
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_popcountll((unsigned long long)value);
#else
    int count = 0;
    while (value != 0ULL) {
        value &= value - 1ULL;
        count += 1;
    }
    return count;
#endif
}

/* Uniform float in [0, 1) built from the top 24 bits of one draw. */
static inline float rng_float01(XorShift64* rng) {
    return (float)((rng_next_u64(rng) >> 40) * (1.0 / 16777216.0));
}

/* Split a real-valued difficulty into integer part + fractional part.
 * Fractions within 1e-6 of 0 or 1 are snapped to the nearest integer. */
static inline void set_difficulty_level(CliffordVecEnv* vec, double difficulty_level) {
    if (difficulty_level < 0.0) {
        difficulty_level = 0.0;
    }
    double floor_level = floor(difficulty_level + 1e-12);
    vec->difficulty = (int)floor_level;
    vec->difficulty_fraction = (float)(difficulty_level - floor_level);
    if (vec->difficulty_fraction <= 1e-6f) {
        vec->difficulty_fraction = 0.0f;
    } else if (vec->difficulty_fraction >= 1.0f - 1e-6f) {
        vec->difficulty += 1;
        vec->difficulty_fraction = 0.0f;
    }
}

/* Ceiling of the difficulty level: the harder of the two levels a fractional
 * difficulty can sample. Used to size the reset pool. */
static inline int effective_reset_difficulty(const CliffordVecEnv* vec) {
    return vec->difficulty + (vec->difficulty_fraction > 0.0f ? 1 : 0);
}

/* Sample an integer scramble length: difficulty, +1 with probability equal
 * to the fractional part. */
static inline int sample_reset_difficulty(const CliffordVecEnv* vec, XorShift64* rng) {
    if (vec->difficulty_fraction <= 0.0f) {
        return vec->difficulty;
    }
    return vec->difficulty + (rng_float01(rng) < vec->difficulty_fraction ? 1 : 0);
}

/* Build the bit-expansion and 8x8 transpose tables once (idempotent). */
static inline void init_observation_tables(void) {
    if (OBS_TABLES_READY) {
        return;
    }
    /* BYTE_TO_BYTES64[v]: bit i of v becomes byte i (0 or 1). */
    for (int value = 0; value < 256; ++value) {
        uint64_t expanded = 0ULL;
        for (int bit = 0; bit < 8; ++bit) {
            expanded |= (uint64_t)((value >> bit) & 1U) << (bit * 8);
        }
        BYTE_TO_BYTES64[value] = expanded;
    }
    /* ROWMASK_ACCUM[col][v]: scatter the 8 row-bits of a column chunk into
     * the col-th bit of eight packed row masks (one byte per row). */
    for (int col = 0; col < 8; ++col) {
        const uint64_t row_bit = (uint64_t)(1U << col);
        for (int value = 0; value < 256; ++value) {
            uint64_t packed = 0ULL;
            for (int row = 0; row < 8; ++row) {
                if ((value >> row) & 1U) {
                    packed |= row_bit << (row * 8);
                }
            }
            ROWMASK_ACCUM[col][value] = packed;
        }
    }
    OBS_TABLES_READY = 1;
}

/* Reset an env's tableau to the identity. */
static inline void copy_identity_cols(const CliffordVecEnv* vec, CliffordEnv* env) {
    memcpy(env->cols, vec->identity_cols, (size_t)vec->dim * sizeof(uint64_t));
}

/* Copy one packed tableau (dim columns). */
static inline void copy_cols(const CliffordVecEnv* vec, uint64_t* dst, const uint64_t* src) {
    memcpy(dst, src, (size_t)vec->dim * sizeof(uint64_t));
}

/* Zero the per-episode counters and latch the current max_steps. */
static inline void reset_episode_state(const CliffordVecEnv* vec, CliffordEnv* env) {
    env->steps = 0;
    env->episode_max_steps = vec->max_steps;
    env->episode_return = 0.0f;
    env->episode_length = 0;
    env->episode_cz_count = 0;
}

/* 1 iff the tableau equals the identity (the goal state). */
static inline int is_identity(const CliffordVecEnv* vec, const CliffordEnv* env) {
    for (int col = 0; col < vec->dim; ++col) {
        if (env->cols[col] != vec->identity_cols[col]) {
            return 0;
        }
    }
    return 1;
}

/* Uniform random action index, or -1 when no actions exist. */
static inline int sample_action(const CliffordVecEnv* vec, XorShift64* rng) {
    if (vec->num_actions <= 0) {
        return -1;
    }
    return rng_below(rng, vec->num_actions);
}

/* Apply one gate to the packed tableau. Column updates mirror the
 * apply_*_inplace helpers in reference.py (x = X-column, z = Z-column). */
static inline void apply_action(const CliffordVecEnv* vec, CliffordEnv* env, int action_idx) {
    const CliffordAction* action =
&vec->actions[action_idx];
    const int n = vec->n_qubits;
    const int q = action->q0;
    const uint64_t x = env->cols[q];        /* X-column of qubit q */
    const uint64_t z = env->cols[n + q];    /* Z-column of qubit q */
    if (action->gate_kind == GATE_H) {
        env->cols[q] = z;                   /* H swaps X and Z columns */
        env->cols[n + q] = x;
    } else if (action->gate_kind == GATE_S) {
        env->cols[q] = x;
        env->cols[n + q] = z ^ x;           /* S: Z-column ^= X-column */
    } else if (action->gate_kind == GATE_V) {
        env->cols[q] = x ^ z;               /* V = S.H.S */
        env->cols[n + q] = z;
    } else if (action->gate_kind == GATE_HS) {
        env->cols[q] = z;
        env->cols[n + q] = x ^ z;
    } else if (action->gate_kind == GATE_HV) {
        env->cols[q] = x ^ z;
        env->cols[n + q] = x;
    } else {
        /* CZ: each Z-column XORs the other qubit's X-column. Only Z-columns
         * are written, so there is no read-after-write hazard. */
        env->cols[n + action->q0] ^= env->cols[action->q1];
        env->cols[n + action->q1] ^= env->cols[action->q0];
    }
}

/* Number of fresh random gates appended after a pooled scramble, so pooled
 * resets do not all share identical endings. Clamped to [8, 32] and kept
 * strictly below the difficulty. Difficulty < 8 bypasses the pool entirely. */
static inline int compute_reset_tail_steps(int difficulty) {
    if (difficulty < 8) {
        return 0;
    }
    int tail = difficulty / 4;
    if (tail < 8) {
        tail = 8;
    }
    if (tail > 32) {
        tail = 32;
    }
    if (tail >= difficulty) {
        tail = difficulty / 2;
    }
    if (tail < 1) {
        tail = 1;
    }
    return tail;
}

/* Largest power of two <= value (the "<< 1 > 0" guard avoids overflow). */
static inline int floor_power_of_two(int value) {
    int power = 1;
    while ((power << 1) > 0 && (power << 1) <= value) {
        power <<= 1;
    }
    return power;
}

/* Size the scramble pool: ~8 entries per env, clamped to [2048, 32768] and
 * to a ~16 MiB memory budget, floored at 256 and rounded down to a power of
 * two (pool slot selection masks with size-1, which requires this). Returns
 * 0 when pooling is disabled, difficulty is fractional, or difficulty < 8. */
static inline int compute_reset_pool_size(const CliffordVecEnv* vec) {
    if (!vec->reset_pool_enabled) {
        return 0;
    }
    if (vec->difficulty_fraction > 0.0f) {
        return 0;
    }
    if (vec->difficulty < 8) {
        return 0;
    }
    const size_t bytes_per_entry = (size_t)vec->dim * sizeof(uint64_t);
    const size_t target_bytes = 16u << 20;
    int desired = vec->num_envs * 8;
    if (desired < 2048) {
        desired = 2048;
    }
    if (desired > 32768) {
        desired = 32768;
    }
    if (bytes_per_entry > 0) {
        const size_t cap_by_memory = target_bytes / bytes_per_entry;
        if (cap_by_memory > 0 && (size_t)desired > cap_by_memory) {
            desired = (int)cap_by_memory;
        }
    }
    if (desired < 256) {
        desired = 256;
    }
    return floor_power_of_two(desired);
}

/* Fill out_cols with the result of walk_steps random gates applied to the
 * identity; rejection-loops until the result is NOT the identity (a zero-step
 * walk returns the identity directly). */
static inline void generate_random_walk_cols(
    const CliffordVecEnv* vec,
    XorShift64* rng,
    int walk_steps,
    uint64_t* out_cols
) {
    CliffordEnv env = {
        .cols = out_cols,
    };
    if (walk_steps <= 0) {
        copy_identity_cols(vec, &env);
        return;
    }
    do {
        copy_identity_cols(vec, &env);
        for (int step = 0; step < walk_steps; ++step) {
            const int action_idx = sample_action(vec, rng);
            if (action_idx < 0) {
                break;
            }
            apply_action(vec, &env, action_idx);
        }
    } while (is_identity(vec, &env));
}

/* (Re)build the scramble pool for the current difficulty. Frees any previous
 * pool; on any disabled/degenerate configuration (or calloc failure) leaves
 * the pool empty so reset_single falls back to per-reset random walks. */
static inline void rebuild_reset_pool(CliffordVecEnv* vec, uint64_t seed) {
    free(vec->reset_pool_cols);
    vec->reset_pool_cols = NULL;
    const int pool_difficulty = effective_reset_difficulty(vec);
    if (!vec->reset_pool_enabled) {
        vec->reset_pool_size = 0;
        vec->reset_pool_walk_steps = 0;
        vec->reset_tail_steps = 0;
        vec->reset_refresh_stride = 0;
        vec->reset_refresh_credit = 0;
        rng_seed(&vec->pool_rng, seed ^ 0x9E3779B97F4A7C15ULL ^ (uint64_t)pool_difficulty);
        return;
    }
    vec->reset_pool_size = compute_reset_pool_size(vec);
    vec->reset_tail_steps = compute_reset_tail_steps(pool_difficulty);
    vec->reset_pool_walk_steps = pool_difficulty - vec->reset_tail_steps;
    vec->reset_refresh_stride = 32;
    vec->reset_refresh_credit = 0;
    rng_seed(&vec->pool_rng, seed ^ 0x9E3779B97F4A7C15ULL ^ (uint64_t)pool_difficulty);
    if (vec->reset_pool_size <= 0 || vec->reset_pool_walk_steps <= 0) {
        vec->reset_pool_size = 0;
        vec->reset_pool_walk_steps = 0;
        vec->reset_tail_steps = 0;
        return;
    }
    vec->reset_pool_cols = (uint64_t*)calloc(
        (size_t)vec->reset_pool_size * (size_t)vec->dim,
        sizeof(uint64_t)
    );
    if (vec->reset_pool_cols == NULL) {
        vec->reset_pool_size = 0;
        vec->reset_pool_walk_steps = 0;
        vec->reset_tail_steps = 0;
        return;
    }
    for (int slot = 0; slot < vec->reset_pool_size; ++slot) {
        generate_random_walk_cols(
            vec,
            &vec->pool_rng,
            vec->reset_pool_walk_steps,
            vec->reset_pool_cols + ((size_t)slot * (size_t)vec->dim)
        );
    }
}
/* Regenerate one random pool slot roughly every reset_refresh_stride resets,
 * so pool entries slowly churn instead of being fixed for a whole difficulty. */
static inline void maybe_refresh_reset_pool(CliffordVecEnv* vec) {
    if (vec->reset_pool_cols == NULL || vec->reset_pool_size <= 0 || vec->reset_pool_walk_steps <= 0) {
        return;
    }
    vec->reset_refresh_credit += 1;
    if (vec->reset_refresh_credit < vec->reset_refresh_stride) {
        return;
    }
    vec->reset_refresh_credit -= vec->reset_refresh_stride;
    /* pool size is a power of two, so masking with size-1 is a uniform pick */
    const int slot = (int)(rng_next_u64(&vec->pool_rng) & (uint64_t)(vec->reset_pool_size - 1));
    generate_random_walk_cols(
        vec,
        &vec->pool_rng,
        vec->reset_pool_walk_steps,
        vec->reset_pool_cols + ((size_t)slot * (size_t)vec->dim)
    );
}

/* Unpack the column-packed tableau into the dense row-major 0/1 observation.
 * The dim x dim matrix is processed in 8x8 bit blocks: ROWMASK_ACCUM
 * transposes a block's column chunks into packed row masks, BYTE_TO_BYTES64
 * expands each row mask to eight bytes written with one memcpy. The trailing
 * loops handle the ragged right/bottom edges when dim is not a multiple of 8. */
static inline void write_observation(const CliffordVecEnv* vec, CliffordEnv* env) {
    const int dim = vec->dim;
    int row_block = 0;
    for (; row_block + 8 <= dim; row_block += 8) {
        int col_block = 0;
        for (; col_block + 8 <= dim; col_block += 8) {
            uint64_t row_masks = 0ULL;
            for (int col = 0; col < 8; ++col) {
                const uint8_t column_chunk = (uint8_t)((env->cols[col_block + col] >> row_block) & 0xFFU);
                row_masks |= ROWMASK_ACCUM[col][column_chunk];
            }
            for (int row = 0; row < 8; ++row) {
                const uint8_t row_mask = (uint8_t)((row_masks >> (row * 8)) & 0xFFU);
                const uint64_t expanded = BYTE_TO_BYTES64[row_mask];
                memcpy(env->observations + ((row_block + row) * dim) + col_block, &expanded, 8);
            }
        }
        for (int row = 0; row < 8; ++row) {
            const int dst_row = row_block + row;
            for (int col = col_block; col < dim; ++col) {
                env->observations[dst_row * dim + col] = (unsigned char)((env->cols[col] >> dst_row) & 1ULL);
            }
        }
    }
    for (int row = row_block; row < dim; ++row) {
        for (int col = 0; col < dim; ++col) {
            env->observations[row * dim + col] = (unsigned char)((env->cols[col] >> row) & 1ULL);
        }
    }
}

/* Start a new episode: scramble the tableau with `reset_difficulty` random
 * gates (via the pool + fresh tail when available, otherwise a full random
 * walk), rejecting scrambles that land back on the identity, then publish
 * the observation. Difficulty 0 resets straight to the identity. */
static inline void reset_single(CliffordVecEnv* vec, CliffordEnv* env) {
    reset_episode_state(vec, env);
    const int reset_difficulty = sample_reset_difficulty(vec, &env->rng);
    if (reset_difficulty <= 0) {
        copy_identity_cols(vec, env);
        write_observation(vec, env);
        return;
    }

    if (vec->reset_pool_cols != NULL && vec->reset_pool_size > 0) {
        do {
            maybe_refresh_reset_pool(vec);
            /* power-of-two pool size makes this mask a uniform slot pick */
            const int slot = (int)(rng_next_u64(&env->rng) & (uint64_t)(vec->reset_pool_size - 1));
            copy_cols(
                vec,
                env->cols,
                vec->reset_pool_cols + ((size_t)slot * (size_t)vec->dim)
            );
            for (int step = 0; step < vec->reset_tail_steps; ++step) {
                const int action_idx = sample_action(vec, &env->rng);
                if (action_idx < 0) {
                    break;
                }
                apply_action(vec, env, action_idx);
            }
        } while (is_identity(vec, env));
        write_observation(vec, env);
        return;
    }

    do {
        copy_identity_cols(vec, env);
        for (int step = 0; step < reset_difficulty; ++step) {
            const int action_idx = sample_action(vec, &env->rng);
            if (action_idx < 0) {
                break;
            }
            apply_action(vec, env, action_idx);
        }
    } while (is_identity(vec, env));

    write_observation(vec, env);
}

/* Load a dense row-major 0/1 matrix into the env's packed columns and restart
 * the episode counters. Returns 0 on shape mismatch, 1 on success. Symplectic
 * validation is done by the Python wrapper, not here. */
static inline int set_matrix_from_dense(CliffordVecEnv* vec, CliffordEnv* env, const unsigned char* data, int rows, int cols) {
    if (rows != vec->dim || cols != vec->dim) {
        return 0;
    }
    for (int col = 0; col < vec->dim; ++col) {
        uint64_t packed = 0ULL;
        for (int row = 0; row < vec->dim; ++row) {
            packed |= ((uint64_t)(data[row * cols + col] & 1U)) << row;
        }
        env->cols[col] = packed;
    }
    reset_episode_state(vec, env);
    write_observation(vec, env);
    return 1;
}

/* Number of entries where the tableau differs from the identity. */
static inline int identity_hamming_distance(const CliffordVecEnv* vec, const CliffordEnv* env) {
    int distance = 0;
    for (int col = 0; col < vec->dim; ++col) {
        distance += popcount_u64(env->cols[col] ^ vec->identity_cols[col]);
    }
    return distance;
}

/* Hamming distance normalized by the matrix size (dim*dim), in [0, 1]. */
static inline float normalized_identity_hamming_distance(const CliffordVecEnv* vec, const CliffordEnv* env) {
    return (float)identity_hamming_distance(vec, env) / (float)vec->obs_size;
}

/* Fold one finished episode into the running log. Success-only stats also
 * track min/max step counts, initialized on the first successful episode. */
static inline void add_log(CliffordVecEnv* vec, CliffordEnv* env, int success) {
    vec->log.perf += success ? 1.0f : 0.0f;
    vec->log.score += env->episode_return;
    vec->log.episode_return += env->episode_return;
    vec->log.episode_length += (float)env->episode_length;
    vec->log.episode_cz_sum += (float)env->episode_cz_count;
    if ((float)env->episode_cz_count > vec->log.episode_cz_max) {
        vec->log.episode_cz_max = (float)env->episode_cz_count;
    }
    vec->log.success_rate += success ? 1.0f : 0.0f;
    vec->log.n += 1.0f;
    if (success) {
        vec->log.success_count += 1.0f;
        vec->log.success_step_sum += (float)env->episode_length;
        vec->log.success_step_sq_sum += (float)(env->episode_length * env->episode_length);
        vec->log.success_cz_sum += (float)env->episode_cz_count;
        if ((float)env->episode_cz_count > vec->log.success_cz_max) {
            vec->log.success_cz_max = (float)env->episode_cz_count;
        }
        if (vec->log.success_count <= 1.0f) {
            vec->log.success_step_min = (float)env->episode_length;
            vec->log.success_step_max = (float)env->episode_length;
        } else {
            if ((float)env->episode_length < vec->log.success_step_min) {
                vec->log.success_step_min = (float)env->episode_length;
            }
            if ((float)env->episode_length > vec->log.success_step_max) {
                vec->log.success_step_max = (float)env->episode_length;
            }
        }
    }
}

/* Base per-gate reward: -1 for CZ, -single_qubit_cost otherwise. */
static inline float gate_cost_reward(const CliffordVecEnv* vec, int gate_kind) {
    return gate_kind == GATE_CZ ?
-1.0f : -vec->single_qubit_cost;
}

/* Advance one env by the action in env->actions[0]. The raw action is
 * wrapped into [0, num_actions) (the extra "+=" handles negative C modulo).
 * Reward = gate cost, optionally minus scaled remaining Hamming distance
 * (REWARD_HAMMING_LEFT), plus goal_bonus on reaching the identity. Finished
 * episodes are logged and auto-reset; otherwise the observation is refreshed.
 * Returns the reward. */
static inline float step_single(CliffordVecEnv* vec, CliffordEnv* env) {
    int action_idx = env->actions[0] % vec->num_actions;
    if (action_idx < 0) {
        action_idx += vec->num_actions;
    }
    const CliffordAction* action = &vec->actions[action_idx];
    const int gate_kind = action->gate_kind;
    apply_action(vec, env, action_idx);
    env->steps += 1;
    env->episode_length += 1;
    if (gate_kind == GATE_CZ) {
        env->episode_cz_count += 1;
    }

    int terminated = is_identity(vec, env);
    int truncated = (!terminated && env->steps >= env->episode_max_steps);
    float reward = gate_cost_reward(vec, gate_kind);
    if (vec->reward_mode == REWARD_HAMMING_LEFT) {
        reward -= vec->hamming_left_scale * normalized_identity_hamming_distance(vec, env);
    }
    if (terminated) {
        reward += vec->goal_bonus;
    }
    env->episode_return += reward;
    env->rewards[0] = reward;
    env->terminals[0] = (unsigned char)terminated;
    env->truncations[0] = (unsigned char)truncated;

    if (terminated || truncated) {
        add_log(vec, env, terminated);
        reset_single(vec, env);
    } else {
        write_observation(vec, env);
    }
    return reward;
}

#endif
diff --git a/pufferlib/ocean/clifford/clifford.py b/pufferlib/ocean/clifford/clifford.py new file mode 100644 index 0000000000..62adf023b3 --- /dev/null +++ b/pufferlib/ocean/clifford/clifford.py @@ -0,0 +1,205 @@
import gymnasium
import numpy as np

import pufferlib
from .
import binding +from .reference import _build_actions, _is_symplectic + +_GATE_KIND = { + "h": 0, + "s": 1, + "v": 2, + "hs": 3, + "hv": 4, + "cz": 5, +} +_REWARD_MODE = { + "gate_cost": 0, + "hamming_left": 1, +} + + +class Clifford(pufferlib.PufferEnv): + def __init__( + self, + num_envs=1, + n_qubits=6, + difficulty=10, + max_steps=200, + single_qubit_cost=0.01, + goal_bonus=0.0, + reward_mode="gate_cost", + hamming_left_scale=0.5, + use_reset_pool=True, + log_interval=128, + buf=None, + seed=0, + render_mode=None, + ): + if n_qubits <= 0: + raise ValueError("n_qubits must be positive") + if 2 * n_qubits > 64: + raise ValueError("native Clifford env requires 2*n_qubits <= 64") + if num_envs <= 0: + raise ValueError("num_envs must be positive") + if difficulty < 0: + raise ValueError("difficulty must be non-negative") + if max_steps <= 0: + raise ValueError("max_steps must be positive") + if goal_bonus < 0.0: + raise ValueError("goal_bonus must be non-negative") + if reward_mode not in _REWARD_MODE: + raise ValueError(f"reward_mode must be one of {sorted(_REWARD_MODE)}") + if hamming_left_scale < 0.0: + raise ValueError("hamming_left_scale must be non-negative") + + self.n_qubits = int(n_qubits) + self.dim = 2 * self.n_qubits + self._actions = _build_actions(self.n_qubits) + self.log_interval = int(log_interval) + self.render_mode = render_mode + self.num_agents = int(num_envs) + self.agents_per_batch = self.num_agents + + obs_size = self.dim * self.dim + self.single_observation_space = gymnasium.spaces.Box( + low=0, + high=1, + shape=(obs_size,), + dtype=np.uint8, + ) + self.single_action_space = gymnasium.spaces.Discrete(len(self._actions)) + super().__init__(buf) + + gate_kinds = np.empty(len(self._actions), dtype=np.int32) + action_q0 = np.empty(len(self._actions), dtype=np.int32) + action_q1 = np.empty(len(self._actions), dtype=np.int32) + for idx, (gate, q0, q1) in enumerate(self._actions): + gate_kinds[idx] = _GATE_KIND[gate] + action_q0[idx] = q0 + 
action_q1[idx] = q1 + + self.tick = 0 + self.c_envs = binding.vec_init( + self.observations, + self.actions, + self.rewards, + self.terminals, + self.truncations, + self.num_agents, + int(seed), + n_qubits=self.n_qubits, + difficulty=float(difficulty), + max_steps=int(max_steps), + gate_kinds=gate_kinds, + action_q0=action_q0, + action_q1=action_q1, + single_qubit_cost=float(single_qubit_cost), + goal_bonus=float(goal_bonus), + reward_mode=int(_REWARD_MODE[reward_mode]), + hamming_left_scale=float(hamming_left_scale), + use_reset_pool=bool(use_reset_pool), + ) + + def reset(self, seed=0): + self.tick = 0 + binding.vec_reset(self.c_envs, -1 if seed is None else int(seed)) + return self.observations, [] + + def step(self, actions): + actions = np.asarray(actions, dtype=np.int32) + if actions.shape != (self.num_agents,): + raise ValueError(f"Expected actions with shape ({self.num_agents},), got {actions.shape}") + + self.actions[:] = actions + binding.vec_step(self.c_envs) + self.tick += 1 + + info = [] + if self.tick % self.log_interval == 0: + metrics = binding.vec_log(self.c_envs) + if metrics: + info.append(metrics) + + return ( + self.observations, + self.rewards, + self.terminals, + self.truncations, + info, + ) + + def close(self): + if getattr(self, "c_envs", None) is not None: + binding.vec_close(self.c_envs) + self.c_envs = None + + def flush_logs(self): + metrics = binding.vec_log(self.c_envs) + return metrics or None + + def set_difficulty(self, difficulty): + difficulty = float(difficulty) + if difficulty < 0.0: + raise ValueError("difficulty must be non-negative") + binding.vec_set_difficulty(self.c_envs, difficulty) + + def set_max_steps(self, max_steps): + max_steps = int(max_steps) + if max_steps <= 0: + raise ValueError("max_steps must be positive") + binding.vec_set_max_steps(self.c_envs, max_steps) + + def set_matrix(self, matrix, env_index=0): + matrix_u8 = np.ascontiguousarray(np.asarray(matrix, dtype=np.uint8)) + expected_shape = (self.dim, 
self.dim)
        if matrix_u8.shape != expected_shape:
            raise ValueError(f"Expected matrix shape {expected_shape}, got {matrix_u8.shape}")
        if not _is_symplectic(matrix_u8):
            raise ValueError("matrix must be symplectic")
        binding.vec_set_matrix(self.c_envs, int(env_index), matrix_u8)


def test_performance(
    timeout=10,
    atn_cache=1024,
    num_envs=2048,
    n_qubits=6,
    difficulty=10,
    max_steps=200,
    use_reset_pool=True,
):
    """Benchmark: step random cached actions for `timeout` seconds, print SPS."""
    env = Clifford(
        num_envs=num_envs,
        n_qubits=n_qubits,
        difficulty=difficulty,
        max_steps=max_steps,
        use_reset_pool=use_reset_pool,
    )
    env.reset()
    tick = 0

    actions = np.random.randint(
        0,
        env.single_action_space.n,
        (atn_cache, num_envs),
        dtype=np.int32,
    )

    import time
    start = time.time()
    while time.time() - start < timeout:
        env.step(actions[tick % atn_cache])
        tick += 1

    sps = num_envs * tick / (time.time() - start)
    print(
        f"Clifford SPS: {sps:,.0f} "
        f"(num_envs={num_envs}, n_qubits={n_qubits}, difficulty={difficulty}, "
        f"max_steps={max_steps}, use_reset_pool={use_reset_pool})"
    )
    env.close()


if __name__ == "__main__":
    test_performance()
diff --git a/pufferlib/ocean/clifford/reference.py b/pufferlib/ocean/clifford/reference.py new file mode 100644 index 0000000000..280b2fa345 --- /dev/null +++ b/pufferlib/ocean/clifford/reference.py @@ -0,0 +1,240 @@
from __future__ import annotations

from typing import Any

import gymnasium
import numpy as np

# (gate_name, q0, q1); q1 is -1 for single-qubit gates.
Action = tuple[str, int, int]
SINGLE_QUBIT_GATES = ("h", "s", "v", "hs", "hv")
REWARD_MODES = {"gate_cost", "hamming_left"}


def _build_actions(n_qubits: int) -> list[Action]:
    """Enumerate the action list: all single-qubit gates per qubit, then all
    CZ pairs with src < dst. Ordering must match the native binding."""
    actions: list[Action] = []
    for gate in SINGLE_QUBIT_GATES:
        for qubit in range(n_qubits):
            actions.append((gate, qubit, -1))

    for src in range(n_qubits):
        for dst in range(src + 1, n_qubits):
            actions.append(("cz", src, dst))

    return actions


def _identity_symplectic(n_qubits: int) -> np.ndarray:
    """The 2n x 2n identity tableau (the goal state)."""
    return np.eye(2 * n_qubits, dtype=np.uint8)


def _symplectic_form(n_qubits: int) -> np.ndarray:
    """Standard symplectic form Omega = [[0, I], [I, 0]] over GF(2)."""
    omega = np.zeros((2 * n_qubits, 2 * n_qubits), dtype=np.uint8)
    eye = np.eye(n_qubits, dtype=np.uint8)
    omega[:n_qubits, n_qubits:] = eye
    omega[n_qubits:, :n_qubits] = eye
    return omega


def _is_symplectic(matrix: np.ndarray) -> bool:
    """True iff M^T Omega M == Omega (mod 2) for square even-dimension M."""
    matrix_u8 = np.asarray(matrix, dtype=np.uint8)
    if matrix_u8.ndim != 2 or matrix_u8.shape[0] != matrix_u8.shape[1]:
        return False
    if matrix_u8.shape[0] % 2 != 0:
        return False
    n_qubits = matrix_u8.shape[0] // 2
    omega = _symplectic_form(n_qubits)
    lhs = (matrix_u8.T @ omega @ matrix_u8) % 2
    return bool(np.array_equal(lhs.astype(np.uint8), omega))


def _identity_hamming_distance(matrix: np.ndarray) -> int:
    """Number of entries differing from the identity matrix."""
    identity = np.eye(matrix.shape[0], dtype=np.uint8)
    return int(np.count_nonzero(matrix != identity))


def _normalized_identity_hamming_distance(matrix: np.ndarray) -> float:
    """Hamming distance to the identity divided by matrix size; in [0, 1]."""
    return float(_identity_hamming_distance(matrix) / matrix.size)


def _xor_columns_inplace(matrix: np.ndarray, dst_idx: int, src_col: np.ndarray) -> None:
    """matrix[:, dst_idx] ^= src_col, in place."""
    np.bitwise_xor(matrix[:, dst_idx], src_col, out=matrix[:, dst_idx])


def apply_h_inplace(matrix: np.ndarray, qubit: int) -> None:
    """Hadamard: swap the qubit's X and Z columns."""
    n_qubits = matrix.shape[0] // 2
    z_col = n_qubits + qubit
    matrix[:, [qubit, z_col]] = matrix[:, [z_col, qubit]]


def apply_s_inplace(matrix: np.ndarray, qubit: int) -> None:
    """Phase gate S: Z-column ^= X-column."""
    n_qubits = matrix.shape[0] // 2
    _xor_columns_inplace(matrix, n_qubits + qubit, matrix[:, qubit].copy())


def apply_v_inplace(matrix: np.ndarray, qubit: int) -> None:
    """V gate, implemented as the composition S.H.S."""
    apply_s_inplace(matrix, qubit)
    apply_h_inplace(matrix, qubit)
    apply_s_inplace(matrix, qubit)


def apply_hs_inplace(matrix: np.ndarray, qubit: int) -> None:
    """Composite gate: H then S."""
    apply_h_inplace(matrix, qubit)
    apply_s_inplace(matrix, qubit)


def apply_hv_inplace(matrix: np.ndarray, qubit: int) -> None:
    """Composite gate: H then V."""
    apply_h_inplace(matrix, qubit)
    apply_v_inplace(matrix, qubit)


def apply_cz_inplace(matrix: np.ndarray, src: int,
dst: int) -> None:
    """CZ: each qubit's Z-column ^= the other qubit's X-column.

    X-columns are copied first so the two updates use the pre-gate values.
    """
    if src == dst:
        raise ValueError("CZ requires distinct qubits")
    n_qubits = matrix.shape[0] // 2
    src_x = matrix[:, src].copy()
    dst_x = matrix[:, dst].copy()
    _xor_columns_inplace(matrix, n_qubits + src, dst_x)
    _xor_columns_inplace(matrix, n_qubits + dst, src_x)


class ReferenceCliffordEnv(gymnasium.Env):
    """Pure-Python reference implementation of the Clifford env.

    Single-agent gymnasium.Env used as ground truth for the native binding:
    same tableau representation, action ordering, and reward semantics.
    """

    metadata = {"render_modes": []}

    def __init__(
        self,
        n_qubits: int = 6,
        difficulty: int = 10,
        max_steps: int = 200,
        single_qubit_cost: float = 0.01,
        goal_bonus: float = 0.0,
        reward_mode: str = "gate_cost",
        hamming_left_scale: float = 0.5,
        render_mode=None,
    ):
        super().__init__()
        # Same validation contract as the native wrapper.
        if n_qubits <= 0:
            raise ValueError("n_qubits must be positive")
        if difficulty < 0:
            raise ValueError("difficulty must be non-negative")
        if max_steps <= 0:
            raise ValueError("max_steps must be positive")
        if goal_bonus < 0.0:
            raise ValueError("goal_bonus must be non-negative")
        if reward_mode not in REWARD_MODES:
            raise ValueError(f"reward_mode must be one of {sorted(REWARD_MODES)}")
        if hamming_left_scale < 0.0:
            raise ValueError("hamming_left_scale must be non-negative")

        self.n_qubits = int(n_qubits)
        self.difficulty = int(difficulty)
        self.max_steps = int(max_steps)
        self.single_qubit_cost = float(single_qubit_cost)
        self.goal_bonus = float(goal_bonus)
        self.reward_mode = str(reward_mode)
        self.hamming_left_scale = float(hamming_left_scale)
        self.render_mode = render_mode
        self._actions = _build_actions(self.n_qubits)
        self._identity = _identity_symplectic(self.n_qubits)
        self._matrix = self._identity.copy()
        self.steps = 0

        obs_size = self._identity.size
        self.action_space = gymnasium.spaces.Discrete(len(self._actions))
        self.observation_space = gymnasium.spaces.Box(
            low=0,
            high=1,
            shape=(obs_size,),
            dtype=np.uint8,
        )

    def _get_obs(self) -> np.ndarray:
        """Flattened copy of the current tableau."""
        return self._matrix.reshape(-1).copy()

    def set_difficulty(self, difficulty: int) -> None:
        """Set the scramble length used by subsequent resets."""
        difficulty = int(difficulty)
        if difficulty < 0:
            raise ValueError("difficulty must be non-negative")
        self.difficulty = difficulty

    def set_max_steps(self, max_steps: int) -> None:
        """Set the episode step limit."""
        max_steps = int(max_steps)
        if max_steps <= 0:
            raise ValueError("max_steps must be positive")
        self.max_steps = max_steps

    def set_matrix(self, matrix: np.ndarray) -> None:
        """Force the tableau to a given symplectic matrix; resets the step count."""
        matrix_u8 = np.ascontiguousarray(np.asarray(matrix, dtype=np.uint8))
        expected_shape = (2 * self.n_qubits, 2 * self.n_qubits)
        if matrix_u8.shape != expected_shape:
            raise ValueError(f"Expected matrix shape {expected_shape}, got {matrix_u8.shape}")
        if not _is_symplectic(matrix_u8):
            raise ValueError("matrix must be symplectic")
        self._matrix = matrix_u8.copy()
        self.steps = 0

    def _sample_action(self) -> int:
        """Uniform random action index using the gymnasium-seeded RNG."""
        return int(self.np_random.integers(len(self._actions)))

    def _apply_action(self, action_idx: int) -> str:
        """Apply one gate to the tableau; returns the gate name."""
        gate, q0, q1 = self._actions[action_idx]
        if gate == "h":
            apply_h_inplace(self._matrix, q0)
        elif gate == "s":
            apply_s_inplace(self._matrix, q0)
        elif gate == "v":
            apply_v_inplace(self._matrix, q0)
        elif gate == "hs":
            apply_hs_inplace(self._matrix, q0)
        elif gate == "hv":
            apply_hv_inplace(self._matrix, q0)
        elif gate == "cz":
            apply_cz_inplace(self._matrix, q0, q1)
        else:
            raise ValueError(f"Unknown gate {gate}")
        return gate

    def reset(
        self,
        *,
        seed: int | None = None,
        options: dict[str, Any] | None = None,
    ) -> tuple[np.ndarray, dict]:
        """Reset to a scrambled (non-identity) tableau, or to options["matrix"].

        NOTE(review): the options["matrix"] path returns before steps is
        reset — set_matrix already zeroes it, so behavior is unchanged.
        """
        super().reset(seed=seed)
        if options and "matrix" in options:
            self.set_matrix(options["matrix"])
            return self._get_obs(), {}

        if self.difficulty <= 0:
            self._matrix = self._identity.copy()
            self.steps = 0
            return self._get_obs(), {}

        # Rejection-sample: re-scramble until we are not on the identity.
        while True:
            self._matrix = self._identity.copy()
            for _ in range(self.difficulty):
                self._apply_action(self._sample_action())
            if not np.array_equal(self._matrix, self._identity):
                break

        self.steps = 0
        return self._get_obs(), {}

    def step(self, action: int) -> tuple[np.ndarray,
float, bool, bool, dict]:
        """Apply one action (wrapped into range) and return the gym 5-tuple.

        Reward: -1 for CZ, -single_qubit_cost otherwise; "hamming_left" mode
        subtracts scaled remaining distance; goal_bonus added on success.
        """
        action_idx = int(action) % len(self._actions)
        gate = self._apply_action(action_idx)
        self.steps += 1

        terminated = bool(np.array_equal(self._matrix, self._identity))
        truncated = bool(not terminated and self.steps >= self.max_steps)
        reward = -1.0 if gate == "cz" else -self.single_qubit_cost
        if self.reward_mode == "hamming_left":
            reward -= self.hamming_left_scale * _normalized_identity_hamming_distance(self._matrix)
        if terminated:
            reward += self.goal_bonus

        return self._get_obs(), float(reward), terminated, truncated, {}


__all__ = [
    "ReferenceCliffordEnv",
]
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 6c56a4ea20..f3d355ae46 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -120,6 +120,7 @@ def make_multiagent(buf=None, **kwargs):
     'battle': 'Battle',
     'breakout': 'Breakout',
     'blastar': 'Blastar',
+    'clifford': 'Clifford',
     'convert': 'Convert',
     'convert_circle': 'ConvertCircle',
     'pong': 'Pong',
diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 7132a19f90..bf8c67da47 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -28,7 +28,6 @@ import torch.utils.cpp_extension
 import pufferlib
-import pufferlib.sweep
 import pufferlib.vector
 import pufferlib.pytorch
 try:
@@ -1055,10 +1054,16 @@ def sweep(args=None, env_name=None):
     if not args['wandb'] and not args['neptune']:
         raise pufferlib.APIUsageError('Sweeps require either wandb or neptune')
     args['no_model_upload'] = True # Uploading trained model during sweep crashed wandb
+    try:
+        import pufferlib.sweep as puffer_sweep
+    except ImportError as exc:
+        raise ImportError(
+            'Sweep mode requires optional sweep dependencies such as gpytorch.'
+        ) from exc
     method = args['sweep'].pop('method')
     try:
-        sweep_cls = getattr(pufferlib.sweep, method)
+        sweep_cls = getattr(puffer_sweep, method)
     except:
         raise pufferlib.APIUsageError(f'Invalid sweep method {method}.
See pufferlib.sweep')
diff --git a/tests/test_ocean_clifford.py b/tests/test_ocean_clifford.py new file mode 100644 index 0000000000..8fd8716d15 --- /dev/null +++ b/tests/test_ocean_clifford.py @@ -0,0 +1,334 @@
import numpy as np
import pytest

from pufferlib.ocean.clifford.reference import (
    ReferenceCliffordEnv,
)

# Native binding is optional; skip native tests when it is not built.
try:
    import pufferlib.ocean.clifford.binding  # noqa: F401
except ImportError:
    BINDING_AVAILABLE = False
else:
    BINDING_AVAILABLE = True
    from pufferlib.ocean.clifford.clifford import Clifford


def _apply_explicit(matrix, gate):
    """Right-multiply by an explicit gate matrix over GF(2)."""
    return ((matrix.astype(np.uint8) @ gate.astype(np.uint8)) % 2).astype(np.uint8)


def _identity_symplectic(n_qubits):
    return np.eye(2 * n_qubits, dtype=np.uint8)


def _build_actions(n_qubits):
    """Local copy of the env's action enumeration (kept in sync by a test below)."""
    actions = []
    for gate in ("h", "s", "v", "hs", "hv"):
        for qubit in range(n_qubits):
            actions.append((gate, qubit, -1))
    for src in range(n_qubits):
        for dst in range(src + 1, n_qubits):
            actions.append(("cz", src, dst))
    return actions


def _symplectic_matrix_h(n_qubits, qubit):
    """Explicit H gate matrix: swaps the qubit's X/Z columns of the identity."""
    gate = _identity_symplectic(n_qubits)
    z_col = n_qubits + qubit
    gate[:, [qubit, z_col]] = gate[:, [z_col, qubit]]
    return gate


def _symplectic_matrix_s(n_qubits, qubit):
    """Explicit S gate matrix."""
    gate = _identity_symplectic(n_qubits)
    gate[qubit, n_qubits + qubit] = 1
    return gate


def _symplectic_matrix_v(n_qubits, qubit):
    """Explicit V gate matrix via the S.H.S decomposition."""
    gate = _symplectic_matrix_s(n_qubits, qubit)
    gate = (gate @ _symplectic_matrix_h(n_qubits, qubit)) % 2
    gate = (gate @ _symplectic_matrix_s(n_qubits, qubit)) % 2
    return gate.astype(np.uint8, copy=False)


def _symplectic_matrix_hs(n_qubits, qubit):
    """Explicit HS composite gate matrix."""
    gate = _symplectic_matrix_h(n_qubits, qubit)
    gate = (gate @ _symplectic_matrix_s(n_qubits, qubit)) % 2
    return gate.astype(np.uint8, copy=False)


def _symplectic_matrix_hv(n_qubits, qubit):
    """Explicit HV composite gate matrix."""
    gate = _symplectic_matrix_h(n_qubits, qubit)
    gate = (gate @ _symplectic_matrix_v(n_qubits, qubit)) % 2
    return gate.astype(np.uint8, copy=False)

def _symplectic_matrix_cz(n_qubits, src, dst):
    """Return the symplectic update matrix for a CZ gate on qubits src/dst."""
    mat = _identity_symplectic(n_qubits)
    # CZ couples X-part rows to the partner qubit's Z-part columns, symmetrically.
    for row, col in ((src, dst), (dst, src)):
        mat[row, n_qubits + col] = 1
    return mat


def _find_action(actions, gate_name, q0, q1=-1):
    """Return the index of (gate_name, q0, q1) in *actions*; fail loudly if absent."""
    target = (gate_name, q0, q1)
    idx = next((i for i, act in enumerate(actions) if act == target), None)
    if idx is None:
        raise AssertionError(f"Action {target} not found")
    return idx


def _normalized_identity_hamming_distance(matrix):
    """Fraction of entries in *matrix* that differ from the identity tableau."""
    identity = _identity_symplectic(matrix.shape[0] // 2)
    return float(np.count_nonzero(matrix != identity) / matrix.size)


def test_build_actions_full_connectivity_order():
    # 3 qubits, all-to-all connectivity: single-qubit gates come first
    # (grouped by gate type, then qubit), CZ pairs come last.
    actions = _build_actions(3)
    assert len(actions) == 18
    expected_head = [("h", q, -1) for q in range(3)] + [("s", 0, -1), ("s", 1, -1)]
    assert actions[:5] == expected_head
    assert actions[-3:] == [("cz", 0, 1), ("cz", 0, 2), ("cz", 1, 2)]


def test_reference_reset_with_difficulty_zero_is_identity():
    env = ReferenceCliffordEnv(n_qubits=3, difficulty=0, max_steps=8)
    obs, info = env.reset(seed=7)
    np.testing.assert_array_equal(obs, _identity_symplectic(3).reshape(-1))
    assert info == {}


def test_reference_reset_with_difficulty_nonzero_is_not_identity():
    env = ReferenceCliffordEnv(n_qubits=3, difficulty=4, max_steps=8)
    obs, _info = env.reset(seed=7)
    flat_identity = _identity_symplectic(3).reshape(-1)
    assert not np.array_equal(obs, flat_identity)


def test_reference_set_matrix_rejects_nonsymplectic_input():
    env = ReferenceCliffordEnv(n_qubits=2, difficulty=0, max_steps=8)
    # The all-zeros matrix is not symplectic and must be refused.
    with pytest.raises(ValueError, match="symplectic"):
        env.set_matrix(np.zeros((4, 4), dtype=np.uint8))


@pytest.mark.parametrize("reward_mode", ["bad_mode", "dense_hamming", "end_hamming_left"])
def test_reference_rejects_invalid_reward_mode(reward_mode):
    with pytest.raises(ValueError, match="reward_mode"):
        ReferenceCliffordEnv(n_qubits=2, reward_mode=reward_mode)


def test_reference_rejects_negative_hamming_left_scale():
    with pytest.raises(ValueError, match="hamming_left_scale"):
        ReferenceCliffordEnv(n_qubits=2, hamming_left_scale=-0.1)


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
def test_native_env_shapes_and_dtypes():
    env = Clifford(n_qubits=3, difficulty=0, max_steps=8, num_envs=4)
    try:
        obs, info = env.reset(seed=11)
        # 4 envs, each observing a flattened 6x6 (= 36 entry) tableau.
        assert obs.shape == (4, 36)
        assert obs.dtype == np.uint8
        assert info == []
        assert env.actions.dtype == np.int32
        assert env.rewards.dtype == np.float32
        assert env.terminals.dtype == np.bool_
        assert env.truncations.dtype == np.bool_
    finally:
        env.close()


# One representative action per gate family, shared by both parity tests below.
_GATE_ACTIONS = [
    ("h", 0, -1),
    ("s", 1, -1),
    ("v", 2, -1),
    ("hs", 1, -1),
    ("hv", 0, -1),
    ("cz", 0, 2),
]


def _scrambled_matrix():
    """Fixed non-identity symplectic matrix used as a shared starting state."""
    mat = _identity_symplectic(3)
    mat = _apply_explicit(mat, _symplectic_matrix_h(3, 1))
    mat = _apply_explicit(mat, _symplectic_matrix_s(3, 2))
    return _apply_explicit(mat, _symplectic_matrix_cz(3, 0, 2))


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
@pytest.mark.parametrize("action", _GATE_ACTIONS)
def test_native_matches_reference_for_gate_updates(action):
    env_ref = ReferenceCliffordEnv(n_qubits=3, difficulty=0, max_steps=10)
    env_native = Clifford(n_qubits=3, difficulty=0, max_steps=10, num_envs=1)
    try:
        base = _scrambled_matrix()
        action_idx = _find_action(env_ref._actions, *action)

        # Put both envs in the same starting state before stepping.
        env_ref.reset(options={"matrix": base})
        env_native.reset(seed=0)
        env_native.set_matrix(base)

        obs_ref, reward_ref, term_ref, trunc_ref, info_ref = env_ref.step(action_idx)
        native_step = env_native.step(np.asarray([action_idx], dtype=np.int32))
        obs_native, rewards, terminals, truncations, info_native = native_step

        np.testing.assert_array_equal(obs_native[0], obs_ref)
        assert rewards[0] == pytest.approx(reward_ref, abs=1e-6)
        assert bool(terminals[0]) == term_ref
        assert bool(truncations[0]) == trunc_ref
        assert info_ref == {}
        assert info_native == []
    finally:
        env_native.close()


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
@pytest.mark.parametrize("action", _GATE_ACTIONS)
def test_native_matches_reference_for_hamming_left_rewards(action):
    env_ref = ReferenceCliffordEnv(
        n_qubits=3,
        difficulty=0,
        max_steps=10,
        reward_mode="hamming_left",
        hamming_left_scale=0.5,
    )
    env_native = Clifford(
        n_qubits=3,
        difficulty=0,
        max_steps=10,
        num_envs=1,
        reward_mode="hamming_left",
        hamming_left_scale=0.5,
    )
    try:
        base = _scrambled_matrix()
        action_idx = _find_action(env_ref._actions, *action)

        # Put both envs in the same starting state before stepping.
        env_ref.reset(options={"matrix": base})
        env_native.reset(seed=0)
        env_native.set_matrix(base)

        obs_ref, reward_ref, term_ref, trunc_ref, _info_ref = env_ref.step(action_idx)
        native_step = env_native.step(np.asarray([action_idx], dtype=np.int32))
        obs_native, rewards, terminals, truncations, info_native = native_step

        np.testing.assert_array_equal(obs_native[0], obs_ref)
        assert rewards[0] == pytest.approx(reward_ref, abs=1e-6)
        assert bool(terminals[0]) == term_ref
        assert bool(truncations[0]) == trunc_ref
        assert info_native == []
    finally:
        env_native.close()


def test_reference_hamming_left_penalizes_remaining_distance():
    env = ReferenceCliffordEnv(
        n_qubits=2,
        difficulty=0,
        max_steps=8,
        single_qubit_cost=0.01,
        reward_mode="hamming_left",
        hamming_left_scale=0.5,
    )
    env.reset(options={"matrix": _symplectic_matrix_h(2, 1)})
    action_idx = _find_action(_build_actions(2), "s", 0)
    obs, reward, terminated, truncated, _info = env.step(action_idx)
    # Reward = per-step cost plus a penalty proportional to how far the
    # resulting tableau still is from the identity.
    remaining = _normalized_identity_hamming_distance(obs.reshape(4, 4))
    assert reward == pytest.approx(-0.01 - 0.5 * remaining, abs=1e-6)
    assert not terminated
    assert not truncated
@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
def test_native_terminal_step_auto_resets_to_identity():
    """Solving the puzzle terminates the episode and auto-resets the env.

    Starting one H away from identity, applying that H yields
    goal_bonus (5.0) minus single_qubit_cost (0.01) = 4.99, and the
    returned observation is the freshly reset identity tableau.
    """
    env = Clifford(
        n_qubits=2,
        difficulty=0,
        max_steps=8,
        single_qubit_cost=0.01,
        goal_bonus=5.0,
        num_envs=1,
    )
    try:
        env.reset(seed=0)
        env.set_matrix(_symplectic_matrix_h(2, 0))
        action_idx = _find_action(_build_actions(2), "h", 0)
        obs, rewards, terminals, truncations, info = env.step(
            np.asarray([action_idx], dtype=np.int32)
        )
        assert rewards[0] == pytest.approx(4.99, abs=1e-6)
        assert bool(terminals[0])
        assert not bool(truncations[0])
        np.testing.assert_array_equal(obs[0], _identity_symplectic(2).reshape(-1))
        assert info == []
    finally:
        env.close()


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
def test_native_hamming_left_terminal_reward_matches_reference():
    """Terminal reward in "hamming_left" mode agrees with the reference env.

    Fix: the previous version never consulted the reference implementation
    despite its name -- it only re-checked the hardcoded 4.99 already covered
    by the plain terminal test. It now steps a ReferenceCliffordEnv with
    identical settings and asserts reward/termination agreement (the 4.99
    check is kept as well).
    """
    # NOTE(review): assumes ReferenceCliffordEnv accepts goal_bonus like the
    # native binding does -- confirm against its constructor.
    common = dict(
        n_qubits=2,
        difficulty=0,
        max_steps=8,
        single_qubit_cost=0.01,
        goal_bonus=5.0,
        reward_mode="hamming_left",
        hamming_left_scale=0.5,
    )
    env_ref = ReferenceCliffordEnv(**common)
    env = Clifford(num_envs=1, **common)
    try:
        start = _symplectic_matrix_h(2, 0)
        action_idx = _find_action(_build_actions(2), "h", 0)

        env_ref.reset(options={"matrix": start})
        _obs_ref, reward_ref, term_ref, trunc_ref, _info_ref = env_ref.step(action_idx)

        env.reset(seed=0)
        env.set_matrix(start)
        obs, rewards, terminals, truncations, info = env.step(
            np.asarray([action_idx], dtype=np.int32)
        )

        # At the goal the remaining Hamming distance is zero, so the terminal
        # reward reduces to goal_bonus - single_qubit_cost = 4.99 in both envs.
        assert reward_ref == pytest.approx(4.99, abs=1e-6)
        assert rewards[0] == pytest.approx(reward_ref, abs=1e-6)
        assert bool(terminals[0]) == term_ref
        assert bool(truncations[0]) == trunc_ref
        # Native envs auto-reset on terminal steps, so the observation is the
        # freshly reset identity tableau (not compared to the reference,
        # whose post-terminal observation semantics may differ).
        np.testing.assert_array_equal(obs[0], _identity_symplectic(2).reshape(-1))
        assert info == []
    finally:
        env.close()


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
def test_native_set_matrix_rejects_nonsymplectic_input():
    """set_matrix validates symplecticity, mirroring the reference env."""
    env = Clifford(n_qubits=2, difficulty=0, max_steps=8, num_envs=1)
    try:
        bad = np.zeros((4, 4), dtype=np.uint8)
        with pytest.raises(ValueError, match="symplectic"):
            env.set_matrix(bad)
    finally:
        env.close()


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
def test_native_rejects_invalid_reward_mode():
    """The constructor rejects unknown reward_mode values, like the reference."""
    with pytest.raises(ValueError, match="reward_mode"):
        Clifford(n_qubits=2, reward_mode="dense_hamming")


@pytest.mark.skipif(not BINDING_AVAILABLE, reason="native clifford binding is not built")
def test_native_rejects_negative_hamming_left_scale():
    """The constructor rejects a negative hamming_left_scale, like the reference."""
    with pytest.raises(ValueError, match="hamming_left_scale"):
        Clifford(n_qubits=2, hamming_left_scale=-0.1)