From 253c845fbefc8c3c62c04285f94df29ab22ecf49 Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Sun, 31 May 2026 09:21:20 -0700
Subject: [PATCH 1/7] feat: add local backend for built-in nemo guardrails

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 .../src/plugins/nemo_guardrails/component.rs  |  20 +-
 .../core/src/plugins/nemo_guardrails/local.rs |  51 ++
 .../nemo_guardrails/component_tests.rs        |  48 +-
 crates/python/src/lib.rs                      |  83 +++
 crates/python/src/py_plugin.rs                |  39 +-
 .../python/tests/coverage/coverage_tests.rs   | 648 +++++++++++++++++-
 docs/about-nemo-relay/concepts/plugins.mdx    |   7 +-
 docs/build-plugins/nemoguardrails.mdx         |   1 -
 docs/nemo-guardrails-plugin/about.mdx         | 108 ++-
 docs/nemo-guardrails-plugin/configuration.mdx | 205 ++++--
 python/nemo_relay/_guardrails_local.py        | 589 ++++++++++++++++
 11 files changed, 1654 insertions(+), 145 deletions(-)
 create mode 100644 crates/core/src/plugins/nemo_guardrails/local.rs
 create mode 100644 python/nemo_relay/_guardrails_local.py

diff --git a/crates/core/src/plugins/nemo_guardrails/component.rs b/crates/core/src/plugins/nemo_guardrails/component.rs
index 13695405..28decfbe 100644
--- a/crates/core/src/plugins/nemo_guardrails/component.rs
+++ b/crates/core/src/plugins/nemo_guardrails/component.rs
@@ -17,9 +17,13 @@ use crate::plugin::{
     register_plugin,
 };
 
+#[path = "local.rs"]
+mod local;
 #[cfg(all(feature = "guardrails-remote", not(target_arch = "wasm32")))]
 #[path = "remote.rs"]
 mod remote;
+use local::register_local_backend;
+pub use local::{clear_local_backend_provider, register_local_backend_provider};
 #[cfg(all(feature = "guardrails-remote", not(target_arch = "wasm32")))]
 use remote::register_remote_backend;
 
@@ -447,9 +451,7 @@ fn register_nemo_guardrails_backend(
 ) -> PluginResult<()> {
     match config.mode.as_str() {
         "remote" => register_remote_backend(config, ctx),
-        "local" => Err(PluginError::RegistrationFailed(
-            "built-in NeMo Guardrails local backend is not implemented yet".to_string(),
-        )),
+        "local" => register_local_backend(config, ctx),
         other => Err(PluginError::InvalidConfig(format!(
             "unsupported NeMo Guardrails mode '{other}'"
         ))),
@@ -955,6 +957,18 @@ fn validate_request_defaults(
         return;
     };
 
+    if config.mode == "local" {
+        push_policy_diag(
+            diagnostics,
+            policy.unsupported_value,
+            "nemo_guardrails.unsupported_value",
+            Some(NEMO_GUARDRAILS_PLUGIN_KIND.to_string()),
+            Some("request_defaults".to_string()),
+            "local mode does not currently support request_defaults".to_string(),
+        );
+        return;
+    }
+
     validate_json_object_field(
         diagnostics,
         policy,
diff --git a/crates/core/src/plugins/nemo_guardrails/local.rs b/crates/core/src/plugins/nemo_guardrails/local.rs
new file mode 100644
index 00000000..31f4e1c8
--- /dev/null
+++ b/crates/core/src/plugins/nemo_guardrails/local.rs
@@ -0,0 +1,51 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use std::sync::{Arc, LazyLock, Mutex, MutexGuard};
+
+use crate::plugin::{PluginError, PluginRegistrationContext, Result as PluginResult};
+
+use super::NeMoGuardrailsConfig;
+
+type LocalBackendProvider = Arc<
+    dyn Fn(NeMoGuardrailsConfig, &mut PluginRegistrationContext) -> PluginResult<()> + Send + Sync,
+>; // shareable callback that performs local-backend registration for a config
+/// Global slot for the installed provider; starts as `None` (no local backend available).
+static LOCAL_BACKEND_PROVIDER: LazyLock<Mutex<Option<LocalBackendProvider>>> =
+    LazyLock::new(|| Mutex::new(None));
+
+fn local_backend_provider_guard() -> PluginResult<MutexGuard<'static, Option<LocalBackendProvider>>> {
+    // A poisoned mutex is reported as an internal plugin error instead of panicking.
+    LOCAL_BACKEND_PROVIDER.lock().map_err(|poison| {
+        let message = format!("NeMo Guardrails local backend provider lock poisoned: {poison}");
+        PluginError::Internal(message)
+    })
+}
+
+/// Installs `provider` in the global local-backend slot, replacing any earlier one.
+#[doc(hidden)]
+pub fn register_local_backend_provider(provider: LocalBackendProvider) -> PluginResult<()> {
+    *local_backend_provider_guard()? = Some(provider);
+    Ok(())
+}
+
+/// Empties the global local-backend slot; the only error path is a poisoned lock.
+#[doc(hidden)]
+pub fn clear_local_backend_provider() -> PluginResult<()> {
+    *local_backend_provider_guard()? = None;
+    Ok(())
+}
+
+pub(super) fn register_local_backend(
+    config: NeMoGuardrailsConfig,
+    ctx: &mut PluginRegistrationContext,
+) -> PluginResult<()> {
+    // Clone the handle in its own statement so the lock is released before the provider runs.
+    let installed = local_backend_provider_guard()?.clone();
+    let Some(provider) = installed else {
+        return Err(PluginError::RegistrationFailed(
+            "built-in NeMo Guardrails local backend is unavailable in this runtime".to_string(),
+        ));
+    };
+    provider(config, ctx)
+}
diff --git a/crates/core/tests/unit/plugins/nemo_guardrails/component_tests.rs b/crates/core/tests/unit/plugins/nemo_guardrails/component_tests.rs
index 852b8928..0823bbac 100644
--- a/crates/core/tests/unit/plugins/nemo_guardrails/component_tests.rs
+++ b/crates/core/tests/unit/plugins/nemo_guardrails/component_tests.rs
@@ -42,6 +42,7 @@ const TEST_TIMEOUT: Duration = Duration::from_secs(5);
 
 fn reset_runtime() {
     let _ = clear_plugin_configuration();
+    crate::plugins::nemo_guardrails::component::clear_local_backend_provider().unwrap();
     crate::shared_runtime::reset_runtime_owner_for_tests();
     let context = global_context();
     *context.write().unwrap() = NemoRelayContextState::new();
@@ -789,6 +790,22 @@ fn invalid_shapes_and_values_are_reported() {
             .any(|diag| diag.field.as_deref() == Some("local.python_module"))
     );
 
+    let local_request_defaults = validate_plugin_config(&plugin_config(json!({
+        "mode": "local",
+        "codec": "openai_chat",
+        "config_path": "./rails",
+        "request_defaults": {
+            "context": {"tenant": "demo"}
+        }
+    })));
+    assert!(local_request_defaults.has_errors());
+    assert!(local_request_defaults.diagnostics.iter().any(|diag| {
+        diag.field.as_deref() == Some("request_defaults")
+            && diag
+                .message
+                .contains("local mode does not currently support request_defaults")
+    }));
+
     let invalid_request_defaults = validate_plugin_config(&plugin_config(json!({
         "mode": "remote",
         "codec": "openai_chat",
@@ -975,7 +992,7 @@ fn enabled_local_initialization_fails_fast_until_backend_exists() {
 
     match error {
         crate::plugin::PluginError::RegistrationFailed(message) => {
-            assert!(message.contains("local backend"));
+            assert!(message.contains("unavailable in this runtime"));
         }
         other => panic!("unexpected error: {other}"),
     }
@@ -1007,5 +1024,34 @@ fn enabled_unknown_mode_initialization_fails_fast_when_policy_ignores_validation
     }
 }
 
+#[test]
+fn enabled_local_initialization_dispatches_through_installed_provider() {
+    // Hold the shared guardrails test mutex; a poisoned lock is recovered, not propagated.
+    let _serial = crate::plugins::nemo_guardrails::test_mutex()
+        .lock()
+        .unwrap_or_else(|poisoned| poisoned.into_inner());
+    reset_runtime();
+
+    // The provider records that it ran and checks the config it was handed.
+    let invoked = Arc::new(AtomicBool::new(false));
+    let invoked_by_provider = Arc::clone(&invoked);
+    crate::plugins::nemo_guardrails::component::register_local_backend_provider(Arc::new(
+        move |config, _ctx| {
+            invoked_by_provider.store(true, Ordering::SeqCst);
+            assert_eq!(config.mode, "local");
+            assert_eq!(config.config_path.as_deref(), Some("./rails"));
+            Ok(())
+        },
+    ))
+    .unwrap();
+
+    // Initializing a local-mode component must route through the installed provider.
+    let settings = json!({"mode": "local", "codec": "openai_chat", "config_path": "./rails"});
+    let outcome = futures::executor::block_on(initialize_plugins(plugin_config(settings)));
+    outcome.unwrap();
+
+    assert!(invoked.load(Ordering::SeqCst));
+}
+
 #[path = "remote_tests.rs"]
 mod remote_tests;
diff --git a/crates/python/src/lib.rs b/crates/python/src/lib.rs
index d11df353..13d0c29f 100644
--- a/crates/python/src/lib.rs
+++ b/crates/python/src/lib.rs
@@ -20,9 +20,16 @@
 //! - `py_adaptive` — Python-facing adaptive helpers (`set_latency_sensitivity`)
 //! - `py_plugin` — Python-facing generic plugin config/registration helpers
 //! - `convert` — JSON ↔ Python conversion utilities
+use nemo_relay::plugin::{PluginRegistrationContext, Result as PluginResult};
+use nemo_relay::plugins::nemo_guardrails::component::{
+    NeMoGuardrailsConfig, register_local_backend_provider,
+};
 use nemo_relay::shared_runtime::initialize_shared_runtime_binding;
 use nemo_relay_adaptive::plugin_component::register_adaptive_component;
 use pyo3::prelude::*;
+use serde_json::Value as Json;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
 
 mod convert;
 #[doc(hidden)]
@@ -52,6 +59,13 @@ fn _native(m: &Bound<'_, PyModule>) -> PyResult<()> {
             "failed to register adaptive plugin component: {e}"
         ))
     })?;
+    register_local_backend_provider(Arc::new(register_python_local_guardrails_backend)).map_err(
+        |e| {
+            pyo3::exceptions::PyRuntimeError::new_err(format!(
+                "failed to register NeMo Guardrails local backend provider: {e}"
+            ))
+        },
+    )?;
     py_types::register(m)?;
     py_api::register(m)?;
     py_plugin::register(m)?;
@@ -59,6 +73,75 @@ fn _native(m: &Bound<'_, PyModule>) -> PyResult<()> {
     Ok(())
 }
 
+/// Local-backend provider that delegates registration to the Python helper module.
+fn register_python_local_guardrails_backend(
+    config: NeMoGuardrailsConfig,
+    ctx: &mut PluginRegistrationContext,
+) -> PluginResult<()> {
+    use nemo_relay::plugin::PluginError;
+
+    // The Python helper receives the typed config as a plain JSON object.
+    let serialized = serde_json::to_value(config).map_err(|err| {
+        PluginError::Internal(format!("failed to serialize NeMo Guardrails local config: {err}"))
+    })?;
+    let Json::Object(plugin_config) = serialized else {
+        return Err(PluginError::Internal(
+            "NeMo Guardrails local config did not serialize to a JSON object".to_string(),
+        ));
+    };
+
+    // Call the Python `register_local_backend` helper and collect what it registered.
+    let outcome = Python::attach(|py| {
+        let register_fn = load_guardrails_local_register_fn(py)?;
+        let namespace_prefix = ctx.qualify_name("");
+        crate::py_plugin::invoke_python_plugin_register(
+            py,
+            "nemo_guardrails",
+            &register_fn,
+            &plugin_config,
+            namespace_prefix,
+        )
+    });
+    let registrations = outcome.map_err(|err| PluginError::RegistrationFailed(err.to_string()))?;
+
+    ctx.extend_registrations(registrations);
+    Ok(())
+}
+
+fn load_guardrails_local_register_fn(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> {
+    const HELPER_MODULE: &str = "nemo_relay._guardrails_local";
+
+    // Try a normal import first; on failure retry once from the in-repo `python/` directory.
+    let module = py.import(HELPER_MODULE).or_else(|first_failure| {
+        let source_python_dir = guardrails_local_source_python_dir();
+        if !source_python_dir.exists() {
+            // No source tree to fall back to: report the original import error.
+            return Err(first_failure);
+        }
+        prepend_python_path_if_missing(py, &source_python_dir)?;
+        py.import(HELPER_MODULE)
+    })?;
+    module.getattr("register_local_backend")
+}
+
+fn guardrails_local_source_python_dir() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR")).join("../../python") // manifest dir is fixed at build time
+}
+
+/// Puts `path` at the front of Python's `sys.path` unless it is already listed there.
+fn prepend_python_path_if_missing(py: Python<'_>, path: &Path) -> PyResult<()> {
+    // Fallback for local development and in-repo tests, where the Python package
+    // may not have been installed into the active environment yet.
+    let entry = path.to_string_lossy();
+    let search_path = py.import("sys")?.getattr("path")?;
+    if search_path.contains(entry.as_ref())? {
+        // Already present: leave the existing ordering untouched.
+        return Ok(());
+    }
+    search_path.call_method1("insert", (0, entry.as_ref()))?;
+    Ok(())
+}
+
 #[cfg(test)]
 #[path = "../tests/coverage/coverage_tests.rs"]
 mod coverage_tests;
diff --git a/crates/python/src/py_plugin.rs b/crates/python/src/py_plugin.rs
index d483375b..ee668ea1 100644
--- a/crates/python/src/py_plugin.rs
+++ b/crates/python/src/py_plugin.rs
@@ -160,6 +160,27 @@ fn new_py_plugin_context(
     )
 }
 
+/// Calls a Python plugin `register` callable with the converted config and a fresh
+/// plugin context, then returns the registrations drained from that context.
+pub(crate) fn invoke_python_plugin_register(
+    py: Python<'_>,
+    plugin_kind: &str,
+    register_fn: &Bound<'_, PyAny>,
+    plugin_config: &Map<String, Json>,
+    namespace_prefix: String,
+) -> PyResult<Vec<PluginRegistration>> {
+    // Each call gets its own context whose registration list starts out empty.
+    let collected = Arc::new(Mutex::new(Vec::new()));
+    let py_ctx = new_py_plugin_context(py, plugin_kind, collected, namespace_prefix)?;
+    let config_py = plugin_config_to_py(py, plugin_kind, plugin_config)?;
+
+    // The callable receives `(config, ctx)`; anything it raises propagates as a `PyErr`.
+    register_fn.call1((config_py, py_ctx.clone_ref(py)))?;
+
+    let ctx_ref = py_ctx.bind(py).borrow();
+    ctx_ref.drain_registrations()
+}
+
 #[pyclass(name = "PluginContext")]
 pub struct PyPluginContext {
     registrations: Arc<Mutex<Vec<PluginRegistration>>>,
@@ -695,22 +716,14 @@ impl Plugin for PyPlugin {
         let plugin_config = plugin_config.clone();
         Box::pin(async move {
             let registrations = Python::attach(|py| -> PyResult<Vec<PluginRegistration>> {
-                let py_ctx = new_py_plugin_context(
+                let register_fn = self.plugin.getattr(py, "register")?.into_bound(py);
+                invoke_python_plugin_register(
                     py,
                     &self.plugin_kind,
-                    Arc::new(Mutex::new(vec![])),
+                    &register_fn,
+                    &plugin_config,
                     namespace_prefix,
-                )?;
-                let plugin_config_py = json_to_py(py, &Json::Object(plugin_config.clone()))?;
-                self.plugin.call_method1(
-                    py,
-                    "register",
-                    (plugin_config_py, py_ctx.clone_ref(py)),
-                )?;
-                {
-                    let py_ctx_ref = py_ctx.bind(py).borrow();
-                    py_ctx_ref.drain_registrations()
-                }
+                )
             })
             .map_err(|err| PluginError::RegistrationFailed(err.to_string()))?;
 
diff --git a/crates/python/tests/coverage/coverage_tests.rs b/crates/python/tests/coverage/coverage_tests.rs
index 6c3205e0..3e553341 100644
--- a/crates/python/tests/coverage/coverage_tests.rs
+++ b/crates/python/tests/coverage/coverage_tests.rs
@@ -4,11 +4,13 @@
 //! Coverage tests for coverage in the NeMo Relay Python crate.
 
 use std::ffi::CString;
+use std::path::PathBuf;
 use std::pin::Pin;
 use std::sync::Arc;
 
+use pyo3::ffi::c_str;
 use pyo3::prelude::*;
-use pyo3::types::PyModule;
+use pyo3::types::{IntoPyDict, PyModule};
 use serde_json::{Value as Json, json};
 use tokio_stream::Stream;
 use tokio_stream::StreamExt;
@@ -24,7 +26,13 @@ use crate::py_callable::{
 };
 use nemo_relay::api::event::{BaseEvent, Event, EventCategory, ScopeCategory, ScopeEvent};
 use nemo_relay::api::llm::LlmRequest;
-use nemo_relay::api::runtime::{LlmExecutionNextFn, LlmStreamExecutionNextFn, ToolExecutionNextFn};
+use nemo_relay::api::runtime::{
+    LlmExecutionNextFn, LlmStreamExecutionNextFn, NemoRelayContextState, ToolExecutionNextFn,
+    global_context,
+};
+use nemo_relay::plugin::{
+    PluginComponentSpec, PluginConfig, clear_plugin_configuration, initialize_plugins,
+};
 
 fn load_module<'py>(py: Python<'py>, code: &str) -> Bound<'py, PyModule> {
     let code = CString::new(code).unwrap();
@@ -65,6 +73,13 @@ fn with_event_loop<T>(py: Python<'_>, f: impl FnOnce(Bound<'_, PyAny>) -> T) ->
     result
 }
 
+fn reset_runtime_state() {
+    use nemo_relay::plugins::nemo_guardrails::component::clear_local_backend_provider;
+    let _ = clear_plugin_configuration(); // best-effort: the outcome is deliberately ignored
+    clear_local_backend_provider().unwrap();
+    *global_context().write().unwrap() = NemoRelayContextState::new();
+}
+
 #[test]
 fn test_native_module_registers_types_and_api_functions() {
     let _python = crate::test_support::init_python_test();
@@ -94,6 +109,635 @@ fn test_native_pymodule_entrypoint_registers_bindings() {
     });
 }
 
+#[test]
+fn test_native_pymodule_entrypoint_installs_nemo_guardrails_local_provider() {
+    let _python = crate::test_support::init_python_test();
+    // Running the module entrypoint is what installs the local backend provider.
+    Python::attach(|py| {
+        let module = PyModule::new(py, "_native_guardrails_provider").unwrap();
+        crate::_native(&module).unwrap();
+    });
+
+    let runtime = tokio::runtime::Runtime::new().unwrap();
+    let component = PluginComponentSpec {
+        kind: "nemo_guardrails".to_string(),
+        enabled: true,
+        config: serde_json::from_value(json!({
+            "mode": "local",
+            "codec": "openai_chat",
+            "config_path": "./rails"
+        }))
+        .unwrap(),
+    };
+    let plugin_config = PluginConfig {
+        version: 1,
+        components: vec![component],
+        policy: Default::default(),
+    };
+    let error = runtime.block_on(initialize_plugins(plugin_config)).unwrap_err();
+    // NOTE(review): presumably relies on the real NeMo Guardrails package being absent; confirm.
+    let _ = clear_plugin_configuration();
+    let message = match error {
+        nemo_relay::plugin::PluginError::RegistrationFailed(message) => message,
+        other => panic!("unexpected error: {other}"),
+    };
+    assert!(
+        message.contains(
+            "NeMo Guardrails is required for the built-in NeMo Guardrails local backend"
+        ),
+        "unexpected message: {message}"
+    );
+}
+
+#[test]
+fn test_guardrails_local_helper_registers_and_enforces_llm_and_tool_checks() {
+    let _python = crate::test_support::init_python_test();
+    Python::attach(|py| {
+        let python_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python");
+        let module = load_module(
+            py,
+            &format!(
+                r#"
+import pathlib
+import sys
+import types
+
+sys.path.insert(0, {python_dir:?})
+
+MODULE_NAME = "fake_guardrails_local_helper"
+
+fake_root = types.ModuleType(MODULE_NAME)
+fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
+
+class Result:
+    def __init__(self, status, content=None, rail=None):
+        self.status = status
+        self.content = content
+        self.rail = rail
+
+class RailType:
+    INPUT = "input"
+    OUTPUT = "output"
+
+class RailStatus:
+    BLOCKED = "blocked"
+    MODIFIED = "modified"
+    PASSED = "passed"
+
+class RailsConfig:
+    @staticmethod
+    def from_content(*, colang_content=None, yaml_content=None):
+        return {{"yaml": yaml_content, "colang": colang_content}}
+
+    @staticmethod
+    def from_path(path):
+        return {{"path": path}}
+
+check_results = []
+check_calls = []
+
+class LLMRails:
+    def __init__(self, config):
+        self.config = config
+
+    async def check_async(self, messages, rail_types):
+        check_calls.append((messages, rail_types))
+        return check_results.pop(0)
+
+fake_root.RailsConfig = RailsConfig
+fake_root.LLMRails = LLMRails
+fake_options.RailType = RailType
+fake_options.RailStatus = RailStatus
+
+sys.modules[MODULE_NAME] = fake_root
+sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
+sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
+sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+
+from nemo_relay._native import LLMRequest
+from nemo_relay._guardrails_local import register_local_backend
+
+class Context:
+    def register_llm_execution_intercept(self, name, priority, callback):
+        self.llm = callback
+
+    def register_llm_stream_execution_intercept(self, name, priority, callback):
+        self.stream = callback
+
+    def register_tool_execution_intercept(self, name, priority, callback):
+        self.tool = callback
+
+async def run_case():
+    ctx = Context()
+    event_log = []
+    register_local_backend(
+        {{
+            "mode": "local",
+            "codec": "openai_chat",
+            "config_yaml": "models: []",
+            "input": True,
+            "output": True,
+            "tool_input": True,
+            "tool_output": True,
+            "local": {{"python_module": MODULE_NAME}},
+        }},
+        ctx,
+    )
+
+    request = LLMRequest(
+        {{}},
+        {{
+            "model": "gpt-4o-mini",
+            "messages": [{{"role": "user", "content": "unsafe"}}],
+        }},
+    )
+    seen_request_messages = []
+
+    async def next_call(req):
+        seen_request_messages.append(req.content["messages"][-1]["content"])
+        return {{
+            "choices": [{{"message": {{"role": "assistant", "content": "safe reply"}}}}],
+            "id": "resp_1",
+            "model": "gpt-4o-mini",
+        }}
+
+    check_results.extend(
+        [
+            Result(RailStatus.MODIFIED, content="sanitized user"),
+            Result(RailStatus.PASSED),
+        ]
+    )
+    llm_result = await ctx.llm("demo", request, next_call)
+
+    seen_tool_args = []
+
+    async def next_tool(args):
+        seen_tool_args.append(args)
+        return {{"raw": True}}
+
+    check_results.extend(
+        [
+            Result(RailStatus.MODIFIED, content='{{"arguments": {{"city": "Boston"}}}}'),
+            Result(RailStatus.MODIFIED, content='{{"result": {{"ok": true}}}}'),
+        ]
+    )
+    tool_result = await ctx.tool("weather_lookup", {{"city": "Phoenix"}}, next_tool)
+
+    return {{
+        "llm_result": llm_result,
+        "tool_result": tool_result,
+        "seen_request_messages": seen_request_messages,
+        "seen_tool_args": seen_tool_args,
+        "check_calls": check_calls,
+    }}
+"#,
+                python_dir = python_dir.display(),
+            ),
+        );
+        // Drive the script's `run_case` coroutine on an event loop and convert its result to JSON.
+        let result_json = with_event_loop(py, |event_loop| {
+            let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
+            let result = event_loop
+                .call_method1("run_until_complete", (coroutine,))
+                .unwrap();
+            crate::convert::py_to_json(&result).unwrap()
+        });
+        // Content rewritten by the fake rails must reach the downstream calls and the tool result.
+        assert_eq!(
+            result_json["seen_request_messages"][0],
+            json!("sanitized user")
+        );
+        assert_eq!(result_json["tool_result"], json!({ "ok": true }));
+        assert_eq!(
+            result_json["seen_tool_args"][0],
+            json!({ "city": "Boston" })
+        );
+        assert_eq!(
+            result_json["llm_result"]["choices"][0]["message"]["content"],
+            json!("safe reply")
+        );
+        assert_eq!(result_json["check_calls"].as_array().unwrap().len(), 4);
+    });
+}
+
+#[test]
+fn test_guardrails_local_helper_enforces_streamed_output_rails() {
+    let _python = crate::test_support::init_python_test();
+    Python::attach(|py| {
+        let native_module = PyModule::new(py, "_native_guardrails_streaming").unwrap();
+        crate::_native(&native_module).unwrap();
+        let sys = py.import("sys").unwrap();
+        let modules = sys.getattr("modules").unwrap();
+        modules
+            .set_item("nemo_relay._native", native_module.clone())
+            .unwrap();
+        // The embedded script below puts the in-repo `python/` directory first on `sys.path`.
+        let python_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python");
+        let module = load_module(
+            py,
+            &format!(
+                r#"
+import sys
+import types
+
+sys.path.insert(0, {python_dir:?})
+
+MODULE_NAME = "fake_guardrails_streaming"
+
+fake_root = types.ModuleType(MODULE_NAME)
+fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
+
+class Result:
+    def __init__(self, status, content=None, rail=None):
+        self.status = status
+        self.content = content
+        self.rail = rail
+
+class RailType:
+    INPUT = "input"
+    OUTPUT = "output"
+
+class RailStatus:
+    BLOCKED = "blocked"
+    MODIFIED = "modified"
+    PASSED = "passed"
+
+class RailsConfig:
+    @staticmethod
+    def from_content(*, colang_content=None, yaml_content=None):
+        return {{"yaml": yaml_content}}
+
+stream_results = []
+event_log = []
+
+class LLMRails:
+    def __init__(self, config):
+        self.config = types.SimpleNamespace(
+            rails=types.SimpleNamespace(
+                output=types.SimpleNamespace(
+                    flows=["self check output"],
+                    streaming=types.SimpleNamespace(enabled=True, stream_first=True),
+                )
+            )
+        )
+
+    async def check_async(self, messages, rail_types):
+        return Result(RailStatus.PASSED)
+
+    def stream_async(self, *, messages=None, generator=None, include_metadata=False):
+        async def _run():
+            outcome = stream_results.pop(0)
+            async for chunk in generator:
+                event_log.append(f"guardrails-sees:{{chunk}}")
+                if outcome == "pass":
+                    yield chunk
+            if outcome == "block":
+                yield '{{"error": {{"message": "Blocked by output rails: output-policy", "type": "guardrails_violation"}}}}'
+        return _run()
+
+fake_root.RailsConfig = RailsConfig
+fake_root.LLMRails = LLMRails
+fake_options.RailType = RailType
+fake_options.RailStatus = RailStatus
+
+sys.modules[MODULE_NAME] = fake_root
+sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
+sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
+sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+
+from nemo_relay._native import LLMRequest
+from nemo_relay._guardrails_local import register_local_backend
+
+class Context:
+    def register_llm_execution_intercept(self, name, priority, callback):
+        self.llm = callback
+
+    def register_llm_stream_execution_intercept(self, name, priority, callback):
+        self.stream = callback
+
+    def register_tool_execution_intercept(self, name, priority, callback):
+        self.tool = callback
+
+async def run_case():
+    ctx = Context()
+    event_log.clear()
+    register_local_backend(
+        {{
+            "mode": "local",
+            "codec": "openai_chat",
+            "config_yaml": "models: []",
+            "input": False,
+            "output": True,
+            "local": {{"python_module": MODULE_NAME}},
+        }},
+        ctx,
+    )
+
+    request = LLMRequest(
+        {{}},
+        {{
+            "model": "gpt-4o-mini",
+            "messages": [{{"role": "user", "content": "hello"}}],
+        }},
+    )
+
+    async def next_call(req):
+        async def _stream():
+            event_log.append("source:hello")
+            yield {{"choices": [{{"delta": {{"content": "hello"}}}}]}}
+            event_log.append("source:world")
+            yield {{"choices": [{{"delta": {{"content": "world"}}}}]}}
+        return _stream()
+
+    stream_results.append("pass")
+    allowed_stream = await ctx.stream(request, next_call)
+    allowed_chunks = []
+    async for chunk in allowed_stream:
+        event_log.append(f"yield:{{chunk['choices'][0]['delta']['content']}}")
+        allowed_chunks.append(chunk)
+
+    stream_results.append("block")
+    try:
+        blocked_stream = await ctx.stream(request, next_call)
+        async for _chunk in blocked_stream:
+            pass
+    except RuntimeError as error:
+        blocked = str(error)
+    else:
+        raise AssertionError("expected streamed output block")
+
+    ctx_stream_first_false = Context()
+    fake_root.LLMRails = lambda config: types.SimpleNamespace(
+        config=types.SimpleNamespace(
+            rails=types.SimpleNamespace(
+                output=types.SimpleNamespace(
+                    flows=["self check output"],
+                    streaming=types.SimpleNamespace(enabled=True, stream_first=False),
+                )
+            )
+        ),
+        check_async=LLMRails(config).check_async,
+        stream_async=LLMRails(config).stream_async,
+    )
+    register_local_backend(
+        {{
+            "mode": "local",
+            "codec": "openai_chat",
+            "config_yaml": "models: []",
+            "input": False,
+            "output": True,
+            "local": {{"python_module": MODULE_NAME}},
+        }},
+        ctx_stream_first_false,
+    )
+    try:
+        failing_stream = await ctx_stream_first_false.stream(request, next_call)
+        async for _chunk in failing_stream:
+            pass
+    except RuntimeError as error:
+        modified = str(error)
+    else:
+        raise AssertionError("expected stream_first=false error")
+
+    return {{
+        "allowed_chunks": allowed_chunks,
+        "blocked": blocked,
+        "event_log": event_log,
+        "modified": modified,
+    }}
+"#,
+                python_dir = python_dir.display(),
+            ),
+        );
+        // Run the script's `run_case` coroutine to completion and inspect its JSON-converted result.
+        let result = with_event_loop(py, |event_loop| {
+            let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
+            let result = event_loop
+                .call_method1("run_until_complete", (coroutine,))
+                .unwrap();
+            crate::convert::py_to_json(&result).unwrap()
+        });
+        assert_eq!(
+            result["allowed_chunks"],
+            json!([
+                {"choices": [{"delta": {"content": "hello"}}]},
+                {"choices": [{"delta": {"content": "world"}}]}
+            ])
+        );
+        let event_log = result["event_log"].as_array().unwrap();
+        assert_eq!(
+            &event_log[..6],
+            json!([
+                "source:hello",
+                "yield:hello",
+                "source:world",
+                "yield:world",
+                "guardrails-sees:hello",
+                "guardrails-sees:world",
+            ])
+            .as_array()
+            .unwrap()
+        );
+        assert!(
+            result["blocked"]
+                .as_str()
+                .unwrap()
+                .contains("output rail blocked the LLM call")
+        );
+        assert!(
+            result["modified"]
+                .as_str()
+                .unwrap()
+                .contains("stream_first = true")
+        );
+    });
+}
+
+#[test]
+fn test_local_guardrails_provider_initializes_and_enforces_managed_core_calls() {
+    let _python = crate::test_support::init_python_test();
+    reset_runtime_state();
+
+    Python::attach(|py| {
+        let native_module = PyModule::new(py, "_native_guardrails_e2e").unwrap();
+        crate::_native(&native_module).unwrap();
+        let sys = py.import("sys").unwrap();
+        let modules = sys.getattr("modules").unwrap();
+        let module_names = py
+            .eval(
+                c_str!("list(sys.modules.keys())"),
+                None,
+                Some(&[(c_str!("sys"), sys)].into_py_dict(py).unwrap()),
+            )
+            .unwrap()
+            .extract::<Vec<String>>()
+            .unwrap();
+        for name in module_names { // evict cached nemo_relay modules before the import below
+            if name == "nemo_relay" || name.starts_with("nemo_relay.") {
+                modules.del_item(name).unwrap();
+            }
+        }
+        modules
+            .set_item("nemo_relay._native", native_module.clone())
+            .unwrap(); // register the module built above under nemo_relay._native
+
+        let python_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python");
+        let module = load_module(
+            py,
+            &format!(
+                r#"
+import sys
+import types
+
+sys.path.insert(0, {python_dir:?})
+
+MODULE_NAME = "fake_guardrails_local_e2e"
+
+fake_root = types.ModuleType(MODULE_NAME)
+fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
+
+class Result:
+    def __init__(self, status, content=None, rail=None):
+        self.status = status
+        self.content = content
+        self.rail = rail
+
+class RailType:
+    INPUT = "input"
+    OUTPUT = "output"
+
+class RailStatus:
+    BLOCKED = "blocked"
+    MODIFIED = "modified"
+    PASSED = "passed"
+
+class RailsConfig:
+    @staticmethod
+    def from_content(*, colang_content=None, yaml_content=None):
+        return {{"yaml": yaml_content}}
+
+check_results = []
+
+class LLMRails:
+    def __init__(self, config):
+        self.config = config
+
+    async def check_async(self, messages, rail_types):
+        return check_results.pop(0)
+
+fake_root.RailsConfig = RailsConfig
+fake_root.LLMRails = LLMRails
+fake_options.RailType = RailType
+fake_options.RailStatus = RailStatus
+
+sys.modules[MODULE_NAME] = fake_root
+sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
+sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
+sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+
+import nemo_relay
+
+async def run_case():
+    stack = nemo_relay.create_scope_stack()
+    nemo_relay.set_thread_scope_stack(stack)
+
+    await nemo_relay.plugin.initialize(
+        {{
+            "version": 1,
+            "components": [
+                {{
+                    "kind": "nemo_guardrails",
+                    "enabled": True,
+                    "config": {{
+                        "mode": "local",
+                        "codec": "openai_chat",
+                        "config_yaml": "models: []",
+                        "input": True,
+                        "output": True,
+                        "tool_input": True,
+                        "tool_output": True,
+                        "local": {{"python_module": MODULE_NAME}},
+                    }},
+                }}
+            ],
+        }}
+    )
+
+    check_results.extend(
+        [
+            Result(RailStatus.MODIFIED, content="sanitized user"),
+            Result(RailStatus.PASSED),
+            Result(RailStatus.MODIFIED, content='{{"arguments": {{"city": "Boston"}}}}'),
+            Result(RailStatus.MODIFIED, content='{{"result": {{"ok": true}}}}'),
+        ]
+    )
+
+    request = nemo_relay.LLMRequest(
+        {{}},
+        {{
+            "model": "gpt-4o-mini",
+            "messages": [{{"role": "user", "content": "unsafe"}}],
+        }},
+    )
+
+    seen_request_messages = []
+    async def llm_impl(req):
+        seen_request_messages.append(req.content["messages"][-1]["content"])
+        return {{
+            "choices": [{{"message": {{"role": "assistant", "content": "safe reply"}}}}],
+            "id": "resp_1",
+            "model": req.content["model"],
+        }}
+
+    llm_result = await nemo_relay.llm.execute(
+        "demo",
+        request,
+        llm_impl,
+        response_codec=nemo_relay.codecs.OpenAIChatCodec(),
+    )
+
+    seen_tool_args = []
+    async def tool_impl(args):
+        seen_tool_args.append(args)
+        return {{"raw": True}}
+
+    tool_result = await nemo_relay.tools.execute("weather_lookup", {{"city": "Phoenix"}}, tool_impl)
+    return {{
+        "llm_result": llm_result,
+        "tool_result": tool_result,
+        "seen_request_messages": seen_request_messages,
+        "seen_tool_args": seen_tool_args,
+    }}
+"#,
+                python_dir = python_dir.display(),
+            ),
+        );
+        let result_json = with_event_loop(py, |event_loop| {
+            let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
+            let result = event_loop
+                .call_method1("run_until_complete", (coroutine,))
+                .unwrap();
+            crate::convert::py_to_json(&result).unwrap()
+        });
+
+        assert_eq!(
+            result_json["llm_result"]["choices"][0]["message"]["content"],
+            json!("safe reply")
+        ); // second queued result is PASSED, so the reply is returned as produced
+        assert_eq!(result_json["tool_result"], json!({ "ok": true })); // from the 4th result
+        assert_eq!(
+            result_json["seen_request_messages"][0],
+            json!("sanitized user")
+        ); // first queued result (MODIFIED) rewrote the user message seen by llm_impl
+        assert_eq!(
+            result_json["seen_tool_args"][0],
+            json!({ "city": "Boston" })
+        ); // third queued result (MODIFIED) rewrote the arguments seen by tool_impl
+    });
+
+    reset_runtime_state();
+}
+
 #[test]
 fn test_python_test_guard_restores_existing_runtime_env() {
     let lock = crate::test_support::lock_python_test();
diff --git a/docs/about-nemo-relay/concepts/plugins.mdx b/docs/about-nemo-relay/concepts/plugins.mdx
index b9c412e9..065b4b96 100644
--- a/docs/about-nemo-relay/concepts/plugins.mdx
+++ b/docs/about-nemo-relay/concepts/plugins.mdx
@@ -171,9 +171,10 @@ The core crate also ships a built-in `nemo_guardrails` plugin component. It is
 the first-party Guardrails integration point that NeMo Relay owns through the
 shared plugin system.
 
-The current shipped user-facing lane is the remote backend. It gives NeMo Relay
-one canonical plugin kind and config shape for Guardrails-backed managed LLM
-and tool checks while broader backend parity work remains separate.
+The currently shipped user-facing lanes are:
+
+- the remote backend for Guardrails-service integration
+- the Python-backed local backend for in-process `nemoguardrails` integration
 
 Detailed Guardrails plugin configuration belongs in
 [NeMo Guardrails Configuration](/nemo-guardrails-plugin/configuration).
diff --git a/docs/build-plugins/nemoguardrails.mdx b/docs/build-plugins/nemoguardrails.mdx
index e5517612..a347c3f7 100644
--- a/docs/build-plugins/nemoguardrails.mdx
+++ b/docs/build-plugins/nemoguardrails.mdx
@@ -15,7 +15,6 @@ first-party `nemo_guardrails` component, see
 [NeMo Guardrails Plugin](/nemo-guardrails-plugin/about).
 
 </Note>
-
 The example lives under `examples/nemoguardrails`. The single-file plugin
 implementation, runnable agent, and Guardrails config artifacts are under
 `example`.
diff --git a/docs/nemo-guardrails-plugin/about.mdx b/docs/nemo-guardrails-plugin/about.mdx
index aa1c6925..5c0cd2f0 100644
--- a/docs/nemo-guardrails-plugin/about.mdx
+++ b/docs/nemo-guardrails-plugin/about.mdx
@@ -17,12 +17,11 @@ first-party NeMo Relay plugin.
 The plugin is designed around backend modes:
 
 - `remote`
-  - Implemented now.
   - Calls a Guardrails service over HTTP(S), including streaming over the same
     remote contract.
 - `local`
-  - Planned.
-  - Reserved for a future in-process Python `nemoguardrails` backend.
+  - Calls `nemoguardrails` in process through the Python runtime instead of a
+    separate Guardrails service.
 
 ## Use This Plugin When
 
@@ -30,39 +29,43 @@ Start here when you need to:
 
 - Apply Guardrails input and output checks around managed `llm.execute(...)`
   calls.
-- Apply Guardrails policy around managed tool execution, including the current
-  remote managed `tool_output` lane.
+- Apply Guardrails policy around managed tool execution.
 - Configure Guardrails behavior through the same plugin config surface used by
   other first-party NeMo Relay components.
-- Keep Guardrails behavior in a reusable process-level config document instead
-  of wiring provider-specific checks into each application call site.
+- Keep Guardrails policy authoring in Guardrails-native config while NeMo Relay
+  owns when those checks run around managed execution.
 
 ## Current Scope
 
-The current shipped user-facing lane is the built-in `remote` backend.
+The built-in plugin currently exposes two user-facing modes with
+different boundaries.
 
-That lane supports:
+| Area | `remote` | `local` |
+|---|---|---|
+| Managed non-streaming LLM `input` | Supported | Supported |
+| Managed non-streaming LLM `output` | Supported | Supported |
+| Managed streaming LLM execution | Supported over the remote HTTP(S) contract | Supported for managed input checks and Guardrails-native output streaming when `rails.output.streaming.enabled = true`; with `stream_first = true`, output rails can stop the stream after some chunks have already been delivered; `stream_first = false` is not supported yet |
+| Managed `tool_input` | Not supported against the stock Guardrails remote contract | Supported |
+| Managed `tool_output` | Supported | Supported |
+| `request_defaults` | Supported as backend pass-through request semantics | Not supported |
+| Codec support | `openai_chat` | `openai_chat`, `openai_responses`, `anthropic_messages` |
+| Runtime availability | Any runtime that includes the remote backend | Python-enabled runtimes that can import `nemoguardrails` |
 
-- Managed non-streaming LLM `input` checks.
-- Managed non-streaming LLM `output` checks.
-- Managed streaming LLM execution over the remote HTTP(S) path.
-- Managed tool-result checks through `tool_output`.
-- Request-time Guardrails defaults passed through to the remote backend.
-
-The current built-in remote backend does not support:
-
-- Managed `tool_input` checks against the stock Guardrails remote contract.
-- `local` mode.
-- Remote managed LLM parity beyond `codec = "openai_chat"`.
+The `local` backend is a Python-backed runtime feature, not a universal
+cross-binding backend. Runtimes that do not install the local backend provider
+report `local` mode as unavailable during plugin initialization.
 
 ## Managed Surfaces Versus Request Defaults
 
-The NeMo Guardrails plugin model uses two different concepts:
+Both `remote` mode and `local` mode share the same top-level plugin model, but
+they do not implement every part of that model in the same way.
+
+At the plugin-model level, NeMo Guardrails uses two different concepts:
 
-- Currently supported managed NeMo Relay execution surfaces in the shipped
-  remote backend:
+- Top-level managed NeMo Relay execution surfaces:
   - `input`
   - `output`
+  - `tool_input`
   - `tool_output`
 - Guardrails backend request defaults:
   - `request_defaults.context`
@@ -78,62 +81,43 @@ This distinction matters:
 
 - Managed surfaces wrap real NeMo Relay execution boundaries such as
   `llm.execute(...)` and `tools.execute(...)`.
-- Managed surfaces let NeMo Relay enforce behavior around those boundaries.
-  Depending on the surface, Relay can block work, allow it, or apply managed
-  request or result handling before the application sees the final outcome.
+- Managed surfaces give NeMo Relay an owned enforcement point around a known
+  runtime step. Depending on the backend and surface, Relay can block work,
+  allow it, or apply managed request or result handling before the application
+  sees the outcome.
 - Managed surfaces also give NeMo Relay a stable runtime boundary for its own
-  middleware ordering, lifecycle behavior, and observability marks. Relay knows
-  exactly which step is being wrapped and can attach policy and telemetry to
-  that step directly.
-- `request_defaults` fields are forwarded to the selected Guardrails backend as
-  request semantics. They do not create new NeMo Relay-native execution
-  surfaces.
-- `request_defaults` can still influence Guardrails behavior, but they do not
-  give NeMo Relay a new local runtime step to wrap. Relay is passing backend
-  options along with a request, not creating a new middleware boundary of its
-  own.
-- `request_defaults` are also backend-contract dependent. A selected Guardrails
-  backend can use them when evaluating a request, but the exact effect depends
-  on what that backend supports. Relay is not creating a separate local
-  retrieval, dialog, or tool boundary just because those fields exist in the
-  request.
-
-In practice, the tradeoff is:
-
-- Managed surfaces give you a Relay-owned enforcement point around a known
-  runtime step, with Relay-owned enforcement, ordering, and marks around that
-  step.
-- `request_defaults` give you backend-level configuration for a request, but
-  not a separate Relay-owned interception point, runtime boundary, or
-  middleware surface.
-
-Another way to think about it:
+  middleware ordering, lifecycle behavior, and observability marks.
+
+In practice:
 
 - Managed surfaces are places where NeMo Relay is holding the steering wheel.
-- `request_defaults` are notes that NeMo Relay passes to the Guardrails backend
-  with a request.
 
-Top-level `tool_input` is still part of the built-in plugin contract, but it is
-not supported by the current stock-remote backend.
+Handling of `request_defaults` is mode-specific:
+
+- In `remote` mode, `request_defaults` fields are forwarded to the selected
+  Guardrails backend as request semantics. They do not create new NeMo
+  Relay-native execution surfaces.
+- In `local` mode, `request_defaults` is rejected instead of passed through.
 
-The overlap in names is important:
+The overlap in names is important in `remote` mode:
 
 - Top-level `input` is a managed NeMo Relay execution surface.
 - `request_defaults.rails.input` is a backend pass-through option.
 - Top-level `output` is a managed NeMo Relay execution surface.
 - `request_defaults.rails.output` is a backend pass-through option.
-- Top-level `tool_input` is part of the built-in plugin model, but the current
-  stock-remote backend rejects it.
+- Top-level `tool_input` is a managed NeMo Relay execution surface in the
+  plugin contract. The current stock-remote backend rejects it, while the local
+  backend supports it.
 - `request_defaults.rails.tool_input` is a backend pass-through option.
 - Top-level `tool_output` is a managed NeMo Relay execution surface.
 - `request_defaults.rails.tool_output` is a backend pass-through option.
 
 In particular, `request_defaults.rails.dialog` and
-`request_defaults.rails.retrieval` are simple pass-through options. They are
-not separate managed middleware surfaces in NeMo Relay.
+`request_defaults.rails.retrieval` are pass-through options. They are not
+separate managed middleware surfaces in NeMo Relay.
 
 ## Pages
 
 - [NeMo Guardrails Configuration](/nemo-guardrails-plugin/configuration)
-  documents the built-in component shape, remote-mode boundaries, and current
+  documents the built-in component shape, mode boundaries, and current
   support matrix.
diff --git a/docs/nemo-guardrails-plugin/configuration.mdx b/docs/nemo-guardrails-plugin/configuration.mdx
index ddaa1fb0..b1554e1c 100644
--- a/docs/nemo-guardrails-plugin/configuration.mdx
+++ b/docs/nemo-guardrails-plugin/configuration.mdx
@@ -10,9 +10,6 @@ SPDX-License-Identifier: Apache-2.0 */}
 Use this page when you want to configure the built-in NeMo Guardrails plugin
 component. The component kind is `nemo_guardrails`.
 
-The current shipped user-facing backend is `mode = "remote"`. `local` remains
-part of the config model, but it is not yet a finished user-facing backend.
-
 For plugin file discovery, precedence, merge behavior, editor controls, and
 gateway conflict rules, see
 [Plugin Configuration Files](/build-plugins/plugin-configuration-files).
@@ -37,32 +34,36 @@ The top-level NeMo Guardrails object contains:
 | `codec` | Managed LLM provider codec. |
 | `input` | Enables managed LLM input checks. |
 | `output` | Enables managed LLM output checks. |
-| `tool_input` | Part of the built-in plugin model for managed tool-argument checks before execution. The current stock-remote backend rejects it. |
+| `tool_input` | Enables managed tool-argument checks before execution. |
 | `tool_output` | Enables managed tool-result checks after execution. |
 | `priority` | Middleware priority for installed execution intercepts. |
 | `remote` | Remote backend settings. |
-| `local` | Local backend settings for future local mode. |
-| `request_defaults` | Default request-time Guardrails semantics passed to the backend. |
+| `local` | Local backend settings. |
+| `request_defaults` | Default request-time Guardrails semantics passed to the remote backend. |
 | `policy` | Component-local handling for unknown fields and unsupported values. |
 
 At least one managed Guardrails surface must be enabled.
 
-## Current Remote Support
+## Backend Support
 
-The current built-in remote backend supports:
+| Area | `remote` | `local` |
+|---|---|---|
+| Built-in component kind and config validation | Supported | Supported |
+| Managed LLM `input` | Supported | Supported |
+| Managed LLM `output` | Supported | Supported |
+| Managed streaming LLM execution | Supported over the remote HTTP(S) contract | Supported for managed input checks and Guardrails-native output streaming when `rails.output.streaming.enabled = true`; with `stream_first = true`, output rails can stop the stream after some chunks have already been delivered; `stream_first = false` is not supported yet |
+| Managed `tool_input` | Not supported against the stock Guardrails remote contract | Supported |
+| Managed `tool_output` | Supported | Supported |
+| `request_defaults` pass-through | Supported | Not supported |
+| Codec support | `openai_chat` | `openai_chat`, `openai_responses`, `anthropic_messages` |
+| Runtime availability | Any runtime that includes the remote backend | Python-enabled runtimes that can import `nemoguardrails` |
 
-| Area | Support |
-|---|---|
-| Built-in component kind and config validation | Supported |
-| Managed LLM `input` | Supported |
-| Managed LLM `output` | Supported |
-| Managed streaming LLM execution over the remote HTTP(S) contract | Supported |
-| Managed `tool_output` | Supported |
-| Managed `tool_input` | Not supported against the stock Guardrails remote contract |
-| `request_defaults` pass-through | Supported |
-| `local` mode | Not implemented yet |
+## Remote Mode
+
+Use `remote` mode when NeMo Relay should call a Guardrails service over
+HTTP(S).
 
-## Remote Requirements
+### Requirements
 
 To use `mode = "remote"`, the configured `remote.endpoint` must point at a
 Guardrails service that NeMo Relay can reach from the running process and that
@@ -73,7 +74,7 @@ Guardrails service still owns the actual policy content. In practice, NeMo
 Relay decides when managed checks run, while the Guardrails config decides what
 to block, allow, or rewrite.
 
-## `plugins.toml` Example
+### `plugins.toml` Example
 
 ```toml
 version = 1
@@ -108,32 +109,12 @@ unknown_field = "warn"
 unsupported_value = "error"
 ```
 
-This example configures the built-in remote backend for a Guardrails service
-that uses `codec = "openai_chat"`, managed LLM `input` and `output`, managed
+This example configures the built-in remote mode for a Guardrails service that
+uses `codec = "openai_chat"`, managed LLM `input` and `output`, managed
 `tool_output`, and request-default pass-through for backend context plus
 backend `input` and `output` rail selection.
 
-In that setup, the NeMo Relay plugin chose the managed surfaces to wrap, while
-the Guardrails config defined the actual blocking policy, such as rejecting
-secret-seeking prompts, bypass attempts, specific blocked tokens, or
-private-key-like output.
-
-For example, the Guardrails-side policy can look like this:
-
-```yaml
-rails:
-  input:
-    flows:
-      - self check input
-  output:
-    flows:
-      - self check output
-```
-
-This Guardrails-side config defines the policy logic. The NeMo Relay plugin
-config decides when those checks run.
-
-## Remote Mode Rules
+### Rules
 
 When `mode = "remote"`:
 
@@ -146,24 +127,24 @@ When `mode = "remote"`:
 
 ### Codec Boundary
 
-The current built-in remote backend supports managed LLM execution only with:
+The current built-in remote mode supports managed LLM execution only with:
 
 - `openai_chat`
 
-## Managed Tool Boundary
+### Managed Tool Boundary
 
-The current remote backend supports managed `tool_output`.
+The current remote mode supports managed `tool_output`.
 
-The current remote backend rejects managed `tool_input` explicitly because the
+The current remote mode rejects managed `tool_input` explicitly because the
 stock Guardrails remote contract does not activate pre-execution tool-call
 rails from externally submitted `/v1/chat/completions` history. NeMo Relay
 rejects `tool_input` in remote mode rather than leaving a silent
 non-enforcing path.
 
-## Request Defaults
+### Request Defaults
 
 `request_defaults` lets the built-in plugin pass request-time semantics through
-to the selected backend.
+to the selected remote backend.
 
 Supported request-default fields are:
 
@@ -201,11 +182,12 @@ The `rails` section can include:
 - `tool_output`
 - `tool_input`
 
-Those values are forwarded to the backend as request semantics. They do not
-mean NeMo Relay owns separate managed retrieval or dialog execution surfaces.
-`dialog` and `retrieval` are pass-through request options only. Likewise,
-`request_defaults.rails.tool_input` is only a backend pass-through selector. It
-does not make managed remote `tool_input` supported in the stock-remote lane.
+Those values are forwarded to the remote backend as request semantics. They do
+not mean NeMo Relay owns separate managed retrieval or dialog execution
+surfaces. `dialog` and `retrieval` are pass-through request options only.
+Likewise, `request_defaults.rails.tool_input` is only a backend pass-through
+selector. It does not make managed remote `tool_input` supported in the
+stock-remote lane.
 
 For more targeted request-time pass-through, the remote backend also forwards
 selectors like these:
@@ -219,11 +201,7 @@ dialog = true
 tool_output = ["validate_tool_output"]
 ```
 
-This richer selector shape demonstrates how request-time Guardrails semantics
-can be forwarded even when NeMo Relay does not own a separate native managed
-surface for that category.
-
-## Observability
+### Observability
 
 The current remote backend emits coarse backend-level marks for remote
 Guardrails activity:
@@ -232,4 +210,111 @@ Guardrails activity:
 - `nemo_guardrails.remote.end`
 - `nemo_guardrails.remote.error`
 
-These marks cover managed LLM remote execution and managed tool-result checks.
+## Local Mode
+
+Use `local` mode when NeMo Relay should call `nemoguardrails` in process
+through the Python runtime instead of a separate Guardrails service.
+
+### Requirements
+
+To use `mode = "local"`, the running Python environment must be able to import
+`nemoguardrails`.
+
+The built-in local backend is installed by the Python binding and runs
+Guardrails in process. Use it when the runtime has direct access to the Python
+Guardrails dependency and configuration files rather than a separate Guardrails
+service.
+
+The same ownership boundary still applies:
+
+- NeMo Relay decides when managed checks run.
+- Guardrails-native config still decides what to block, allow, or rewrite.
+
+### `plugins.toml` Example
+
+```toml
+version = 1
+
+[[components]]
+kind = "nemo_guardrails"
+enabled = true
+
+[components.config]
+version = 1
+mode = "local"
+codec = "openai_chat"
+input = true
+output = true
+tool_input = true
+tool_output = true
+config_path = "./rails"
+
+[components.config.policy]
+unknown_component = "warn"
+unknown_field = "warn"
+unsupported_value = "error"
+```
+
+This example configures the built-in local mode for a Python-enabled runtime
+that can import `nemoguardrails` and read a native Guardrails config directory
+from `./rails`.
+
+For example, the Guardrails-side policy can look like this:
+
+```yaml
+rails:
+  input:
+    flows:
+      - self check input
+  output:
+    flows:
+      - self check output
+```
+
+This Guardrails-side config defines the policy logic. The NeMo Relay plugin
+config decides when those checks run.
+
+### Rules
+
+When `mode = "local"`:
+
+- Exactly one of `config_path` or `config_yaml` is required.
+- `colang_content` can only be used with `config_yaml`.
+- `remote` settings cannot be present.
+- `request_defaults` is rejected.
+- `local.python_module` is optional and only needed when the runtime should
+  import the Guardrails dependency under a different importable module name
+  instead of the default `nemoguardrails` package.
+
+### Codec Boundary
+
+The current built-in local mode supports managed LLM execution with:
+
+- `openai_chat`
+- `openai_responses`
+- `anthropic_messages`
+
+### Managed Tool Boundary
+
+The current local mode supports both:
+
+- managed `tool_input`
+- managed `tool_output`
+
+### Streaming Boundary
+
+The current local mode supports streaming LLM input checks before the stream
+callback runs.
+
+When output rails are configured, the current local mode uses Guardrails-native
+streaming output rails instead of buffering the full provider stream. That
+requires `rails.output.streaming.enabled = true` in the Guardrails config.
+
+The current local mode supports the `stream_first = true` streaming semantics:
+provider chunks can still flow to the caller while Guardrails evaluates the
+stream in parallel. If Guardrails later blocks the stream, the call fails at
+that point even though some chunks may already have been delivered.
+
+The current local mode does not support `rails.output.streaming.stream_first = false`
+yet, because that would require converting guarded text chunks back into valid
+provider-shaped stream chunks.
diff --git a/python/nemo_relay/_guardrails_local.py b/python/nemo_relay/_guardrails_local.py
new file mode 100644
index 00000000..5f30eb49
--- /dev/null
+++ b/python/nemo_relay/_guardrails_local.py
@@ -0,0 +1,589 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Internal helpers for the built-in NeMo Guardrails local backend."""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+import json
+from collections.abc import Callable
+from typing import Any, Protocol, cast
+
+from nemo_relay import Json, LLMRequest
+from nemo_relay.codecs import (
+    AnthropicMessagesCodec,
+    LlmCodec,
+    LlmResponseCodec,
+    OpenAIChatCodec,
+    OpenAIResponsesCodec,
+)
+from nemo_relay.plugin import PluginContext
+
+_DEFAULT_PRIORITY = 100
+
+
+class NeMoGuardrailsDependencyError(RuntimeError):
+    """Raised when ``nemoguardrails`` or a module needed to load it cannot be imported."""
+
+
+class NeMoGuardrailsViolation(RuntimeError):
+    """Raised when NeMo Guardrails blocks or cannot safely apply a rail result."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        rail_type: str,
+        rail: str | None = None,
+        content: str | None = None,
+    ) -> None:
+        super().__init__(message)
+        self.rail_type = rail_type  # surface label chosen by the raiser, e.g. "input" or "output"
+        self.rail = rail  # rail identifier taken from the Guardrails result, when one was supplied
+        self.content = content  # content associated with the violation, when available
+
+
+class _GuardrailsCodec(LlmCodec, LlmResponseCodec, Protocol):
+    """Structural type for codecs that satisfy both ``LlmCodec`` and ``LlmResponseCodec``."""
+
+
+_CODECS: dict[str, Callable[[], _GuardrailsCodec]] = {  # codec name -> zero-argument codec constructor
+    "openai_chat": OpenAIChatCodec,
+    "openai_responses": OpenAIResponsesCodec,
+    "anthropic_messages": AnthropicMessagesCodec,
+}
+
+
+def _load_nemoguardrails(module_name: str | None):  # returns (RailsConfig, LLMRails, RailType, RailStatus)
+    root_module = module_name or "nemoguardrails"  # None or "" falls back to the stock package name
+    try:
+        guardrails = cast(Any, importlib.import_module(root_module))
+        options = cast(Any, importlib.import_module(f"{root_module}.rails.llm.options"))
+    except ImportError as error:
+        if error.name == root_module:  # the root package itself could not be found
+            raise NeMoGuardrailsDependencyError(
+                "NeMo Guardrails is required for the built-in NeMo Guardrails local backend. "
+                "Install it with: pip install nemoguardrails"
+            ) from error
+        raise NeMoGuardrailsDependencyError(  # a different module failed while importing Guardrails
+            "NeMo Guardrails local backend could not import a required dependency: "
+            f"{error.name or error}. Install the full NeMo Guardrails runtime dependencies."
+        ) from error
+
+    return (
+        guardrails.RailsConfig,
+        guardrails.LLMRails,
+        options.RailType,
+        options.RailStatus,
+    )
+
+
+def _status_value(status: Any) -> str:  # lowercase text form of a status, for case-insensitive comparison
+    return str(getattr(status, "value", status)).lower()  # uses .value when present, else the object itself
+
+
+def _messages_from_annotated(annotated: Any) -> list[dict[str, Any]]:
+    return [dict(message) for message in annotated.messages]  # shallow-copies each message into a new dict
+
+
+async def _apply_input_rails(
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    codec: _GuardrailsCodec,
+    request: LLMRequest,
+) -> tuple[LLMRequest, list[dict[str, Any]]]:  # (request, messages), both updated if input was modified
+    annotated_request = codec.decode(request)
+    messages = _messages_from_annotated(annotated_request)
+    input_result = await rails.check_async(messages, rail_types=[rail_type.INPUT])
+    input_status = _status_value(input_result.status)
+    if input_status == _status_value(rail_status.BLOCKED):
+        _raise_blocked(input_result, "input")  # helper defined elsewhere in this module; presumably raises
+    if input_status == _status_value(rail_status.MODIFIED):
+        input_content = getattr(input_result, "content", "")
+        annotated_request.messages = _replace_last_role_content(
+            messages,
+            "user",  # only the most recent user message receives the modified content
+            "" if input_content is None else str(input_content),
+        )
+        request = codec.encode(annotated_request, request)  # presumably folds the edit back into the request
+        messages = _messages_from_annotated(annotated_request)
+    return request, messages
+
+
+def _replace_last_role_content(messages: list[dict[str, Any]], role: str, content: str) -> list[dict[str, Any]]:
+    updated = [dict(message) for message in messages]  # copy so the caller's dicts are not mutated
+    for index in range(len(updated) - 1, -1, -1):  # walk from the last message back to the first
+        if updated[index].get("role") == role:
+            updated[index]["content"] = content
+            return updated
+    raise NeMoGuardrailsViolation(  # no message with this role exists, so the rewrite cannot be applied
+        f"NeMo Guardrails returned modified {role} content but no {role} message was present.",
+        rail_type="input" if role == "user" else "output",
+        content=content,
+    )
+
+
+def _tool_input_content(name: str, args: Json) -> str:  # JSON text describing a tool call before it runs
+    return json.dumps(
+        {
+            "tool_name": name,
+            "arguments": args,
+        },
+        sort_keys=True,  # deterministic key order
+        separators=(",", ":"),  # compact form with no whitespace between tokens
+    )
+
+
+def _tool_output_content(name: str, args: Json, result: Json) -> str:  # JSON text for a tool call and its result
+    return json.dumps(
+        {
+            "tool_name": name,
+            "arguments": args,
+            "result": result,
+        },
+        sort_keys=True,  # deterministic key order
+        separators=(",", ":"),  # compact form with no whitespace between tokens
+    )
+
+
+def _modified_tool_payload(content: str, field: str) -> Json:  # parses `content` and returns its `field` entry
+    try:
+        value = json.loads(content)
+    except json.JSONDecodeError as error:
+        raise NeMoGuardrailsViolation(
+            f"NeMo Guardrails returned modified tool {field} content that is not valid JSON.",
+            rail_type=f"tool_{field}",  # NOTE(review): "tool_<field>", not "tool_input"/"tool_output"; confirm
+            content=content,
+        ) from error
+
+    if not isinstance(value, dict) or field not in value:  # must be a JSON object containing `field`
+        raise NeMoGuardrailsViolation(
+            f"NeMo Guardrails returned modified tool {field} content without a '{field}' field.",
+            rail_type=f"tool_{field}",
+            content=content,
+        )
+    return cast(Json, value[field])
+
+
+def _raise_modified_output_not_supported(result: Any) -> None:  # always raises NeMoGuardrailsViolation
+    output_content = getattr(result, "content", "")
+    output_rail = getattr(result, "rail", None)
+    raise NeMoGuardrailsViolation(
+        "NeMo Guardrails output rail returned modified content, but the local backend "
+        "does not rewrite provider responses yet.",
+        rail_type="output",
+        rail=None if output_rail is None else str(output_rail),  # stringified only when a rail was reported
+        content="" if output_content is None else str(output_content),  # None is normalized to ""
+    )
+
+
+async def _check_output_rails(
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    messages: list[dict[str, Any]],
+    response_text: str | None,
+) -> None:  # returns normally only when the output is not blocked or modified
+    if response_text is None:  # no response text, so the output check is skipped
+        return
+
+    output_messages = [*messages, {"role": "assistant", "content": response_text}]  # reply added as last turn
+    output_result = await rails.check_async(output_messages, rail_types=[rail_type.OUTPUT])
+    output_status = _status_value(output_result.status)
+    if output_status == _status_value(rail_status.BLOCKED):
+        _raise_blocked(output_result, "output")  # helper defined elsewhere in this module; presumably raises
+    if output_status == _status_value(rail_status.MODIFIED):
+        _raise_modified_output_not_supported(output_result)  # modified output is reported as a violation
+
+
+def _has_streaming_output_rails(rails: Any) -> bool:  # true when rails.output has a non-empty "flows"
+    return bool(getattr(rails.config.rails.output, "flows", []))  # a missing attribute counts as no flows
+
+
+def _output_streaming_config(rails: Any) -> Any | None:
+    return getattr(rails.config.rails.output, "streaming", None)  # None when no "streaming" attribute exists
+
+
+def _guardrails_streaming_enabled(rails: Any) -> bool:
+    streaming = _output_streaming_config(rails)
+    return bool(streaming is not None and getattr(streaming, "enabled", False))  # absent or falsy means off
+
+
+def _extract_stream_text(codec_name: str, chunk: Json) -> str | None:
+    if not isinstance(chunk, dict):
+        return None
+
+    if codec_name == "openai_chat":
+        choices = chunk.get("choices")
+        if not isinstance(choices, list):
+            return None
+        parts: list[str] = []
+        for choice in choices:
+            if not isinstance(choice, dict):
+                continue
+            delta = choice.get("delta")
+            if not isinstance(delta, dict):
+                continue
+            content = delta.get("content")
+            if isinstance(content, str) and content:
+                parts.append(content)
+        return "".join(parts) if parts else None
+
+    if codec_name == "openai_responses":
+        if chunk.get("type") == "response.output_text.delta":
+            delta = chunk.get("delta")
+            return delta if isinstance(delta, str) and delta else None
+        return None
+
+    if codec_name == "anthropic_messages":
+        if chunk.get("type") != "content_block_delta":
+            return None
+        delta = chunk.get("delta")
+        if not isinstance(delta, dict):
+            return None
+        if delta.get("type") != "text_delta":
+            return None
+        text = delta.get("text")
+        return text if isinstance(text, str) and text else None
+
+    return None
+
+
+def _guardrails_stream_error_message(chunk: str) -> str | None:
+    try:
+        payload = json.loads(chunk)
+    except json.JSONDecodeError:
+        return None
+    if not isinstance(payload, dict):
+        return None
+    error = payload.get("error")
+    if not isinstance(error, dict):
+        return None
+    if error.get("type") != "guardrails_violation":
+        return None
+    message = error.get("message")
+    return message if isinstance(message, str) and message else "Blocked by output rails."
+
+
+async def _queue_string_stream(queue: "asyncio.Queue[str | None]"):
+    while True:
+        item = await queue.get()
+        if item is None:
+            return
+        yield item
+
+
+async def _monitor_streaming_output_rails(
+    *,
+    rails: Any,
+    messages: list[dict[str, Any]],
+    text_queue: "asyncio.Queue[str | None]",
+    blocked: dict[str, str | None],
+) -> None:
+    guarded_stream = rails.stream_async(
+        messages=messages,
+        generator=_queue_string_stream(text_queue),
+        include_metadata=False,
+    )
+    async for chunk in guarded_stream:
+        if isinstance(chunk, str):
+            message = _guardrails_stream_error_message(chunk)
+            if message is not None:
+                blocked["message"] = message
+                return
+
+
+def _raise_streaming_output_blocked(blocked_message: str) -> None:
+    raise NeMoGuardrailsViolation(
+        f"NeMo Guardrails output rail blocked the LLM call: {blocked_message}",
+        rail_type="output",
+        content=blocked_message,
+    )
+
+
+def _build_guardrails_config(config: dict[str, Any], rails_config_cls: Any) -> Any:
+    if config.get("config_path") is not None:
+        return rails_config_cls.from_path(cast(str, config["config_path"]))
+    return rails_config_cls.from_content(
+        colang_content=cast(str | None, config.get("colang_content")),
+        yaml_content=cast(str, config["config_yaml"]),
+    )
+
+
+def _resolve_codec(config: dict[str, Any]) -> tuple[str, _GuardrailsCodec]:
+    codec_name = cast(str | None, config.get("codec"))
+    if codec_name is None or codec_name not in _CODECS:
+        raise RuntimeError("local NeMo Guardrails backend requires a supported codec")
+    return codec_name, _CODECS[codec_name]()
+
+
+async def _check_tool_input(
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    tool_name: str,
+    args: Json,
+) -> Json:
+    input_result = await rails.check_async(
+        [{"role": "user", "content": _tool_input_content(tool_name, args)}],
+        rail_types=[rail_type.INPUT],
+    )
+    input_status = _status_value(input_result.status)
+    if input_status == _status_value(rail_status.BLOCKED):
+        _raise_blocked(input_result, "tool_input")
+    if input_status == _status_value(rail_status.MODIFIED):
+        input_content = getattr(input_result, "content", "")
+        return _modified_tool_payload(
+            "" if input_content is None else str(input_content),
+            "arguments",
+        )
+    return args
+
+
+async def _check_tool_output(
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    tool_name: str,
+    args: Json,
+    result: Json,
+) -> Json:
+    output_result = await rails.check_async(
+        [
+            {"role": "user", "content": _tool_input_content(tool_name, args)},
+            {
+                "role": "assistant",
+                "content": _tool_output_content(tool_name, args, result),
+            },
+        ],
+        rail_types=[rail_type.OUTPUT],
+    )
+    output_status = _status_value(output_result.status)
+    if output_status == _status_value(rail_status.BLOCKED):
+        _raise_blocked(output_result, "tool_output")
+    if output_status == _status_value(rail_status.MODIFIED):
+        output_content = getattr(output_result, "content", "")
+        return _modified_tool_payload(
+            "" if output_content is None else str(output_content),
+            "result",
+        )
+    return result
+
+
+def _make_llm_intercept(
+    *,
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    codec: _GuardrailsCodec,
+    enable_input: bool,
+    enable_output: bool,
+):
+    async def intercept(_name: str, request: LLMRequest, next_call):
+        current_request = request
+        messages = _messages_from_annotated(codec.decode(current_request))
+
+        if enable_input:
+            current_request, messages = await _apply_input_rails(
+                rails,
+                rail_type,
+                rail_status,
+                codec,
+                current_request,
+            )
+
+        response = await next_call(current_request)
+        if not enable_output:
+            return response
+
+        annotated_response = codec.decode_response(response)
+        await _check_output_rails(
+            rails,
+            rail_type,
+            rail_status,
+            messages,
+            annotated_response.response_text(),
+        )
+        return response
+
+    return intercept
+
+
+def _make_llm_stream_intercept(
+    *,
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    codec_name: str,
+    codec: _GuardrailsCodec,
+    enable_input: bool,
+    enable_output: bool,
+):
+    async def stream_intercept(request: LLMRequest, next_call):
+        current_request = request
+        messages = _messages_from_annotated(codec.decode(current_request))
+        if enable_input:
+            current_request, messages = await _apply_input_rails(
+                rails,
+                rail_type,
+                rail_status,
+                codec,
+                current_request,
+            )
+
+        stream = await next_call(current_request)
+        if not enable_output:
+            return stream
+        if not _has_streaming_output_rails(rails):
+            return stream
+        if not _guardrails_streaming_enabled(rails):
+            raise RuntimeError(
+                "local NeMo Guardrails streaming output rails require "
+                "rails.output.streaming.enabled = true in the Guardrails config."
+            )
+
+        streaming_config = _output_streaming_config(rails)
+        if streaming_config is None or not getattr(streaming_config, "stream_first", True):
+            raise RuntimeError(
+                "local NeMo Guardrails streaming output rails currently require "
+                "rails.output.streaming.stream_first = true."
+            )
+
+        text_queue: asyncio.Queue[str | None] = asyncio.Queue()
+        blocked: dict[str, str | None] = {"message": None}
+        monitor = asyncio.create_task(
+            _monitor_streaming_output_rails(
+                rails=rails,
+                messages=messages,
+                text_queue=text_queue,
+                blocked=blocked,
+            )
+        )
+
+        async def guarded_provider_stream():
+            try:
+                async for chunk in stream:
+                    if blocked["message"] is not None:
+                        _raise_streaming_output_blocked(blocked["message"])
+
+                    text = _extract_stream_text(codec_name, chunk)
+                    if text is not None:
+                        await text_queue.put(text)
+
+                    yield chunk
+
+                    if blocked["message"] is not None:
+                        _raise_streaming_output_blocked(blocked["message"])
+            finally:
+                await text_queue.put(None)
+                await monitor
+                if blocked["message"] is not None:
+                    _raise_streaming_output_blocked(blocked["message"])
+
+        return guarded_provider_stream()
+
+    return stream_intercept
+
+
+def _make_tool_intercept(
+    *,
+    rails: Any,
+    rail_type: Any,
+    rail_status: Any,
+    enable_tool_input: bool,
+    enable_tool_output: bool,
+):
+    async def tool_intercept(tool_name: str, args: Json, next_call):
+        current_args = args
+
+        if enable_tool_input:
+            current_args = await _check_tool_input(
+                rails,
+                rail_type,
+                rail_status,
+                tool_name,
+                current_args,
+            )
+
+        tool_result = await next_call(current_args)
+        if not enable_tool_output:
+            return tool_result
+
+        return await _check_tool_output(
+            rails,
+            rail_type,
+            rail_status,
+            tool_name,
+            current_args,
+            tool_result,
+        )
+
+    return tool_intercept
+
+
+def _raise_blocked(result: Any, rail_type: str) -> None:
+    rail_value = getattr(result, "rail", None)
+    rail = None if rail_value is None else str(rail_value)
+    content = getattr(result, "content", "")
+    detail = f" by rail '{rail}'" if rail else ""
+    subject = "LLM call" if rail_type in {"input", "output"} else "tool call"
+    raise NeMoGuardrailsViolation(
+        f"NeMo Guardrails {rail_type} rail blocked the {subject}{detail}.",
+        rail_type=rail_type,
+        rail=rail,
+        content="" if content is None else str(content),
+    )
+
+
+def register_local_backend(config: dict[str, Any], context: PluginContext) -> None:
+    """Install the built-in NeMo Guardrails local backend."""
+
+    local = cast(dict[str, Any], config.get("local") or {})
+    module_name = cast(str | None, local.get("python_module"))
+    RailsConfig, LLMRails, RailType, RailStatus = _load_nemoguardrails(module_name)
+    guardrails_config = _build_guardrails_config(config, RailsConfig)
+    rails = LLMRails(guardrails_config)
+    enable_input = bool(config.get("input", True))
+    enable_output = bool(config.get("output", True))
+    enable_tool_input = bool(config.get("tool_input", False))
+    enable_tool_output = bool(config.get("tool_output", False))
+    priority = int(config.get("priority", _DEFAULT_PRIORITY))
+
+    if enable_input or enable_output:
+        codec_name, codec = _resolve_codec(config)
+        intercept = _make_llm_intercept(
+            rails=rails,
+            rail_type=RailType,
+            rail_status=RailStatus,
+            codec=codec,
+            enable_input=enable_input,
+            enable_output=enable_output,
+        )
+        stream_intercept = _make_llm_stream_intercept(
+            rails=rails,
+            rail_type=RailType,
+            rail_status=RailStatus,
+            codec_name=codec_name,
+            codec=codec,
+            enable_input=enable_input,
+            enable_output=enable_output,
+        )
+        context.register_llm_execution_intercept("nemo_guardrails_local", priority, intercept)
+        context.register_llm_stream_execution_intercept(
+            "nemo_guardrails_local_stream",
+            priority,
+            stream_intercept,
+        )
+
+    if enable_tool_input or enable_tool_output:
+        tool_intercept = _make_tool_intercept(
+            rails=rails,
+            rail_type=RailType,
+            rail_status=RailStatus,
+            enable_tool_input=enable_tool_input,
+            enable_tool_output=enable_tool_output,
+        )
+        context.register_tool_execution_intercept("nemo_guardrails_local", priority, tool_intercept)

From ec49259df6bc42e2c70c344c4741c4c322693171 Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Mon, 1 Jun 2026 06:48:08 -0700
Subject: [PATCH 2/7] docs: refine local guardrails mode docs

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 docs/nemo-guardrails-plugin/about.mdx         | 27 +++++++------
 docs/nemo-guardrails-plugin/configuration.mdx | 39 +++++++++++++++----
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/docs/nemo-guardrails-plugin/about.mdx b/docs/nemo-guardrails-plugin/about.mdx
index 5c0cd2f0..d346fcb0 100644
--- a/docs/nemo-guardrails-plugin/about.mdx
+++ b/docs/nemo-guardrails-plugin/about.mdx
@@ -37,19 +37,18 @@ Start here when you need to:
 
 ## Current Scope
 
-The built-in plugin currently exposes two user-facing modes with
-different boundaries.
-
-| Area | `remote` | `local` |
-|---|---|---|
-| Managed non-streaming LLM `input` | Supported | Supported |
-| Managed non-streaming LLM `output` | Supported | Supported |
-| Managed streaming LLM execution | Supported over the remote HTTP(S) contract | Supported for managed input checks and Guardrails-native output streaming when `rails.output.streaming.enabled = true`; with `stream_first = true`, output rails can stop the stream after some chunks have already been delivered; `stream_first = false` is not supported yet |
-| Managed `tool_input` | Not supported against the stock Guardrails remote contract | Supported |
-| Managed `tool_output` | Supported | Supported |
-| `request_defaults` | Supported as backend pass-through request semantics | Not supported |
-| Codec support | `openai_chat` | `openai_chat`, `openai_responses`, `anthropic_messages` |
-| Runtime availability | Any runtime that includes the remote backend | Python-enabled runtimes that can import `nemoguardrails` |
+The built-in plugin currently exposes two user-facing modes:
+
+- `remote` for Guardrails-service integration over HTTP(S)
+- `local` for in-process `nemoguardrails` integration through the Python runtime
+
+Both modes support managed LLM `input` and `output`. The current mode-specific
+differences are:
+
+- `remote` supports `request_defaults` pass-through but does not support managed
+  `tool_input`
+- `local` supports managed `tool_input` and broader LLM codec coverage, but it
+  does not support `request_defaults`
 
 The `local` backend is a Python-backed runtime feature, not a universal
 cross-binding backend. Runtimes that do not install the local backend provider
@@ -119,5 +118,5 @@ separate managed middleware surfaces in NeMo Relay.
 ## Pages
 
 - [NeMo Guardrails Configuration](/nemo-guardrails-plugin/configuration)
-  documents the built-in component shape, mode boundaries, and current
+  documents the built-in component shape, mode boundaries, and the detailed
   support matrix.
diff --git a/docs/nemo-guardrails-plugin/configuration.mdx b/docs/nemo-guardrails-plugin/configuration.mdx
index b1554e1c..24cc12c6 100644
--- a/docs/nemo-guardrails-plugin/configuration.mdx
+++ b/docs/nemo-guardrails-plugin/configuration.mdx
@@ -61,7 +61,9 @@ At least one managed Guardrails surface must be enabled.
 ## Remote Mode
 
 Use `remote` mode when NeMo Relay should call a Guardrails service over
-HTTP(S).
+HTTP(S), especially when Guardrails must be shared across runtimes, used from
+non-Python environments, or deployed independently from the application
+process.
 
 ### Requirements
 
@@ -76,6 +78,11 @@ to block, allow, or rewrite.
 
 ### `plugins.toml` Example
 
+You can write this config directly in `plugins.toml`, or create and edit it
+through the CLI with `nemo-relay plugins edit`. For plugin file discovery,
+precedence, merge behavior, and editor controls, see
+[Plugin Configuration Files](/build-plugins/plugin-configuration-files).
+
 ```toml
 version = 1
 
@@ -232,6 +239,11 @@ The same ownership boundary still applies:
 
 ### `plugins.toml` Example
 
+You can write this config directly in `plugins.toml`, or create and edit it
+through the CLI with `nemo-relay plugins edit`. For plugin file discovery,
+precedence, merge behavior, and editor controls, see
+[Plugin Configuration Files](/build-plugins/plugin-configuration-files).
+
 ```toml
 version = 1
 
@@ -310,11 +322,24 @@ When output rails are configured, the current local mode uses Guardrails-native
 streaming output rails instead of buffering the full provider stream. That
 requires `rails.output.streaming.enabled = true` in the Guardrails config.
 
-The current local mode supports the `stream_first = true` streaming semantics:
-provider chunks can still flow to the caller while Guardrails evaluates the
-stream in parallel. If Guardrails later blocks the stream, the call fails at
-that point even though some chunks may already have been delivered.
+Guardrails calls the main streaming-output switch
+`rails.output.streaming.stream_first`.
+
+When `stream_first = true`, the current local mode uses pass-through-first
+streaming semantics:
+
+- provider chunks can flow to the caller immediately
+- Guardrails evaluates the streamed text in parallel
+- if Guardrails later blocks the stream, the call fails at that point even
+  though some chunks may already have been delivered
 
 The current local mode does not support `rails.output.streaming.stream_first = false`
-yet, because that would require converting guarded text chunks back into valid
-provider-shaped stream chunks.
+yet. That mode would be Guardrails-first streaming semantics:
+
+- Guardrails would need to evaluate streamed text before chunks are released to
+  the caller
+- the local backend would then need to convert Guardrails-approved text back
+  into valid provider-shaped stream chunks
+
+That guarded-text-to-provider-chunk adapter does not exist yet in the current
+local backend.

From 98d49155906b6ccab70710c854faa22e719660b0 Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Mon, 1 Jun 2026 07:03:26 -0700
Subject: [PATCH 3/7] test: factor local guardrails coverage fixtures

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 .../python/tests/coverage/coverage_tests.rs   | 256 ++++++++----------
 1 file changed, 112 insertions(+), 144 deletions(-)

diff --git a/crates/python/tests/coverage/coverage_tests.rs b/crates/python/tests/coverage/coverage_tests.rs
index 3e553341..90b792f8 100644
--- a/crates/python/tests/coverage/coverage_tests.rs
+++ b/crates/python/tests/coverage/coverage_tests.rs
@@ -41,6 +41,80 @@ fn load_module<'py>(py: Python<'py>, code: &str) -> Bound<'py, PyModule> {
     PyModule::from_code(py, &code, &file_name, &module_name).unwrap()
 }
 
+fn python_package_dir() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python")
+}
+
+fn fake_guardrails_module_prelude(module_name: &str, python_dir: &str) -> String {
+    format!(
+        r#"
+import sys
+import types
+
+sys.path.insert(0, {python_dir:?})
+
+MODULE_NAME = {module_name:?}
+
+fake_root = types.ModuleType(MODULE_NAME)
+fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
+
+class Result:
+    def __init__(self, status, content=None, rail=None):
+        self.status = status
+        self.content = content
+        self.rail = rail
+
+class RailType:
+    INPUT = "input"
+    OUTPUT = "output"
+
+class RailStatus:
+    BLOCKED = "blocked"
+    MODIFIED = "modified"
+    PASSED = "passed"
+
+class RailsConfig:
+    @staticmethod
+    def from_content(*, colang_content=None, yaml_content=None):
+        return {{"yaml": yaml_content, "colang": colang_content}}
+
+    @staticmethod
+    def from_path(path):
+        return {{"path": path}}
+"#,
+        python_dir = python_dir,
+        module_name = module_name,
+    )
+}
+
+fn register_fake_guardrails_module_epilogue() -> &'static str {
+    r#"
+fake_root.RailsConfig = RailsConfig
+fake_root.LLMRails = LLMRails
+fake_options.RailType = RailType
+fake_options.RailStatus = RailStatus
+
+sys.modules[MODULE_NAME] = fake_root
+sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
+sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
+sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+"#
+}
+
+fn local_plugin_context_python() -> &'static str {
+    r#"
+class Context:
+    def register_llm_execution_intercept(self, name, priority, callback):
+        self.llm = callback
+
+    def register_llm_stream_execution_intercept(self, name, priority, callback):
+        self.stream = callback
+
+    def register_tool_execution_intercept(self, name, priority, callback):
+        self.tool = callback
+"#
+}
+
 fn make_request() -> LlmRequest {
     LlmRequest {
         headers: serde_json::Map::from_iter([("x-trace".into(), json!("1"))]),
@@ -153,45 +227,24 @@ fn test_native_pymodule_entrypoint_installs_nemo_guardrails_local_provider() {
 fn test_guardrails_local_helper_registers_and_enforces_llm_and_tool_checks() {
     let _python = crate::test_support::init_python_test();
     Python::attach(|py| {
-        let python_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python");
+        let native_module = PyModule::new(py, "_native_guardrails_helper").unwrap();
+        crate::_native(&native_module).unwrap();
+        let sys = py.import("sys").unwrap();
+        let modules = sys.getattr("modules").unwrap();
+        modules
+            .set_item("nemo_relay._native", native_module.clone())
+            .unwrap();
+
+        let python_dir = python_package_dir();
+        let prelude =
+            fake_guardrails_module_prelude("fake_guardrails_local_helper", &python_dir.display().to_string());
+        let epilogue = register_fake_guardrails_module_epilogue();
+        let context_class = local_plugin_context_python();
         let module = load_module(
             py,
             &format!(
                 r#"
-import pathlib
-import sys
-import types
-
-sys.path.insert(0, {python_dir:?})
-
-MODULE_NAME = "fake_guardrails_local_helper"
-
-fake_root = types.ModuleType(MODULE_NAME)
-fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
-
-class Result:
-    def __init__(self, status, content=None, rail=None):
-        self.status = status
-        self.content = content
-        self.rail = rail
-
-class RailType:
-    INPUT = "input"
-    OUTPUT = "output"
-
-class RailStatus:
-    BLOCKED = "blocked"
-    MODIFIED = "modified"
-    PASSED = "passed"
-
-class RailsConfig:
-    @staticmethod
-    def from_content(*, colang_content=None, yaml_content=None):
-        return {{"yaml": yaml_content, "colang": colang_content}}
-
-    @staticmethod
-    def from_path(path):
-        return {{"path": path}}
+{prelude}
 
 check_results = []
 check_calls = []
@@ -204,32 +257,15 @@ class LLMRails:
         check_calls.append((messages, rail_types))
         return check_results.pop(0)
 
-fake_root.RailsConfig = RailsConfig
-fake_root.LLMRails = LLMRails
-fake_options.RailType = RailType
-fake_options.RailStatus = RailStatus
-
-sys.modules[MODULE_NAME] = fake_root
-sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
-sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
-sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+{epilogue}
 
 from nemo_relay._native import LLMRequest
 from nemo_relay._guardrails_local import register_local_backend
 
-class Context:
-    def register_llm_execution_intercept(self, name, priority, callback):
-        self.llm = callback
-
-    def register_llm_stream_execution_intercept(self, name, priority, callback):
-        self.stream = callback
-
-    def register_tool_execution_intercept(self, name, priority, callback):
-        self.tool = callback
+{context_class}
 
 async def run_case():
     ctx = Context()
-    event_log = []
     register_local_backend(
         {{
             "mode": "local",
@@ -291,7 +327,9 @@ async def run_case():
         "check_calls": check_calls,
     }}
 "#,
-                python_dir = python_dir.display(),
+                prelude = prelude,
+                epilogue = epilogue,
+                context_class = context_class,
             ),
         );
 
@@ -332,40 +370,16 @@ fn test_guardrails_local_helper_enforces_streamed_output_rails() {
             .set_item("nemo_relay._native", native_module.clone())
             .unwrap();
 
-        let python_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python");
+        let python_dir = python_package_dir();
+        let prelude =
+            fake_guardrails_module_prelude("fake_guardrails_streaming", &python_dir.display().to_string());
+        let epilogue = register_fake_guardrails_module_epilogue();
+        let context_class = local_plugin_context_python();
         let module = load_module(
             py,
             &format!(
                 r#"
-import sys
-import types
-
-sys.path.insert(0, {python_dir:?})
-
-MODULE_NAME = "fake_guardrails_streaming"
-
-fake_root = types.ModuleType(MODULE_NAME)
-fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
-
-class Result:
-    def __init__(self, status, content=None, rail=None):
-        self.status = status
-        self.content = content
-        self.rail = rail
-
-class RailType:
-    INPUT = "input"
-    OUTPUT = "output"
-
-class RailStatus:
-    BLOCKED = "blocked"
-    MODIFIED = "modified"
-    PASSED = "passed"
-
-class RailsConfig:
-    @staticmethod
-    def from_content(*, colang_content=None, yaml_content=None):
-        return {{"yaml": yaml_content}}
+{prelude}
 
 stream_results = []
 event_log = []
@@ -395,28 +409,12 @@ class LLMRails:
                 yield '{{"error": {{"message": "Blocked by output rails: output-policy", "type": "guardrails_violation"}}}}'
         return _run()
 
-fake_root.RailsConfig = RailsConfig
-fake_root.LLMRails = LLMRails
-fake_options.RailType = RailType
-fake_options.RailStatus = RailStatus
-
-sys.modules[MODULE_NAME] = fake_root
-sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
-sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
-sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+{epilogue}
 
 from nemo_relay._native import LLMRequest
 from nemo_relay._guardrails_local import register_local_backend
 
-class Context:
-    def register_llm_execution_intercept(self, name, priority, callback):
-        self.llm = callback
-
-    def register_llm_stream_execution_intercept(self, name, priority, callback):
-        self.stream = callback
-
-    def register_tool_execution_intercept(self, name, priority, callback):
-        self.tool = callback
+{context_class}
 
 async def run_case():
     ctx = Context()
@@ -506,7 +504,9 @@ async def run_case():
         "modified": modified,
     }}
 "#,
-                python_dir = python_dir.display(),
+                prelude = prelude,
+                epilogue = epilogue,
+                context_class = context_class,
             ),
         );
 
@@ -581,40 +581,15 @@ fn test_local_guardrails_provider_initializes_and_enforces_managed_core_calls()
             .set_item("nemo_relay._native", native_module.clone())
             .unwrap();
 
-        let python_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python");
+        let python_dir = python_package_dir();
+        let prelude =
+            fake_guardrails_module_prelude("fake_guardrails_local_e2e", &python_dir.display().to_string());
+        let epilogue = register_fake_guardrails_module_epilogue();
         let module = load_module(
             py,
             &format!(
                 r#"
-import sys
-import types
-
-sys.path.insert(0, {python_dir:?})
-
-MODULE_NAME = "fake_guardrails_local_e2e"
-
-fake_root = types.ModuleType(MODULE_NAME)
-fake_options = types.ModuleType(MODULE_NAME + ".rails.llm.options")
-
-class Result:
-    def __init__(self, status, content=None, rail=None):
-        self.status = status
-        self.content = content
-        self.rail = rail
-
-class RailType:
-    INPUT = "input"
-    OUTPUT = "output"
-
-class RailStatus:
-    BLOCKED = "blocked"
-    MODIFIED = "modified"
-    PASSED = "passed"
-
-class RailsConfig:
-    @staticmethod
-    def from_content(*, colang_content=None, yaml_content=None):
-        return {{"yaml": yaml_content}}
+{prelude}
 
 check_results = []
 
@@ -625,15 +600,7 @@ class LLMRails:
     async def check_async(self, messages, rail_types):
         return check_results.pop(0)
 
-fake_root.RailsConfig = RailsConfig
-fake_root.LLMRails = LLMRails
-fake_options.RailType = RailType
-fake_options.RailStatus = RailStatus
-
-sys.modules[MODULE_NAME] = fake_root
-sys.modules[MODULE_NAME + ".rails"] = types.ModuleType(MODULE_NAME + ".rails")
-sys.modules[MODULE_NAME + ".rails.llm"] = types.ModuleType(MODULE_NAME + ".rails.llm")
-sys.modules[MODULE_NAME + ".rails.llm.options"] = fake_options
+{epilogue}
 
 import nemo_relay
 
@@ -709,7 +676,8 @@ async def run_case():
         "seen_tool_args": seen_tool_args,
     }}
 "#,
-                python_dir = python_dir.display(),
+                prelude = prelude,
+                epilogue = epilogue,
             ),
         );
         let result_json = with_event_loop(py, |event_loop| {

From 244f29f022929b395fe64ace2e4b0dc428ccea5d Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Mon, 1 Jun 2026 09:16:28 -0700
Subject: [PATCH 4/7] style: apply rustfmt for local guardrails tests

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 .../core/src/plugins/nemo_guardrails/local.rs  |  3 ++-
 crates/python/tests/coverage/coverage_tests.rs | 18 ++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/crates/core/src/plugins/nemo_guardrails/local.rs b/crates/core/src/plugins/nemo_guardrails/local.rs
index 31f4e1c8..240ed186 100644
--- a/crates/core/src/plugins/nemo_guardrails/local.rs
+++ b/crates/core/src/plugins/nemo_guardrails/local.rs
@@ -14,7 +14,8 @@ type LocalBackendProvider = Arc<
 static LOCAL_BACKEND_PROVIDER: LazyLock<Mutex<Option<LocalBackendProvider>>> =
     LazyLock::new(|| Mutex::new(None));
 
-fn local_backend_provider_guard() -> PluginResult<MutexGuard<'static, Option<LocalBackendProvider>>> {
+fn local_backend_provider_guard() -> PluginResult<MutexGuard<'static, Option<LocalBackendProvider>>>
+{
     LOCAL_BACKEND_PROVIDER.lock().map_err(|e| {
         PluginError::Internal(format!(
             "NeMo Guardrails local backend provider lock poisoned: {e}"
diff --git a/crates/python/tests/coverage/coverage_tests.rs b/crates/python/tests/coverage/coverage_tests.rs
index 90b792f8..029eee58 100644
--- a/crates/python/tests/coverage/coverage_tests.rs
+++ b/crates/python/tests/coverage/coverage_tests.rs
@@ -236,8 +236,10 @@ fn test_guardrails_local_helper_registers_and_enforces_llm_and_tool_checks() {
             .unwrap();
 
         let python_dir = python_package_dir();
-        let prelude =
-            fake_guardrails_module_prelude("fake_guardrails_local_helper", &python_dir.display().to_string());
+        let prelude = fake_guardrails_module_prelude(
+            "fake_guardrails_local_helper",
+            &python_dir.display().to_string(),
+        );
         let epilogue = register_fake_guardrails_module_epilogue();
         let context_class = local_plugin_context_python();
         let module = load_module(
@@ -371,8 +373,10 @@ fn test_guardrails_local_helper_enforces_streamed_output_rails() {
             .unwrap();
 
         let python_dir = python_package_dir();
-        let prelude =
-            fake_guardrails_module_prelude("fake_guardrails_streaming", &python_dir.display().to_string());
+        let prelude = fake_guardrails_module_prelude(
+            "fake_guardrails_streaming",
+            &python_dir.display().to_string(),
+        );
         let epilogue = register_fake_guardrails_module_epilogue();
         let context_class = local_plugin_context_python();
         let module = load_module(
@@ -582,8 +586,10 @@ fn test_local_guardrails_provider_initializes_and_enforces_managed_core_calls()
             .unwrap();
 
         let python_dir = python_package_dir();
-        let prelude =
-            fake_guardrails_module_prelude("fake_guardrails_local_e2e", &python_dir.display().to_string());
+        let prelude = fake_guardrails_module_prelude(
+            "fake_guardrails_local_e2e",
+            &python_dir.display().to_string(),
+        );
         let epilogue = register_fake_guardrails_module_epilogue();
         let module = load_module(
             py,

From ffa88dcee41143e3bfb4131162155a57ef922876 Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Mon, 1 Jun 2026 09:24:48 -0700
Subject: [PATCH 5/7] refactor: name local guardrails imports

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 python/nemo_relay/_guardrails_local.py | 41 ++++++++++++++++----------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/python/nemo_relay/_guardrails_local.py b/python/nemo_relay/_guardrails_local.py
index 5f30eb49..86f5946c 100644
--- a/python/nemo_relay/_guardrails_local.py
+++ b/python/nemo_relay/_guardrails_local.py
@@ -9,7 +9,7 @@
 import importlib
 import json
 from collections.abc import Callable
-from typing import Any, Protocol, cast
+from typing import Any, NamedTuple, Protocol, cast
 
 from nemo_relay import Json, LLMRequest
 from nemo_relay.codecs import (
@@ -49,6 +49,15 @@ class _GuardrailsCodec(LlmCodec, LlmResponseCodec, Protocol):
     """Codec shape required by the local backend."""
 
 
+class _GuardrailsRuntimeImports(NamedTuple):
+    """Bundle of the ``RailsConfig``, ``LLMRails``, ``RailType`` and ``RailStatus`` lookups."""
+
+    rails_config_cls: Any
+    llm_rails_cls: Any
+    rail_type: Any
+    rail_status: Any
+
+
 _CODECS: dict[str, Callable[[], _GuardrailsCodec]] = {
     "openai_chat": OpenAIChatCodec,
     "openai_responses": OpenAIResponsesCodec,
@@ -56,7 +65,7 @@ class _GuardrailsCodec(LlmCodec, LlmResponseCodec, Protocol):
 }
 
 
-def _load_nemoguardrails(module_name: str | None):
+def _load_nemoguardrails(module_name: str | None) -> _GuardrailsRuntimeImports:
     root_module = module_name or "nemoguardrails"
     try:
         guardrails = cast(Any, importlib.import_module(root_module))
@@ -72,11 +81,11 @@ def _load_nemoguardrails(module_name: str | None):
             f"{error.name or error}. Install the full NeMo Guardrails runtime dependencies."
         ) from error
 
-    return (
-        guardrails.RailsConfig,
-        guardrails.LLMRails,
-        options.RailType,
-        options.RailStatus,
+    return _GuardrailsRuntimeImports(
+        rails_config_cls=guardrails.RailsConfig,
+        llm_rails_cls=guardrails.LLMRails,
+        rail_type=options.RailType,
+        rail_status=options.RailStatus,
     )
 
 
@@ -543,9 +552,9 @@ def register_local_backend(config: dict[str, Any], context: PluginContext) -> No
 
     local = cast(dict[str, Any], config.get("local") or {})
     module_name = cast(str | None, local.get("python_module"))
-    RailsConfig, LLMRails, RailType, RailStatus = _load_nemoguardrails(module_name)
-    guardrails_config = _build_guardrails_config(config, RailsConfig)
-    rails = LLMRails(guardrails_config)
+    runtime_imports = _load_nemoguardrails(module_name)
+    guardrails_config = _build_guardrails_config(config, runtime_imports.rails_config_cls)
+    rails = runtime_imports.llm_rails_cls(guardrails_config)
     enable_input = bool(config.get("input", True))
     enable_output = bool(config.get("output", True))
     enable_tool_input = bool(config.get("tool_input", False))
@@ -556,16 +565,16 @@ def register_local_backend(config: dict[str, Any], context: PluginContext) -> No
         codec_name, codec = _resolve_codec(config)
         intercept = _make_llm_intercept(
             rails=rails,
-            rail_type=RailType,
-            rail_status=RailStatus,
+            rail_type=runtime_imports.rail_type,
+            rail_status=runtime_imports.rail_status,
             codec=codec,
             enable_input=enable_input,
             enable_output=enable_output,
         )
         stream_intercept = _make_llm_stream_intercept(
             rails=rails,
-            rail_type=RailType,
-            rail_status=RailStatus,
+            rail_type=runtime_imports.rail_type,
+            rail_status=runtime_imports.rail_status,
             codec_name=codec_name,
             codec=codec,
             enable_input=enable_input,
@@ -581,8 +590,8 @@ def register_local_backend(config: dict[str, Any], context: PluginContext) -> No
     if enable_tool_input or enable_tool_output:
         tool_intercept = _make_tool_intercept(
             rails=rails,
-            rail_type=RailType,
-            rail_status=RailStatus,
+            rail_type=runtime_imports.rail_type,
+            rail_status=runtime_imports.rail_status,
             enable_tool_input=enable_tool_input,
             enable_tool_output=enable_tool_output,
         )

From f8dead5c41a81bfc58b6c895e396181674b2833a Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Mon, 1 Jun 2026 09:36:47 -0700
Subject: [PATCH 6/7] fix: address local guardrails review nits

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 crates/python/src/lib.rs                      |  21 +
 crates/python/src/py_plugin.rs                |  18 +-
 .../python/tests/coverage/coverage_tests.rs   | 370 ++++++++++--------
 .../coverage/py_plugin_coverage_tests.rs      |  45 +++
 python/nemo_relay/_guardrails_local.py        |  30 +-
 5 files changed, 307 insertions(+), 177 deletions(-)

diff --git a/crates/python/src/lib.rs b/crates/python/src/lib.rs
index 13d0c29f..4a40eaf8 100644
--- a/crates/python/src/lib.rs
+++ b/crates/python/src/lib.rs
@@ -112,6 +112,10 @@ fn load_guardrails_local_register_fn(py: Python<'_>) -> PyResult<Bound<'_, PyAny
     let module = match py.import("nemo_relay._guardrails_local") {
         Ok(module) => module,
         Err(err) => {
+            if !is_missing_guardrails_local_module(py, &err)? {
+                return Err(err);
+            }
+
             let source_python_dir = guardrails_local_source_python_dir();
             if !source_python_dir.exists() {
                 return Err(err);
@@ -124,6 +128,23 @@ fn load_guardrails_local_register_fn(py: Python<'_>) -> PyResult<Bound<'_, PyAny
     module.getattr("register_local_backend")
 }
 
+/// Returns `Ok(true)` only when `err` is a `ModuleNotFoundError` whose `name` is
+/// `nemo_relay` or `nemo_relay._guardrails_local`; any other import failure is `Ok(false)`.
+fn is_missing_guardrails_local_module(py: Python<'_>, err: &PyErr) -> PyResult<bool> {
+    // A missing or non-string `name` attribute is treated as "some other module".
+    let missing_name = err
+        .is_instance_of::<pyo3::exceptions::PyModuleNotFoundError>(py)
+        .then(|| err.value(py).getattr("name").ok())
+        .flatten()
+        .and_then(|name| name.extract::<String>().ok());
+
+    let is_relay_module = matches!(
+        missing_name.as_deref(),
+        Some("nemo_relay" | "nemo_relay._guardrails_local")
+    );
+    Ok(is_relay_module)
+}
+
 fn guardrails_local_source_python_dir() -> PathBuf {
     PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../python")
 }
diff --git a/crates/python/src/py_plugin.rs b/crates/python/src/py_plugin.rs
index ee668ea1..09209b03 100644
--- a/crates/python/src/py_plugin.rs
+++ b/crates/python/src/py_plugin.rs
@@ -29,7 +29,8 @@ use nemo_relay::api::subscriber::{deregister_subscriber, register_subscriber};
 use nemo_relay::plugin::{
     ConfigDiagnostic, DiagnosticLevel, Plugin, PluginConfig, PluginError, PluginRegistration,
     PluginRegistrationContext, active_plugin_report, clear_plugin_configuration, deregister_plugin,
-    initialize_plugins, list_plugin_kinds, register_plugin, validate_plugin_config,
+    initialize_plugins, list_plugin_kinds, register_plugin, rollback_registrations,
+    validate_plugin_config,
 };
 
 use crate::convert::{json_to_py, py_to_json};
@@ -174,10 +175,17 @@ pub(crate) fn invoke_python_plugin_register(
         namespace_prefix,
     )?;
     let plugin_config_py = plugin_config_to_py(py, plugin_kind, plugin_config)?;
-    register_fn.call1((plugin_config_py, py_ctx.clone_ref(py)))?;
-    {
-        let py_ctx_ref = py_ctx.bind(py).borrow();
-        py_ctx_ref.drain_registrations()
+    match register_fn.call1((plugin_config_py, py_ctx.clone_ref(py))) {
+        Ok(_) => {
+            let py_ctx_ref = py_ctx.bind(py).borrow();
+            py_ctx_ref.drain_registrations()
+        }
+        Err(err) => {
+            if let Ok(mut registrations) = py_ctx.bind(py).borrow().drain_registrations() {
+                rollback_registrations(&mut registrations);
+            }
+            Err(err)
+        }
     }
 }
 
diff --git a/crates/python/tests/coverage/coverage_tests.rs b/crates/python/tests/coverage/coverage_tests.rs
index 029eee58..a104d68c 100644
--- a/crates/python/tests/coverage/coverage_tests.rs
+++ b/crates/python/tests/coverage/coverage_tests.rs
@@ -4,13 +4,13 @@
 //! Coverage tests for coverage in the NeMo Relay Python crate.
 
 use std::ffi::CString;
+use std::panic::{AssertUnwindSafe, catch_unwind};
 use std::path::PathBuf;
 use std::pin::Pin;
 use std::sync::Arc;
 
-use pyo3::ffi::c_str;
 use pyo3::prelude::*;
-use pyo3::types::{IntoPyDict, PyModule};
+use pyo3::types::{PyDict, PyModule};
 use serde_json::{Value as Json, json};
 use tokio_stream::Stream;
 use tokio_stream::StreamExt;
@@ -115,6 +115,59 @@ class Context:
 "#
 }
 
+/// Runs `f` after stripping every `nemo_relay` / `nemo_relay.*` entry from
+/// `sys.modules` and installing `native_module` as `nemo_relay._native`.
+///
+/// The entries found on entry are put back after `f` finishes; a panic raised by
+/// `f` is caught, the restore runs, and the panic is then resumed.
+fn with_isolated_nemo_relay_modules<T>(
+    py: Python<'_>,
+    native_module: &Bound<'_, PyModule>,
+    f: impl FnOnce() -> T,
+) -> T {
+    let sys = py.import("sys").unwrap();
+    let modules = sys.getattr("modules").unwrap();
+    let modules = modules.cast_into::<PyDict>().unwrap();
+
+    // Snapshot the package and its submodules before they are evicted.
+    let mut saved_modules = Vec::new();
+    for (key, module) in modules.iter() {
+        let Ok(name) = key.extract::<String>() else {
+            continue;
+        };
+        if name == "nemo_relay" || name.starts_with("nemo_relay.") {
+            saved_modules.push((name, module.unbind()));
+        }
+    }
+
+    clear_nemo_relay_modules(&modules);
+    modules
+        .set_item("nemo_relay._native", native_module.clone())
+        .unwrap();
+
+    // Hold any panic from `f` until the saved entries are back in place.
+    let outcome = catch_unwind(AssertUnwindSafe(f));
+
+    clear_nemo_relay_modules(&modules);
+    for (name, module) in saved_modules {
+        modules.set_item(name, module).unwrap();
+    }
+
+    outcome.unwrap_or_else(|payload| std::panic::resume_unwind(payload))
+}
+
+/// Removes `nemo_relay` and every `nemo_relay.*` key from `modules`.
+fn clear_nemo_relay_modules(modules: &Bound<'_, PyDict>) {
+    let doomed: Vec<String> = modules
+        .iter()
+        .filter_map(|(key, _)| key.extract::<String>().ok())
+        .filter(|name| name == "nemo_relay" || name.starts_with("nemo_relay."))
+        .collect();
+    for name in doomed {
+        modules.del_item(name).unwrap();
+    }
+}
+
 fn make_request() -> LlmRequest {
     LlmRequest {
         headers: serde_json::Map::from_iter([("x-trace".into(), json!("1"))]),
@@ -229,23 +282,19 @@ fn test_guardrails_local_helper_registers_and_enforces_llm_and_tool_checks() {
     Python::attach(|py| {
         let native_module = PyModule::new(py, "_native_guardrails_helper").unwrap();
         crate::_native(&native_module).unwrap();
-        let sys = py.import("sys").unwrap();
-        let modules = sys.getattr("modules").unwrap();
-        modules
-            .set_item("nemo_relay._native", native_module.clone())
-            .unwrap();
 
-        let python_dir = python_package_dir();
-        let prelude = fake_guardrails_module_prelude(
-            "fake_guardrails_local_helper",
-            &python_dir.display().to_string(),
-        );
-        let epilogue = register_fake_guardrails_module_epilogue();
-        let context_class = local_plugin_context_python();
-        let module = load_module(
-            py,
-            &format!(
-                r#"
+        with_isolated_nemo_relay_modules(py, &native_module, || {
+            let python_dir = python_package_dir();
+            let prelude = fake_guardrails_module_prelude(
+                "fake_guardrails_local_helper",
+                &python_dir.display().to_string(),
+            );
+            let epilogue = register_fake_guardrails_module_epilogue();
+            let context_class = local_plugin_context_python();
+            let module = load_module(
+                py,
+                &format!(
+                    r#"
 {prelude}
 
 check_results = []
@@ -329,34 +378,61 @@ async def run_case():
         "check_calls": check_calls,
     }}
 "#,
-                prelude = prelude,
-                epilogue = epilogue,
-                context_class = context_class,
-            ),
-        );
+                    prelude = prelude,
+                    epilogue = epilogue,
+                    context_class = context_class,
+                ),
+            );
 
-        let result_json = with_event_loop(py, |event_loop| {
-            let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
-            let result = event_loop
-                .call_method1("run_until_complete", (coroutine,))
-                .unwrap();
-            crate::convert::py_to_json(&result).unwrap()
-        });
+            let result_json = with_event_loop(py, |event_loop| {
+                let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
+                let result = event_loop
+                    .call_method1("run_until_complete", (coroutine,))
+                    .unwrap();
+                crate::convert::py_to_json(&result).unwrap()
+            });
 
-        assert_eq!(
-            result_json["seen_request_messages"][0],
-            json!("sanitized user")
-        );
-        assert_eq!(result_json["tool_result"], json!({ "ok": true }));
-        assert_eq!(
-            result_json["seen_tool_args"][0],
-            json!({ "city": "Boston" })
-        );
-        assert_eq!(
-            result_json["llm_result"]["choices"][0]["message"]["content"],
-            json!("safe reply")
-        );
-        assert_eq!(result_json["check_calls"].as_array().unwrap().len(), 4);
+            assert_eq!(
+                result_json["seen_request_messages"][0],
+                json!("sanitized user")
+            );
+            assert_eq!(result_json["tool_result"], json!({ "ok": true }));
+            assert_eq!(
+                result_json["seen_tool_args"][0],
+                json!({ "city": "Boston" })
+            );
+            assert_eq!(
+                result_json["llm_result"]["choices"][0]["message"]["content"],
+                json!("safe reply")
+            );
+            assert_eq!(
+                result_json["check_calls"],
+                json!([
+                    [
+                        [{"role": "user", "content": "unsafe"}],
+                        ["input"]
+                    ],
+                    [
+                        [
+                            {"role": "user", "content": "sanitized user"},
+                            {"role": "assistant", "content": "safe reply"}
+                        ],
+                        ["output"]
+                    ],
+                    [
+                        [{"role": "user", "content": "{\"arguments\":{\"city\":\"Phoenix\"},\"tool_name\":\"weather_lookup\"}"}],
+                        ["input"]
+                    ],
+                    [
+                        [
+                            {"role": "user", "content": "{\"arguments\":{\"city\":\"Boston\"},\"tool_name\":\"weather_lookup\"}"},
+                            {"role": "assistant", "content": "{\"arguments\":{\"city\":\"Boston\"},\"result\":{\"raw\":true},\"tool_name\":\"weather_lookup\"}"}
+                        ],
+                        ["output"]
+                    ]
+                ])
+            );
+        });
     });
 }
 
@@ -366,23 +442,19 @@ fn test_guardrails_local_helper_enforces_streamed_output_rails() {
     Python::attach(|py| {
         let native_module = PyModule::new(py, "_native_guardrails_streaming").unwrap();
         crate::_native(&native_module).unwrap();
-        let sys = py.import("sys").unwrap();
-        let modules = sys.getattr("modules").unwrap();
-        modules
-            .set_item("nemo_relay._native", native_module.clone())
-            .unwrap();
 
-        let python_dir = python_package_dir();
-        let prelude = fake_guardrails_module_prelude(
-            "fake_guardrails_streaming",
-            &python_dir.display().to_string(),
-        );
-        let epilogue = register_fake_guardrails_module_epilogue();
-        let context_class = local_plugin_context_python();
-        let module = load_module(
-            py,
-            &format!(
-                r#"
+        with_isolated_nemo_relay_modules(py, &native_module, || {
+            let python_dir = python_package_dir();
+            let prelude = fake_guardrails_module_prelude(
+                "fake_guardrails_streaming",
+                &python_dir.display().to_string(),
+            );
+            let epilogue = register_fake_guardrails_module_epilogue();
+            let context_class = local_plugin_context_python();
+            let module = load_module(
+                py,
+                &format!(
+                    r#"
 {prelude}
 
 stream_results = []
@@ -508,52 +580,53 @@ async def run_case():
         "modified": modified,
     }}
 "#,
-                prelude = prelude,
-                epilogue = epilogue,
-                context_class = context_class,
-            ),
-        );
+                    prelude = prelude,
+                    epilogue = epilogue,
+                    context_class = context_class,
+                ),
+            );
 
-        let result = with_event_loop(py, |event_loop| {
-            let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
-            let result = event_loop
-                .call_method1("run_until_complete", (coroutine,))
-                .unwrap();
-            crate::convert::py_to_json(&result).unwrap()
-        });
-        assert_eq!(
-            result["allowed_chunks"],
-            json!([
-                {"choices": [{"delta": {"content": "hello"}}]},
-                {"choices": [{"delta": {"content": "world"}}]}
-            ])
-        );
-        let event_log = result["event_log"].as_array().unwrap();
-        assert_eq!(
-            &event_log[..6],
-            json!([
-                "source:hello",
-                "yield:hello",
-                "source:world",
-                "yield:world",
-                "guardrails-sees:hello",
-                "guardrails-sees:world",
-            ])
-            .as_array()
-            .unwrap()
-        );
-        assert!(
-            result["blocked"]
-                .as_str()
-                .unwrap()
-                .contains("output rail blocked the LLM call")
-        );
-        assert!(
-            result["modified"]
-                .as_str()
+            let result = with_event_loop(py, |event_loop| {
+                let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
+                let result = event_loop
+                    .call_method1("run_until_complete", (coroutine,))
+                    .unwrap();
+                crate::convert::py_to_json(&result).unwrap()
+            });
+            assert_eq!(
+                result["allowed_chunks"],
+                json!([
+                    {"choices": [{"delta": {"content": "hello"}}]},
+                    {"choices": [{"delta": {"content": "world"}}]}
+                ])
+            );
+            let event_log = result["event_log"].as_array().unwrap();
+            assert_eq!(
+                &event_log[..6],
+                json!([
+                    "source:hello",
+                    "yield:hello",
+                    "source:world",
+                    "yield:world",
+                    "guardrails-sees:hello",
+                    "guardrails-sees:world",
+                ])
+                .as_array()
                 .unwrap()
-                .contains("stream_first = true")
-        );
+            );
+            assert!(
+                result["blocked"]
+                    .as_str()
+                    .unwrap()
+                    .contains("output rail blocked the LLM call")
+            );
+            assert!(
+                result["modified"]
+                    .as_str()
+                    .unwrap()
+                    .contains("stream_first = true")
+            );
+        });
     });
 }
 
@@ -565,36 +638,18 @@ fn test_local_guardrails_provider_initializes_and_enforces_managed_core_calls()
     Python::attach(|py| {
         let native_module = PyModule::new(py, "_native_guardrails_e2e").unwrap();
         crate::_native(&native_module).unwrap();
-        let sys = py.import("sys").unwrap();
-        let modules = sys.getattr("modules").unwrap();
-        let module_names = py
-            .eval(
-                c_str!("list(sys.modules.keys())"),
-                None,
-                Some(&[(c_str!("sys"), sys)].into_py_dict(py).unwrap()),
-            )
-            .unwrap()
-            .extract::<Vec<String>>()
-            .unwrap();
-        for name in module_names {
-            if name == "nemo_relay" || name.starts_with("nemo_relay.") {
-                modules.del_item(name).unwrap();
-            }
-        }
-        modules
-            .set_item("nemo_relay._native", native_module.clone())
-            .unwrap();
 
-        let python_dir = python_package_dir();
-        let prelude = fake_guardrails_module_prelude(
-            "fake_guardrails_local_e2e",
-            &python_dir.display().to_string(),
-        );
-        let epilogue = register_fake_guardrails_module_epilogue();
-        let module = load_module(
-            py,
-            &format!(
-                r#"
+        with_isolated_nemo_relay_modules(py, &native_module, || {
+            let python_dir = python_package_dir();
+            let prelude = fake_guardrails_module_prelude(
+                "fake_guardrails_local_e2e",
+                &python_dir.display().to_string(),
+            );
+            let epilogue = register_fake_guardrails_module_epilogue();
+            let module = load_module(
+                py,
+                &format!(
+                    r#"
 {prelude}
 
 check_results = []
@@ -682,31 +737,32 @@ async def run_case():
         "seen_tool_args": seen_tool_args,
     }}
 "#,
-                prelude = prelude,
-                epilogue = epilogue,
-            ),
-        );
-        let result_json = with_event_loop(py, |event_loop| {
-            let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
-            let result = event_loop
-                .call_method1("run_until_complete", (coroutine,))
-                .unwrap();
-            crate::convert::py_to_json(&result).unwrap()
-        });
+                    prelude = prelude,
+                    epilogue = epilogue,
+                ),
+            );
+            let result_json = with_event_loop(py, |event_loop| {
+                let coroutine = module.getattr("run_case").unwrap().call0().unwrap();
+                let result = event_loop
+                    .call_method1("run_until_complete", (coroutine,))
+                    .unwrap();
+                crate::convert::py_to_json(&result).unwrap()
+            });
 
-        assert_eq!(
-            result_json["llm_result"]["choices"][0]["message"]["content"],
-            json!("safe reply")
-        );
-        assert_eq!(result_json["tool_result"], json!({ "ok": true }));
-        assert_eq!(
-            result_json["seen_request_messages"][0],
-            json!("sanitized user")
-        );
-        assert_eq!(
-            result_json["seen_tool_args"][0],
-            json!({ "city": "Boston" })
-        );
+            assert_eq!(
+                result_json["llm_result"]["choices"][0]["message"]["content"],
+                json!("safe reply")
+            );
+            assert_eq!(result_json["tool_result"], json!({ "ok": true }));
+            assert_eq!(
+                result_json["seen_request_messages"][0],
+                json!("sanitized user")
+            );
+            assert_eq!(
+                result_json["seen_tool_args"][0],
+                json!({ "city": "Boston" })
+            );
+        });
     });
 
     reset_runtime_state();
diff --git a/crates/python/tests/coverage/py_plugin_coverage_tests.rs b/crates/python/tests/coverage/py_plugin_coverage_tests.rs
index f774d5ea..dbb8a21b 100644
--- a/crates/python/tests/coverage/py_plugin_coverage_tests.rs
+++ b/crates/python/tests/coverage/py_plugin_coverage_tests.rs
@@ -792,6 +792,51 @@ async def initialize_plugins(module, config):
     });
 }
 
+#[test]
+fn invoke_python_plugin_register_rolls_back_partial_registrations_on_error() {
+    let _python = crate::test_support::init_python_test();
+    Python::attach(|py| {
+        let helpers = load_module(
+            py,
+            r#"
+def subscriber(event):
+    return None
+
+class FailingPlugin:
+    def register(self, plugin_config, context):
+        context.register_subscriber("sub", subscriber)
+        raise RuntimeError("boom")
+"#,
+        );
+        let subscriber = helpers.getattr("subscriber").unwrap();
+        let plugin = helpers.getattr("FailingPlugin").unwrap().call0().unwrap();
+        let register_fn = plugin.getattr("register").unwrap();
+        let plugin_config = serde_json::Map::new();
+
+        // Run the failing registration twice, re-registering "sub" by hand after each.
+        for _ in 0..2 {
+            let result = invoke_python_plugin_register(
+                py,
+                "demo.rollback",
+                &register_fn,
+                &plugin_config,
+                "rollback.".to_string(),
+            );
+            let err = result.unwrap_err();
+            assert!(err.to_string().contains("boom"), "{err}");
+
+            let context = PyPluginContext {
+                registrations: Arc::new(Mutex::new(vec![])),
+                namespace_prefix: "rollback.".to_string(),
+            };
+            let callback = subscriber.clone().unbind();
+            context.register_subscriber("sub", callback).unwrap();
+            let mut registrations = context.drain_registrations().unwrap();
+            rollback_registrations(&mut registrations);
+        }
+    });
+}
+
 #[test]
 fn plugin_context_lock_poisoning_covers_error_paths() {
     let _python = crate::test_support::init_python_test();
diff --git a/python/nemo_relay/_guardrails_local.py b/python/nemo_relay/_guardrails_local.py
index 86f5946c..f16f839a 100644
--- a/python/nemo_relay/_guardrails_local.py
+++ b/python/nemo_relay/_guardrails_local.py
@@ -462,21 +462,21 @@ async def stream_intercept(request: LLMRequest, next_call):
             )
 
         text_queue: asyncio.Queue[str | None] = asyncio.Queue()
-        blocked: dict[str, str | None] = {"message": None}
-        monitor = asyncio.create_task(
-            _monitor_streaming_output_rails(
-                rails=rails,
-                messages=messages,
-                text_queue=text_queue,
-                blocked=blocked,
-            )
-        )
+        block_state: dict[str, str | None] = {"message": None}
 
         async def guarded_provider_stream():
+            monitor = asyncio.create_task(
+                _monitor_streaming_output_rails(
+                    rails=rails,
+                    messages=messages,
+                    text_queue=text_queue,
+                    blocked=block_state,
+                )
+            )
             try:
                 async for chunk in stream:
-                    if blocked["message"] is not None:
-                        _raise_streaming_output_blocked(blocked["message"])
+                    if block_state["message"] is not None:
+                        _raise_streaming_output_blocked(block_state["message"])
 
                     text = _extract_stream_text(codec_name, chunk)
                     if text is not None:
@@ -484,13 +484,13 @@ async def guarded_provider_stream():
 
                     yield chunk
 
-                    if blocked["message"] is not None:
-                        _raise_streaming_output_blocked(blocked["message"])
+                    if block_state["message"] is not None:
+                        _raise_streaming_output_blocked(block_state["message"])
             finally:
                 await text_queue.put(None)
                 await monitor
-                if blocked["message"] is not None:
-                    _raise_streaming_output_blocked(blocked["message"])
+                if block_state["message"] is not None:
+                    _raise_streaming_output_blocked(block_state["message"])
 
         return guarded_provider_stream()
 

From 67fd1b912e4da86c8362f6d93e0437933591dd20 Mon Sep 17 00:00:00 2001
From: Alex Fournier <afournier@nvidia.com>
Date: Mon, 1 Jun 2026 12:11:06 -0700
Subject: [PATCH 7/7] test: extend local guardrails cli coverage

Signed-off-by: Alex Fournier <afournier@nvidia.com>
---
 crates/cli/tests/coverage/plugins_tests.rs | 243 ++++++++++++++++++++-
 1 file changed, 242 insertions(+), 1 deletion(-)

diff --git a/crates/cli/tests/coverage/plugins_tests.rs b/crates/cli/tests/coverage/plugins_tests.rs
index 28bf0fd2..502e7b07 100644
--- a/crates/cli/tests/coverage/plugins_tests.rs
+++ b/crates/cli/tests/coverage/plugins_tests.rs
@@ -7,7 +7,7 @@ use nemo_relay::config_editor::{EditorConfig, EditorSchema};
 use nemo_relay::observability::plugin_component::{OBSERVABILITY_PLUGIN_KIND, ObservabilityConfig};
 use nemo_relay::plugin::{ConfigPolicy, PluginComponentSpec, PluginConfig};
 use nemo_relay::plugins::nemo_guardrails::component::{
-    NEMO_GUARDRAILS_PLUGIN_KIND, NeMoGuardrailsConfig, RemoteBackendConfig,
+    LocalBackendConfig, NEMO_GUARDRAILS_PLUGIN_KIND, NeMoGuardrailsConfig, RemoteBackendConfig,
 };
 use nemo_relay_adaptive::AdaptiveConfig;
 use nemo_relay_adaptive::plugin_component::ADAPTIVE_PLUGIN_KIND;
@@ -50,6 +50,40 @@ fn guardrails_component_config(config_id: &str) -> serde_json::Map<String, Value
     .clone()
 }
 
+/// Builds a `"local"`-mode guardrails component config with `input`/`output` disabled and
+/// `tool_input`/`tool_output` enabled, setting `config_path` to the given value.
+fn local_guardrails_component_config(config_path: &str) -> serde_json::Map<String, Value> {
+    let Value::Object(fields) = json!({
+        "mode": "local",
+        "input": false,
+        "output": false,
+        "config_path": config_path,
+        "tool_input": true,
+        "tool_output": true,
+        "local": {"python_module": "custom_guardrails"}
+    }) else {
+        unreachable!("an object literal passed to json! is always Value::Object")
+    };
+    fields
+}
+
+/// Builds a `"local"`-mode guardrails component config for the `openai_chat` codec with
+/// `input`/`output` enabled, the given inline `config_yaml`, and a fixed `noop` Colang flow.
+fn local_llm_guardrails_component_config(config_yaml: &str) -> serde_json::Map<String, Value> {
+    let Value::Object(fields) = json!({
+        "mode": "local",
+        "codec": "openai_chat",
+        "input": true,
+        "output": true,
+        "config_yaml": config_yaml,
+        "colang_content": "define flow noop\n  pass",
+        "local": {"python_module": "custom_guardrails"}
+    }) else {
+        unreachable!("an object literal passed to json! is always Value::Object")
+    };
+    fields
+}
+
 #[test]
 fn target_scope_defaults_to_user_and_rejects_conflicts() {
     assert_eq!(
@@ -160,6 +194,24 @@ fn typed_editor_model_contains_nemo_guardrails_options() {
         EditorFieldKind::StringMap
     );
 
+    let local = schema.field("local").unwrap().schema().unwrap();
+    assert_eq!(
+        local.field("python_module").unwrap().kind,
+        EditorFieldKind::String
+    );
+    assert_eq!(
+        schema.field("config_path").unwrap().kind,
+        EditorFieldKind::String
+    );
+    assert_eq!(
+        schema.field("config_yaml").unwrap().kind,
+        EditorFieldKind::String
+    );
+    assert_eq!(
+        schema.field("colang_content").unwrap().kind,
+        EditorFieldKind::String
+    );
+
     let request_defaults = schema.field("request_defaults").unwrap().schema().unwrap();
     let rails = request_defaults.field("rails").unwrap().schema().unwrap();
     assert_eq!(
@@ -1137,6 +1189,98 @@ fn validate_config_accepts_nemo_guardrails_component() {
     validate_config(&config).unwrap();
 }
 
+#[test]
+fn validate_config_accepts_local_tool_only_nemo_guardrails_component() {
+    let component = PluginComponentSpec {
+        kind: NEMO_GUARDRAILS_PLUGIN_KIND.to_string(),
+        enabled: true,
+        config: local_guardrails_component_config("./rails"),
+    };
+    let plugin_config = PluginConfig {
+        components: vec![component],
+        ..PluginConfig::default()
+    };
+    validate_config(&plugin_config).unwrap(); // tool-only local config must validate
+}
+
+/// Local mode must reject a component that also sets `request_defaults`.
+#[test]
+fn validate_config_rejects_local_nemo_guardrails_request_defaults() {
+    let component_config = json!({
+        "mode": "local",
+        "codec": "openai_chat",
+        "input": true,
+        "output": true,
+        "config_yaml": "models: []",
+        "request_defaults": {
+            "context": {"tenant": "demo"}
+        }
+    });
+    let plugin_config = PluginConfig {
+        components: vec![PluginComponentSpec {
+            kind: NEMO_GUARDRAILS_PLUGIN_KIND.to_string(),
+            enabled: true,
+            config: component_config.as_object().unwrap().clone(),
+        }],
+        ..PluginConfig::default()
+    };
+
+    let error = validate_config(&plugin_config).unwrap_err().to_string();
+    for expected in ["request_defaults", "local mode"] {
+        assert!(error.contains(expected), "error was: {error}");
+    }
+}
+
+/// Local mode must reject a component that sets both `config_path` and
+/// `config_yaml`.
+#[test]
+fn validate_config_rejects_local_nemo_guardrails_multiple_config_sources() {
+    let component_config = json!({
+        "mode": "local",
+        "config_path": "./rails",
+        "config_yaml": "models: []"
+    });
+
+    let component = PluginComponentSpec {
+        kind: NEMO_GUARDRAILS_PLUGIN_KIND.to_string(),
+        enabled: true,
+        config: component_config.as_object().unwrap().clone(),
+    };
+    let plugin_config = PluginConfig {
+        components: vec![component],
+        ..PluginConfig::default()
+    };
+
+    let error = validate_config(&plugin_config).unwrap_err().to_string();
+    let expected = "exactly one of config_path or config_yaml";
+    assert!(error.contains(expected), "error was: {error}");
+}
+
+/// Local mode must reject `colang_content` when the component is configured
+/// with `config_path` instead of `config_yaml`.
+#[test]
+fn validate_config_rejects_local_nemo_guardrails_colang_without_yaml() {
+    let component_config = json!({
+        "mode": "local",
+        "config_path": "./rails",
+        "colang_content": "define flow noop\n  pass"
+    });
+
+    let component = PluginComponentSpec {
+        kind: NEMO_GUARDRAILS_PLUGIN_KIND.to_string(),
+        enabled: true,
+        config: component_config.as_object().unwrap().clone(),
+    };
+    let plugin_config = PluginConfig {
+        components: vec![component],
+        ..PluginConfig::default()
+    };
+
+    let error = validate_config(&plugin_config).unwrap_err().to_string();
+    let expected = "colang_content can only be used with config_yaml";
+    assert!(error.contains(expected), "error was: {error}");
+}
+
 #[test]
 fn nemo_guardrails_config_map_prunes_default_version() {
     let map = nemo_guardrails_config_map(&NeMoGuardrailsConfig {
@@ -1155,6 +1299,103 @@ fn nemo_guardrails_config_map_prunes_default_version() {
     assert_eq!(map["remote"]["config_id"], json!("default"));
 }
 
+/// Writes a local tool-only component to TOML and checks it reads back intact.
+#[test]
+fn write_plugin_config_round_trips_local_nemo_guardrails_component() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let path = temp_dir.path().join("plugins.toml");
+    let component = PluginComponentSpec {
+        kind: NEMO_GUARDRAILS_PLUGIN_KIND.to_string(),
+        enabled: true,
+        config: local_guardrails_component_config("./rails"),
+    };
+    let config = PluginConfig {
+        components: vec![component],
+        ..PluginConfig::default()
+    };
+    write_plugin_config(&path, &config).unwrap();
+
+    // First inspect the raw TOML text, then the parsed config below.
+    let rendered = std::fs::read_to_string(&path).unwrap();
+    assert!(rendered.contains("mode = \"local\""));
+    assert!(rendered.contains("config_path = \"./rails\""));
+    assert!(rendered.contains("tool_input = true"));
+    assert!(rendered.contains("python_module = \"custom_guardrails\""));
+
+    let round_tripped = read_plugin_config(&path).unwrap();
+    let guardrails = round_tripped
+        .components
+        .iter()
+        .find(|spec| spec.kind == NEMO_GUARDRAILS_PLUGIN_KIND)
+        .unwrap();
+    let saved = &guardrails.config;
+    assert!(guardrails.enabled);
+    assert_eq!(saved["mode"], json!("local"));
+    assert_eq!(saved["config_path"], json!("./rails"));
+    assert_eq!(saved["tool_input"], json!(true));
+    assert_eq!(saved["local"]["python_module"], json!("custom_guardrails"));
+}
+
+/// A local-mode config serializes its local fields and omits `version`.
+#[test]
+fn nemo_guardrails_config_map_serializes_local_mode_fields() {
+    let typed = NeMoGuardrailsConfig {
+        mode: "local".into(),
+        config_path: Some("./rails".into()),
+        tool_input: true,
+        tool_output: true,
+        local: Some(LocalBackendConfig {
+            python_module: Some("custom_guardrails".into()),
+        }),
+        ..NeMoGuardrailsConfig::default()
+    };
+    let map = nemo_guardrails_config_map(&typed).unwrap();
+    assert!(!map.contains_key("version"));
+    assert_eq!(map.get("mode"), Some(&json!("local")));
+    assert_eq!(map.get("config_path"), Some(&json!("./rails")));
+    assert_eq!(map.get("tool_input"), Some(&json!(true)));
+    assert_eq!(map["local"]["python_module"], json!("custom_guardrails"));
+}
+
+/// Writes a local LLM-rails component to TOML and checks it reads back intact.
+#[test]
+fn write_plugin_config_round_trips_local_llm_nemo_guardrails_component() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let path = temp_dir.path().join("plugins.toml");
+    let component = PluginComponentSpec {
+        kind: NEMO_GUARDRAILS_PLUGIN_KIND.to_string(),
+        enabled: true,
+        config: local_llm_guardrails_component_config("models: []"),
+    };
+    let config = PluginConfig {
+        components: vec![component],
+        ..PluginConfig::default()
+    };
+    write_plugin_config(&path, &config).unwrap();
+
+    // First inspect the raw TOML text, then the parsed config below.
+    let rendered = std::fs::read_to_string(&path).unwrap();
+    assert!(rendered.contains("mode = \"local\""));
+    assert!(rendered.contains("codec = \"openai_chat\""));
+    assert!(rendered.contains("input = true"));
+    assert!(rendered.contains("output = true"));
+    assert!(rendered.contains("config_yaml = \"models: []\""));
+
+    let round_tripped = read_plugin_config(&path).unwrap();
+    let guardrails = round_tripped
+        .components
+        .iter()
+        .find(|spec| spec.kind == NEMO_GUARDRAILS_PLUGIN_KIND)
+        .unwrap();
+    let saved = &guardrails.config;
+    assert_eq!(saved["mode"], json!("local"));
+    assert_eq!(saved["codec"], json!("openai_chat"));
+    assert_eq!(saved["input"], json!(true));
+    assert_eq!(saved["output"], json!(true));
+    assert_eq!(saved["config_yaml"], json!("models: []"));
+    assert_eq!(saved["colang_content"], json!("define flow noop\n  pass"));
+}
+
 #[test]
 fn display_helpers_render_scalars_json_and_defaults() {
     assert_eq!(display_value(&json!("logs")), "logs");