networmix
diff --git a/docs/reference/api-full.md b/docs/reference/api-full.md
Lines changed: 35 additions & 5 deletions
diff --git a/docs/reference/api.md b/docs/reference/api.md
Lines changed: 26 additions & 7 deletions
diff --git a/docs/reference/dsl.md b/docs/reference/dsl.md
Lines changed: 35 additions & 20 deletions
diff --git a/ngraph/failure_manager.py b/ngraph/failure_manager.py
Lines changed: 28 additions & 11 deletions
diff --git a/ngraph/failure_policy.py b/ngraph/failure_policy.py
Lines changed: 31 additions & 0 deletions
@@ -10,7 +10,7 @@ For a curated, example-driven API guide, see **[api.md](api.md)**.
 > - **[CLI Reference](cli.md)** - Command-line interface
 > - **[DSL Reference](dsl.md)** - YAML syntax guide
 
-**Generated from source code on:** June 13, 2025 at 03:15 UTC
+**Generated from source code on:** June 13, 2025 at 10:43 UTC
 
 **Modules auto-discovered:** 37
 
@@ -313,14 +313,15 @@ repeats multiple times for Monte Carlo experiments.
 Attributes:
     network (Network): The underlying network to mutate (enable/disable nodes/links).
     traffic_matrix_set (TrafficMatrixSet): Traffic matrices to place after failures.
+    failure_policy_set (FailurePolicySet): Set of named failure policies.
     matrix_name (Optional[str]): Name of specific matrix to use, or None for default.
-    failure_policy (Optional[FailurePolicy]): The policy describing what fails.
+    policy_name (Optional[str]): Name of specific failure policy to use, or None for default.
     default_flow_policy_config: The default flow policy for any demands lacking one.
 
 **Methods:**
 
 - `apply_failures(self) -> 'None'`
-  - Apply the current failure_policy to self.network (in-place).
+  - Apply the current failure policy to self.network (in-place).
 - `run_monte_carlo_failures(self, iterations: 'int', parallelism: 'int' = 1) -> 'Dict[str, Any]'`
   - Repeatedly applies (randomized) failures to the network and accumulates
 - `run_single_failure_scenario(self) -> 'List[TrafficResult]'`
@@ -401,6 +402,8 @@ Attributes:
 
 - `apply_failures(self, network_nodes: 'Dict[str, Any]', network_links: 'Dict[str, Any]', network_risk_groups: 'Dict[str, Any] | None' = None) -> 'List[str]'`
   - Identify which entities fail given the defined rules, then optionally
+- `to_dict(self) -> 'Dict[str, Any]'`
+  - Convert to dictionary for JSON serialization.
 
 ### FailureRule
 
@@ -633,6 +636,33 @@ Attributes:
 - `to_dict(self) -> 'dict[str, Any]'`
   - Convert to dictionary for JSON serialization.
 
+### FailurePolicySet
+
+Named collection of FailurePolicy objects.
+
+This mutable container maps failure policy names to FailurePolicy objects,
+allowing management of multiple failure policies for analysis.
+
+Attributes:
+    policies: Dictionary mapping failure policy names to FailurePolicy objects.
+
+**Attributes:**
+
+- `policies` (dict[str, 'FailurePolicy']) = {}
+
+**Methods:**
+
+- `add(self, name: 'str', policy: "'FailurePolicy'") -> 'None'`
+  - Add a failure policy to the collection.
+- `get_all_policies(self) -> "list['FailurePolicy']"`
+  - Get all failure policies from the collection.
+- `get_default_policy(self) -> "'FailurePolicy | None'"`
+  - Get the default failure policy.
+- `get_policy(self, name: 'str') -> "'FailurePolicy'"`
+  - Get a specific failure policy by name.
+- `to_dict(self) -> 'dict[str, Any]'`
+  - Convert to dictionary for JSON serialization.
+
 ### PlacementResultSet
 
 Aggregated traffic placement results from one or many runs.
@@ -693,7 +723,7 @@ Represents a complete scenario for building and executing network workflows.
 
 This scenario includes:
   - A network (nodes/links), constructed via blueprint expansion.
-  - A failure policy (one or more rules).
+  - A failure policy set (zero or more named failure policies).
   - A traffic matrix set containing one or more named traffic matrices.
   - A list of workflow steps to execute.
   - A results container for storing outputs.
@@ -708,8 +738,8 @@ Typical usage example:
 **Attributes:**
 
 - `network` (Network)
-- `failure_policy` (Optional[FailurePolicy])
 - `workflow` (List[WorkflowStep])
+- `failure_policy_set` (FailurePolicySet) = FailurePolicySet(policies={})
 - `traffic_matrix_set` (TrafficMatrixSet) = TrafficMatrixSet(matrices={})
 - `results` (Results) = Results(_store={})
 - `components_library` (ComponentsLibrary) = ComponentsLibrary(components={})
 
@@ -125,16 +125,35 @@ demand = TrafficDemand(
 
 ## Failure Modeling
 
-### FailurePolicy
-Configure failure simulation parameters.
+### FailurePolicy and FailurePolicySet
+Configure failure simulation parameters using named policies.
 
 ```python
-from ngraph.failure_policy import FailurePolicy
+from ngraph.failure_policy import FailurePolicy, FailureRule
+from ngraph.results_artifacts import FailurePolicySet
+
+# Create an individual failure rule
+rule = FailureRule(
+    entity_scope="link",
+    rule_type="choice",
+    count=2
+)
 
-policy = FailurePolicy(
-    enable_failures=True,
-    max_concurrent_failures=2,
-    failure_probability=0.01
+# Create failure policy
+policy = FailurePolicy(rules=[rule])
+
+# Create policy set to manage multiple policies
+policy_set = FailurePolicySet()
+policy_set.add("light_failures", policy)
+policy_set.add("default", policy)
+
+# Use with FailureManager
+from ngraph.failure_manager import FailureManager
+manager = FailureManager(
+    network=network,
+    traffic_matrix_set=traffic_matrix_set,
+    failure_policy_set=policy_set,
+    policy_name="light_failures"  # Optional: specify which policy to use
 )
 ```
 
 
@@ -15,7 +15,7 @@ The main sections of a scenario YAML file work together to define a complete net
 - `components`: **[Optional]** A library of hardware and optics definitions with attributes like power consumption.
 - `risk_groups`: **[Optional]** Defines groups of components that might fail together (e.g., all components in a rack or multiple parallel links sharing the same DWDM transmission).
 - `traffic_matrix_set`: **[Optional]** Defines traffic demand matrices between network nodes with various placement policies.
-- `failure_policy`: **[Optional]** Specifies availability parameters and rules for simulating network failures.
+- `failure_policy_set`: **[Optional]** Specifies named failure policies and rules for simulating network failures.
 - `workflow`: **[Optional]** A list of steps to be executed, such as building graphs, running simulations, or performing analyses.
 
 ## `network` - Core Foundation
@@ -398,30 +398,45 @@ traffic_matrix_set:
 
 - **`full_mesh`**: Creates individual demands for each (source_node, sink_node) pair, excluding self-pairs (where source equals sink). The total demand volume is split evenly among all valid pairs. This is useful for modeling distributed traffic patterns where every source communicates with every sink.
 
-## `failure_policy` - Failure Simulation
+## `failure_policy_set` - Failure Simulation
 
-Defines how network failures are simulated to test resilience and analyze failure scenarios.
+Defines named failure policies for simulating network failures to test resilience and analyze failure scenarios. Each policy contains rules and configuration for how failures are applied.
 
 ```yaml
-failure_policy:
-  name: "PolicyName" # Optional
-  fail_shared_risk_groups: true | false
-  fail_risk_group_children: true | false
-  use_cache: true | false
-  attrs: # Optional custom attributes for the policy
-    custom_key: value
-  rules:
-    - entity_scope: "node" | "link" | "risk_group"
-      conditions: # Optional: list of conditions to select entities
-        - attr: "attribute_name"
-          operator: "==" | "!=" | ">" | "<" | ">=" | "<=" | "contains" | "not_contains" | "any_value" | "no_value"
-          value: "some_value"
-      logic: "and" | "or" | "any" # How to combine conditions
-      rule_type: "all" | "choice" | "random" # How to select entities matching conditions
-      count: N # For 'choice' rule_type
-      probability: P # For 'random' rule_type (0.0 to 1.0)
+failure_policy_set:
+  policy_name_1:
+    name: "PolicyName" # Optional
+    fail_shared_risk_groups: true | false
+    fail_risk_group_children: true | false
+    use_cache: true | false
+    attrs: # Optional custom attributes for the policy
+      custom_key: value
+    rules:
+      - entity_scope: "node" | "link" | "risk_group"
+        conditions: # Optional: list of conditions to select entities
+          - attr: "attribute_name"
+            operator: "==" | "!=" | ">" | "<" | ">=" | "<=" | "contains" | "not_contains" | "any_value" | "no_value"
+            value: "some_value"
+        logic: "and" | "or" | "any" # How to combine conditions
+        rule_type: "all" | "choice" | "random" # How to select entities matching conditions
+        count: N # For 'choice' rule_type
+        probability: P # For 'random' rule_type (0.0 to 1.0)
+  policy_name_2:
+    # Another failure policy...
+  default:
+    # Default failure policy (used when no specific policy is selected)
+    rules:
+      - entity_scope: "link"
+        rule_type: "choice"
+        count: 1
 ```
 
+**Policy Selection:**
+
+- If a `default` policy exists, it will be used when no specific policy is selected
+- If only one policy exists and no `default` is specified, that policy becomes the default
+- Multiple policies allow testing different failure scenarios in the same network
+
 ## `workflow` - Execution Steps
 
 A list of operations to perform on the network. Each step has a `step_type` and specific arguments. This section defines the analysis workflow to be executed.
 
@@ -5,10 +5,9 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Dict, List, Optional, Tuple
 
-from ngraph.failure_policy import FailurePolicy
 from ngraph.lib.flow_policy import FlowPolicyConfig
 from ngraph.network import Network
-from ngraph.results_artifacts import TrafficMatrixSet
+from ngraph.results_artifacts import FailurePolicySet, TrafficMatrixSet
 from ngraph.traffic_manager import TrafficManager, TrafficResult
 
 
@@ -19,47 +18,65 @@ class FailureManager:
     Attributes:
         network (Network): The underlying network to mutate (enable/disable nodes/links).
         traffic_matrix_set (TrafficMatrixSet): Traffic matrices to place after failures.
+        failure_policy_set (FailurePolicySet): Set of named failure policies.
         matrix_name (Optional[str]): Name of specific matrix to use, or None for default.
-        failure_policy (Optional[FailurePolicy]): The policy describing what fails.
+        policy_name (Optional[str]): Name of specific failure policy to use, or None for default.
         default_flow_policy_config: The default flow policy for any demands lacking one.
     """
 
     def __init__(
         self,
         network: Network,
         traffic_matrix_set: TrafficMatrixSet,
+        failure_policy_set: FailurePolicySet,
         matrix_name: Optional[str] = None,
-        failure_policy: Optional[FailurePolicy] = None,
+        policy_name: Optional[str] = None,
         default_flow_policy_config: Optional[FlowPolicyConfig] = None,
     ) -> None:
         """Initialize a FailureManager.
 
         Args:
             network: The Network to be modified by failures.
             traffic_matrix_set: Traffic matrices containing demands to place after failures.
+            failure_policy_set: Set of named failure policies.
             matrix_name: Name of specific matrix to use. If None, uses default matrix.
-            failure_policy: A FailurePolicy specifying the rules of what fails.
+            policy_name: Name of specific failure policy to use. If None, uses default policy.
             default_flow_policy_config: Default FlowPolicyConfig if demands do not specify one.
         """
         self.network = network
         self.traffic_matrix_set = traffic_matrix_set
+        self.failure_policy_set = failure_policy_set
         self.matrix_name = matrix_name
-        self.failure_policy = failure_policy
+        self.policy_name = policy_name
         self.default_flow_policy_config = default_flow_policy_config
 
     def apply_failures(self) -> None:
-        """Apply the current failure_policy to self.network (in-place).
+        """Apply the current failure policy to self.network (in-place).
 
-        If failure_policy is None, this method does nothing.
+        If failure_policy_set is empty or no valid policy is found, this method does nothing.
         """
-        if not self.failure_policy:
-            return
+        # Check if we have any policies
+        if len(self.failure_policy_set.policies) == 0:
+            return  # No policies, do nothing
+
+        # Get the failure policy to use
+        if self.policy_name:
+            # Use specific named policy
+            try:
+                failure_policy = self.failure_policy_set.get_policy(self.policy_name)
+            except KeyError:
+                return  # Policy not found, do nothing
+        else:
+            # Use default policy
+            failure_policy = self.failure_policy_set.get_default_policy()
+            if failure_policy is None:
+                return  # No default policy, do nothing
 
         # Collect node/links as dicts {id: attrs}, matching FailurePolicy expectations
         node_map = {n_name: n.attrs for n_name, n in self.network.nodes.items()}
         link_map = {link_id: link.attrs for link_id, link in self.network.links.items()}
 
-        failed_ids = self.failure_policy.apply_failures(node_map, link_map)
+        failed_ids = failure_policy.apply_failures(node_map, link_map)
 
         # Disable the failed entities
         for f_id in failed_ids:
 
@@ -365,6 +365,37 @@ def _expand_failed_risk_group_children(
                     failed_rgs.add(child_name)
                     queue.append(child_name)
 
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization.
+
+        Returns:
+            Dictionary representation with all fields as JSON-serializable primitives.
+        """
+        return {
+            "rules": [
+                {
+                    "entity_scope": rule.entity_scope,
+                    "conditions": [
+                        {
+                            "attr": cond.attr,
+                            "operator": cond.operator,
+                            "value": cond.value,
+                        }
+                        for cond in rule.conditions
+                    ],
+                    "logic": rule.logic,
+                    "rule_type": rule.rule_type,
+                    "probability": rule.probability,
+                    "count": rule.count,
+                }
+                for rule in self.rules
+            ],
+            "attrs": self.attrs,
+            "fail_shared_risk_groups": self.fail_shared_risk_groups,
+            "fail_risk_group_children": self.fail_risk_group_children,
+            "use_cache": self.use_cache,
+        }
+
 
 def _evaluate_condition(entity_attrs: Dict[str, Any], cond: FailureCondition) -> bool:
     """Evaluate a single FailureCondition against entity attributes.