From 408619fc0b397b172947bf17115ff52f68c68001 Mon Sep 17 00:00:00 2001 From: Harinadh Saladi Date: Fri, 6 Feb 2026 05:27:20 +0000 Subject: [PATCH 1/2] Added check for CSCwo96334: /tmp directory disk space for snapshot storage --- aci-preupgrade-validation-script.py | 52 ++++++++ docs/docs/validations.md | 37 +++++- .../faultInst_mixed.json | 32 +++++ .../faultInst_non_tmp.json | 22 ++++ .../faultInst_tmp_pos.json | 32 +++++ .../test_tmp_dir_snapshot_storage_check.py | 119 ++++++++++++++++++ 6 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 tests/checks/tmp_dir_snapshot_storage_check/faultInst_mixed.json create mode 100644 tests/checks/tmp_dir_snapshot_storage_check/faultInst_non_tmp.json create mode 100644 tests/checks/tmp_dir_snapshot_storage_check/faultInst_tmp_pos.json create mode 100644 tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index bfca5bb..ad23ffc 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -5962,6 +5962,57 @@ def configpush_shard_check(tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) + +@check_wrapper(check_title='/tmp directory disk space for snapshot storage during upgrade') +def tmp_dir_snapshot_storage_check(tversion, **kwargs): + result = FAIL_UF + headers = ['Fault', 'Pod', 'Node', 'Mount Point', 'Current Usage %', 'Recommended Action'] + data = [] + unformatted_headers = ['Fault', 'Fault DN', 'Recommended Action'] + unformatted_data = [] + recommended_action = 'Contact Cisco TAC for assistance. The /tmp directory may need cleanup or the upgrade may require special handling.' 
+ doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#tmp-directory-disk-space-for-snapshot-storage' + + if not tversion: + return Result(result=MANUAL, msg=TVER_MISSING) + + if tversion.older_than("6.1(4a)"): + dn_regex = node_regex + r'/.+p-\[(?P<mountpoint>.+)\]-f' + desc_regex = r'is (?P<usage>\d{2,3}%) full' + + # Query for F1527, F1528, or F1529 faults + faultInsts = icurl('class', + 'faultInst.json?query-target-filter=or(eq(faultInst.code,"F1527"),eq(faultInst.code,"F1528"),eq(faultInst.code,"F1529"))') + + for faultInst in faultInsts: + fc = faultInst['faultInst']['attributes']['code'] + dn = re.search(dn_regex, faultInst['faultInst']['attributes']['dn']) + desc = re.search(desc_regex, faultInst['faultInst']['attributes']['descr']) + + # Only flag /tmp directory issues for this check + if dn and desc and dn.group('mountpoint') == '/tmp': + data.append([fc, dn.group('pod'), dn.group('node'), dn.group('mountpoint'), + desc.group('usage'), recommended_action]) + elif dn and dn.group('mountpoint') == '/tmp': + # If we can parse DN but not description, still report it + unformatted_data.append([fc, faultInst['faultInst']['attributes']['dn'], recommended_action]) + + if not data and not unformatted_data: + result = PASS + else: + result = NA + + return Result( + result=result, + headers=headers, + data=data, + unformatted_headers=unformatted_headers, + unformatted_data=unformatted_data, + recommended_action=recommended_action, + doc_url=doc_url, + ) + + # ---- Script Execution ---- @@ -6069,6 +6120,7 @@ class CheckManager: scalability_faults_check, fabric_port_down_check, equipment_disk_limits_exceeded, + tmp_dir_snapshot_storage_check, # Configurations vpc_paired_switches_check, diff --git a/docs/docs/validations.md b/docs/docs/validations.md index e395564..7767974 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -190,7 +190,8 @@ Items | Defect | This Script [Observer Database Size][d25] | CSCvw45531 | :white_check_mark: | :no_entry_sign: 
[Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign: [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign: -[Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | +[Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | :no_entry_sign: +[/tmp Directory Disk Space for Snapshot Storage][d29] | CSCwo96334 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -220,6 +221,7 @@ Items | Defect | This Script [d26]: #stale-pconsra-object [d27]: #isis-dteps-byte-size [d28]: #policydist-configpushshardcont-crash +[d29]: #tmp-directory-disk-space-for-snapshot-storage ## General Check Details @@ -2604,6 +2606,38 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade. +### /tmp Directory Disk Space for Snapshot Storage + +Prior to ACI version 6.1(4), the APIC uses the `/tmp` directory to store database snapshots during the upgrade process. If the `/tmp` directory has insufficient free space (typically indicated by disk space faults F1527, F1528, or F1529), the upgrade process may fail due to inability to create required snapshot files. + +Starting from ACI version 6.1(4), the fix for [CSCwo96334][62] stores snapshots in the `/data` directory instead of `/tmp`, which provides more available disk space and resolves this issue. 
+ +This check monitors the `/tmp` directory utilization on APICs by querying for the following faults: + +- **F1527** (Minor): Storage unit is 75-84% full +- **F1528** (Major): Storage unit is 85-89% full +- **F1529** (Critical): Storage unit is 90-100% full + +**Impact:** + +If `/tmp` is at or above 75% utilization when upgrading to versions prior to 6.1(4), the upgrade may fail when attempting to create database snapshots. This can result in: + +- Upgrade workflow failure +- Inability to complete APIC database conversion +- Potential need for manual cleanup and upgrade retry + +**Recommended Action:** + +If this check flags high `/tmp` utilization: + +1. Contact Cisco TAC for assistance before proceeding with the upgrade +2. Work with TAC to identify and remove unnecessary files from `/tmp` +3. Consider upgrading to ACI 6.1(4) or later where snapshots use the `/data` directory instead +4. Ensure at least 25-30% free space in `/tmp` before attempting upgrade to pre-6.1(4) versions + +**Note:** This check only applies when upgrading to versions older than 6.1(4). For upgrades to 6.1(4) or later, this check returns N/A as the issue is resolved in those versions. 
+ + [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html [2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html @@ -2664,5 +2698,6 @@ If any instances of `configpushShardCont` are flagged by this script, Cisco TAC [57]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp22212 [58]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp15375 [59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515 +[62]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwo96334 [60]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#Inter [61]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#EnablePolicyCompression diff --git a/tests/checks/tmp_dir_snapshot_storage_check/faultInst_mixed.json b/tests/checks/tmp_dir_snapshot_storage_check/faultInst_mixed.json new file mode 100644 index 0000000..008d00a --- /dev/null +++ b/tests/checks/tmp_dir_snapshot_storage_check/faultInst_mixed.json @@ -0,0 +1,32 @@ +[ + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1529", + "descr": "Storage unit /tmp on Node 1 of pod 1 is 92% full", + "dn": "topology/pod-1/node-1/sys/ch/p-[/tmp]-fault-F1529" + } + } + }, + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1527", + "descr": "Storage unit /firmware on Node 2 of pod 1 is 76% full", + "dn": "topology/pod-1/node-2/sys/ch/p-[/firmware]-fault-F1527" + } + } + }, + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1528", + "descr": "Storage unit /techsupport on Node 3 of pod 1 is 85% full", + "dn": "topology/pod-1/node-3/sys/ch/p-[/techsupport]-fault-F1528" + } + } + } +] diff --git 
a/tests/checks/tmp_dir_snapshot_storage_check/faultInst_non_tmp.json b/tests/checks/tmp_dir_snapshot_storage_check/faultInst_non_tmp.json new file mode 100644 index 0000000..c38b959 --- /dev/null +++ b/tests/checks/tmp_dir_snapshot_storage_check/faultInst_non_tmp.json @@ -0,0 +1,22 @@ +[ + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1527", + "descr": "Storage unit /firmware on Node 1 of pod 1 is 78% full", + "dn": "topology/pod-1/node-1/sys/ch/p-[/firmware]-fault-F1527" + } + } + }, + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1528", + "descr": "Storage unit /techsupport on Node 2 of pod 1 is 88% full", + "dn": "topology/pod-1/node-2/sys/ch/p-[/techsupport]-fault-F1528" + } + } + } +] diff --git a/tests/checks/tmp_dir_snapshot_storage_check/faultInst_tmp_pos.json b/tests/checks/tmp_dir_snapshot_storage_check/faultInst_tmp_pos.json new file mode 100644 index 0000000..2f8442d --- /dev/null +++ b/tests/checks/tmp_dir_snapshot_storage_check/faultInst_tmp_pos.json @@ -0,0 +1,32 @@ +[ + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1527", + "descr": "Storage unit /tmp on Node 1 of pod 1 is 80% full", + "dn": "topology/pod-1/node-1/sys/ch/p-[/tmp]-fault-F1527" + } + } + }, + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1528", + "descr": "Storage unit /tmp on Node 2 of pod 1 is 87% full", + "dn": "topology/pod-1/node-2/sys/ch/p-[/tmp]-fault-F1528" + } + } + }, + { + "faultInst": { + "attributes": { + "cause": "threshold-crossed", + "code": "F1529", + "descr": "Storage unit /tmp on Node 3 of pod 1 is 95% full", + "dn": "topology/pod-1/node-3/sys/ch/p-[/tmp]-fault-F1529" + } + } + } +] diff --git a/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py b/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py new file mode 100644 index 0000000..1136c77 --- /dev/null +++ 
b/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py @@ -0,0 +1,119 @@ +import os +import pytest +import logging +import importlib +from helpers.utils import read_data + +script = importlib.import_module("aci-preupgrade-validation-script") + +log = logging.getLogger(__name__) +dir = os.path.dirname(os.path.abspath(__file__)) + +test_function = "tmp_dir_snapshot_storage_check" + +# icurl queries +faultInst_api = 'faultInst.json?query-target-filter=or(eq(faultInst.code,"F1527"),eq(faultInst.code,"F1528"),eq(faultInst.code,"F1529"))' + + +@pytest.mark.parametrize( + "icurl_outputs, tversion, expected_result", + [ + # ===== AFFECTED VERSIONS (< 6.1(4a)) ===== + # Older 4.x version, no /tmp faults + ( + {faultInst_api: []}, + "4.2(7f)", + script.PASS, + ), + # 5.x version, no /tmp faults + ( + {faultInst_api: []}, + "5.2(8f)", + script.PASS, + ), + # 6.0.x version, no /tmp faults + ( + {faultInst_api: []}, + "6.0(5a)", + script.PASS, + ), + # Just before fix version 6.1(3z), no /tmp faults + ( + {faultInst_api: []}, + "6.1(3z)", + script.PASS, + ), + # 4.x version with /tmp faults + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "4.2(7t)", + script.FAIL_UF, + ), + # 5.x version with /tmp faults + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "5.2(8f)", + script.FAIL_UF, + ), + # 6.0.x version with /tmp faults + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "6.0(2h)", + script.FAIL_UF, + ), + # Just before fix version 6.1(3z) with /tmp faults + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "6.1(3z)", + script.FAIL_UF, + ), + # Affected version with only non-/tmp faults (should PASS) + ( + {faultInst_api: read_data(dir, "faultInst_non_tmp.json")}, + "5.2(6a)", + script.PASS, + ), + # Affected version with mixed /tmp and non-/tmp faults (should FAIL_UF) + ( + {faultInst_api: read_data(dir, "faultInst_mixed.json")}, + "6.0(3a)", + script.FAIL_UF, + ), + # ===== FIXED 
VERSIONS (>= 6.1(4a)) ===== + # Exact fix version 6.1(4a) with /tmp faults (should be NA) + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "6.1(4a)", + script.NA, + ), + # Exact fix version 6.1(4a) without faults (should be NA) + ( + {faultInst_api: []}, + "6.1(4a)", + script.NA, + ), + # Later 6.1.x version with /tmp faults (should be NA) + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "6.1(5a)", + script.NA, + ), + # 6.2.x version with /tmp faults (should be NA) + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "6.2(1a)", + script.NA, + ), + # Future 7.x version with /tmp faults (should be NA) + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "7.0(1a)", + script.NA, + ), + ], +) +def test_logic(run_check, mock_icurl, tversion, expected_result): + result = run_check( + tversion=script.AciVersion(tversion) if tversion else None, + ) + assert result.result == expected_result From b14584e5ad6650581a73060cc796b07d554d42e6 Mon Sep 17 00:00:00 2001 From: Harinadh Saladi Date: Fri, 6 Feb 2026 05:50:05 +0000 Subject: [PATCH 2/2] Added three more test cases --- .../test_tmp_dir_snapshot_storage_check.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py b/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py index 1136c77..dbdceba 100644 --- a/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py +++ b/tests/checks/tmp_dir_snapshot_storage_check/test_tmp_dir_snapshot_storage_check.py @@ -79,6 +79,24 @@ "6.0(3a)", script.FAIL_UF, ), + # 3.x version with /tmp faults + ( + {faultInst_api: read_data(dir, "faultInst_tmp_pos.json")}, + "3.2(10e)", + script.FAIL_UF, + ), + # 4.x version with only non-/tmp faults (should PASS) + ( + {faultInst_api: read_data(dir, "faultInst_non_tmp.json")}, + "4.2(7f)", + script.PASS, + ), + # 6.0.x version with mixed 
faults + ( + {faultInst_api: read_data(dir, "faultInst_mixed.json")}, + "6.0(5h)", + script.FAIL_UF, + ), # ===== FIXED VERSIONS (>= 6.1(4a)) ===== # Exact fix version 6.1(4a) with /tmp faults (should be NA) (