From b70c28f1c3da44912647e3da58e6e7b17900fcf9 Mon Sep 17 00:00:00 2001
From: Enrique Estrada
Date: Fri, 6 Feb 2026 18:43:23 -0600
Subject: [PATCH] issue NewValidation: CSCwn37676 Fixes #302 fixed

---
 aci-preupgrade-validation-script.py           |  71 ++++++++
 docs/docs/validations.md                      |   9 +
 .../fabricNode.json                           |  46 ++++++
 .../fabricNode_no_apic.json                   |  13 ++
 .../test_bootx_service_tmp_files_check.py     | 155 ++++++++++++++++++
 5 files changed, 294 insertions(+)
 create mode 100644 tests/checks/bootx_service_tmp_files_check/fabricNode.json
 create mode 100644 tests/checks/bootx_service_tmp_files_check/fabricNode_no_apic.json
 create mode 100644 tests/checks/bootx_service_tmp_files_check/test_bootx_service_tmp_files_check.py

diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py
index 4b83f4c..3d123ef 100644
--- a/aci-preupgrade-validation-script.py
+++ b/aci-preupgrade-validation-script.py
@@ -6025,6 +6025,76 @@ def apic_downgrade_compat_warning_check(cversion, tversion, **kwargs):
     return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
 
 
+# Connection Base Check
+@check_wrapper(check_title='Bootx Service tmp files')
+def bootx_service_tmp_files_check(username, password, fabric_nodes, cversion, **kwargs):
+    """Flag APICs whose `/firmware/tmp` listing total exceeds 1000 (CSCwn37676).
+
+    Opens a `Connection` to every fabricNode whose role is `controller`, runs
+    `ls -ltr /firmware/tmp | head -1` and parses the leading `total N` line.
+
+    Returns NA when `cversion` is outside the version ranges tested below,
+    ERROR when no controller is found or any APIC could not be inspected,
+    FAIL_O when any APIC reports a total above 1000, and PASS otherwise.
+    """
+    result = PASS
+    headers = ["APIC ID", "APIC Name", "Folder Location", "Number of files"]
+    data = []
+    recommended_action = 'Review the workaround for the bug and apply it before upgrade'
+    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations#bootx-service-tmp-files'
+
+    if (cversion.older_than("6.0(2a)")) or (cversion.newer_than("6.0(9a)") and cversion.older_than("6.1(1a)")) or (cversion.newer_than("6.1(2a)")):
+        result = NA
+        return Result(result=result, msg="Current Version not affected.")
+
+    apics = [node for node in fabric_nodes if node["fabricNode"]["attributes"]["role"] == "controller"]
+    if not apics:
+        return Result(result=ERROR, msg="No fabricNode of APIC. Is the cluster healthy?", doc_url=doc_url)
+    # condition is version <6, address is populated in fabricNode
+    folder_name = "/firmware/tmp"
+    has_error = False
+    for apic in apics:
+        apic_id = apic["fabricNode"]["attributes"]["id"]
+        apic_name = apic["fabricNode"]["attributes"]["name"]
+        apic_addr = apic["fabricNode"]["attributes"]["address"]
+        try:
+            c = Connection(apic_addr)
+            c.username = username
+            c.password = password
+            c.log = LOG_FILE
+            c.connect()
+        except Exception as e:
+            data.append([apic_id, apic_name, "-", str(e)])
+            has_error = True
+            continue
+        try:
+            cmd = r"ls -ltr /firmware/tmp | head -1"
+            c.cmd(cmd)
+            if "No such file or directory" in c.output:
+                data.append([apic_id, apic_name, '/firmware/tmp not found', "Check user permissions or retry as 'apic#fallback\\\\admin'"])
+                has_error = True
+                continue
+            dbstats = c.output.split("\n")
+            for line in dbstats:
+                # NOTE(review): the `total` line printed by `ls -l` reports allocated blocks,
+                # not the number of entries; confirm 1000 is the intended threshold for it.
+                total_files_regex = r"total (?P<files>\d{1,})"
+                numberof_files_match = re.match(total_files_regex, line)
+                if numberof_files_match:
+                    files = numberof_files_match.group("files")
+                    if int(files) > 1000:
+                        data.append([apic_id, apic_name, folder_name, files])
+        except Exception as e:
+            data.append([apic_id, apic_name, "-", str(e)])
+            has_error = True
+            continue
+    if has_error:
+        result = ERROR
+    elif data:
+        result = FAIL_O
+    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
+
+
 # ---- Script Execution ----
 
 
@@ -6188,6 +6258,7 @@ class CheckManager:
         standby_sup_sync_check,
         isis_database_byte_check,
         configpush_shard_check,
+        bootx_service_tmp_files_check,
     ]
 
     ssh_checks = [
diff --git a/docs/docs/validations.md b/docs/docs/validations.md
index 68ca1c0..808f8b1 100644
--- a/docs/docs/validations.md
+++ b/docs/docs/validations.md
@@ -193,6 +193,7 @@ Items | Defect | This Script
 [Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign:
 [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign:
 [Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: |
+[BootX Service tmp files][d29] | CSCwn37676 | :white_check_mark: | :no_entry_sign:
 
 [d1]: #ep-announce-compatibility
 [d2]: #eventmgr-db-size-defect-susceptibility
@@ -222,6 +223,7 @@ Items | Defect | This Script
 [d26]: #stale-pconsra-object
 [d27]: #isis-dteps-byte-size
 [d28]: #policydist-configpushshardcont-crash
+[d29]: #bootx-service-tmp-files
 
 
 ## General Check Details
@@ -2647,6 +2649,12 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf
 
 If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade.
 
+### BootX Service tmp files
+
+BootX is a new DME service running in ACI 6.x and later. The purpose of BootX is to create a uniform bootstrapping procedure across all APIC deployment types.
+
+Due to [CSCwn37676][62], the BootX service can fail to come up with the message `[FAILED] Failed to start bootx` because of the number of files in `/firmware/tmp`. If this check is flagged, remove the files under `/firmware/tmp` and restart the process following the workaround listed in the bug's RNE.
+
 
 [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script
 [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html
@@ -2710,3 +2718,4 @@ If any instances of `configpushShardCont` are flagged by this script, Cisco TAC
 [59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515
 [60]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#Inter
 [61]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#EnablePolicyCompression
+[62]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwn37676
diff --git a/tests/checks/bootx_service_tmp_files_check/fabricNode.json b/tests/checks/bootx_service_tmp_files_check/fabricNode.json
new file mode 100644
index 0000000..21eed6a
--- /dev/null
+++ b/tests/checks/bootx_service_tmp_files_check/fabricNode.json
@@ -0,0 +1,46 @@
+[
+  {
+    "fabricNode": {
+      "attributes": {
+        "address": "10.0.0.1",
+        "dn": "topology/pod-1/node-1",
+        "id": "1",
+        "name": "apic1",
+        "role": "controller"
+      }
+    }
+  },
+  {
+    "fabricNode": {
+      "attributes": {
+        "address": "10.0.0.2",
+        "dn": "topology/pod-1/node-2",
+        "id": "2",
+        "name": "apic2",
+        "role": "controller"
+      }
+    }
+  },
+  {
+    "fabricNode": {
+      "attributes": {
+        "address": "10.0.0.3",
+        "dn": "topology/pod-1/node-3",
+        "id": "3",
+        "name": "apic3",
+        "role": "controller"
+      }
+    }
+  },
+  {
+    "fabricNode": {
+      "attributes": {
+        "address": "10.0.0.101",
+        "dn": "topology/pod-1/node-101",
+        "id": "101",
+        "name": "leaf1",
+        "role": "leaf"
+      }
+    }
+  }
+]
diff --git a/tests/checks/bootx_service_tmp_files_check/fabricNode_no_apic.json b/tests/checks/bootx_service_tmp_files_check/fabricNode_no_apic.json
new file mode 100644
index 0000000..b82c912
--- /dev/null
+++ b/tests/checks/bootx_service_tmp_files_check/fabricNode_no_apic.json
@@ -0,0 +1,13 @@
+[
+  {
+    "fabricNode": {
+      "attributes": {
+        "address": "10.0.0.101",
+        "dn": "topology/pod-1/node-101",
+        "id": "101",
+        "name": "fab5-leaf1",
+        "role": "leaf"
+      }
+    }
+  }
+]
diff --git a/tests/checks/bootx_service_tmp_files_check/test_bootx_service_tmp_files_check.py b/tests/checks/bootx_service_tmp_files_check/test_bootx_service_tmp_files_check.py
new file mode 100644
index 0000000..ec911df
--- /dev/null
+++ b/tests/checks/bootx_service_tmp_files_check/test_bootx_service_tmp_files_check.py
@@ -0,0 +1,155 @@
+import os
+import pytest
+import logging
+import importlib
+from helpers.utils import read_data
+
+script = importlib.import_module("aci-preupgrade-validation-script")
+
+log = logging.getLogger(__name__)
+dir = os.path.dirname(os.path.abspath(__file__))
+
+test_function = "bootx_service_tmp_files_check"
+
+fabricNodes = read_data(dir, "fabricNode.json")
+apic_ips = [
+    mo["fabricNode"]["attributes"]["address"]
+    for mo in fabricNodes
+    if mo["fabricNode"]["attributes"]["role"] == "controller"
+]
+
+ls_cmd = "ls -ltr /firmware/tmp | head -1"
+ls_output_neg = "total 171"
+ls_output_pos = "total 17880"
+ls_output_no_such_file = """\
+ls: cannot access /firmware/tmp: No such file or directory
+apic1#
+"""
+
+
+@pytest.mark.parametrize(
+    "icurl_outputs, fabric_nodes, cversion, conn_failure, conn_cmds, expected_result, expected_data",
+    [
+        # Connection failure
+        (
+            {},
+            fabricNodes,
+            "6.0(8h)",
+            True,
+            [],
+            script.ERROR,
+            [
+                ["1", "apic1", "-", "Simulated exception at connect()"],
+                ["2", "apic2", "-", "Simulated exception at connect()"],
+                ["3", "apic3", "-", "Simulated exception at connect()"],
+            ],
+        ),
+        # Simulated exception at `ls` command
+        (
+            {},
+            fabricNodes,
+            "6.0(8h)",
+            False,
+            {
+                apic_ip: [
+                    {
+                        "cmd": ls_cmd,
+                        "output": "",
+                        "exception": Exception("Simulated exception at `ls` command"),
+                    }
+                ]
+                for apic_ip in apic_ips
+            },
+            script.ERROR,
+            [
+                ["1", "apic1", "-", "Simulated exception at `ls` command"],
+                ["2", "apic2", "-", "Simulated exception at `ls` command"],
+                ["3", "apic3", "-", "Simulated exception at `ls` command"],
+            ],
+        ),
+        # /firmware/tmp dir not found/not accessible
+        (
+            {},
+            fabricNodes,
+            "6.0(8h)",
+            False,
+            {
+                apic_ip: [
+                    {
+                        "cmd": ls_cmd,
+                        "output": "\n".join([ls_cmd, ls_output_no_such_file]),
+                        "exception": None,
+                    }
+                ]
+                for apic_ip in apic_ips
+            },
+            script.ERROR,
+            [
+                ["1", "apic1", "/firmware/tmp not found", "Check user permissions or retry as 'apic#fallback\\\\admin'"],
+                ["2", "apic2", "/firmware/tmp not found", "Check user permissions or retry as 'apic#fallback\\\\admin'"],
+                ["3", "apic3", "/firmware/tmp not found", "Check user permissions or retry as 'apic#fallback\\\\admin'"],
+            ],
+        ),
+        # /firmware/tmp dir found, less than 1000 files
+        (
+            {},
+            fabricNodes,
+            "6.0(8h)",
+            False,
+            {
+                apic_ip: [
+                    {
+                        "cmd": ls_cmd,
+                        "output": "\n".join([ls_cmd, ls_output_neg]),
+                        "exception": None,
+                    }
+                ]
+                for apic_ip in apic_ips
+            },
+            script.PASS,
+            [],
+        ),
+        # FAIL_O /firmware/tmp dir found, more than 1000 files
+        (
+            {},
+            fabricNodes,
+            "6.0(8h)",
+            False,
+            {
+                apic_ip: [
+                    {
+                        "cmd": ls_cmd,
+                        "output": "\n".join([ls_cmd, ls_output_pos]),
+                        "exception": None,
+                    }
+                ]
+                for apic_ip in apic_ips
+            },
+            script.FAIL_O,
+            [
+                ["1", "apic1", "/firmware/tmp", "17880"],
+                ["2", "apic2", "/firmware/tmp", "17880"],
+                ["3", "apic3", "/firmware/tmp", "17880"],
+            ],
+        ),
+        # ERROR, fabricNode failure
+        (
+            {},
+            read_data(dir, "fabricNode_no_apic.json"),
+            "6.0(8h)",
+            False,
+            [],
+            script.ERROR,
+            [],
+        ),
+    ],
+)
+def test_logic(run_check, mock_icurl, fabric_nodes, cversion, mock_conn, expected_result, expected_data):
+    result = run_check(
+        username="fake_username",
+        password="fake_password",
+        fabric_nodes=fabric_nodes,
+        cversion=script.AciVersion(cversion),
+    )
+    assert result.result == expected_result
+    assert result.data == expected_data