diff --git a/docs/netlab/initial.md b/docs/netlab/initial.md index eb0aa1651f..9f176afa51 100644 --- a/docs/netlab/initial.md +++ b/docs/netlab/initial.md @@ -71,13 +71,18 @@ All other arguments are passed directly to ansible-playbook ## Wait for Devices to Become Ready -Some devices are not ready immediately after they complete the boot process. For example, Cisco Nexus OS or Juniper vPTX need another minute to realize they have data-plane interfaces. +**netlab initial** starts with a device readiness check to ensure the lab devices are ready for configuration deployment. If you want to execute just this part of the process, use the `--ready` option. + +There are several reasons a device might not be ready when the virtualization providers finish their job: -Likewise, the virtualization provider might prematurely report that the devices are ready. For example, *containerlab* does not wait for VMs running in containers to complete their boot process (see [](clab-vrnetlab) for more details).[^vssh] +* A virtualization provider might prematurely report that the devices are ready. For example, *containerlab* does not wait for VMs running in containers to complete their boot process (see [](clab-vrnetlab) for more details).[^vssh] _netlab_ checks the reachability of SSH servers for all containers that are configured via SSH. +* Some devices are not ready even after their SSH servers start accepting incoming sessions. For example, Cisco Nexus OS or Juniper vPTX requires around a minute to detect data-plane interfaces. In such cases, _netlab_ uses a device-specific Ansible task list to verify that the devices are ready for configuration. [^vssh]: Vagrant waits for all devices to become reachable via SSH before reporting them ready. -**netlab initial** starts with a device readiness check to ensure the lab devices are ready for configuration deployment. If you want to execute just this part of the process, use the `--ready` option. +```{tip} +_netlab_ uses internal (Python) code to check the reachability of SSH servers. If you want to check the SSH servers from an Ansible playbook, set the **‌defaults.netlab.initial.ready.ssh** [topology default](topo-defaults) to **‌ansible** (preferably using a [user defaults file](defaults-user-file)). +``` ## Initial Device Configurations diff --git a/netsim/ansible/device-ready.ansible b/netsim/ansible/device-ready.ansible index 19a46eaca1..fffc7cbed1 100644 --- a/netsim/ansible/device-ready.ansible +++ b/netsim/ansible/device-ready.ansible @@ -1,9 +1,18 @@ #!/usr/bin/env ansible-playbook --- -- name: Deploy initial device configuration - hosts: all:!unprovisioned +- name: Wait for SSH servers + hosts: netlab_ready_ssh:!unprovisioned strategy: "{{ netlab_strategy|default('linear') }}" gather_facts: false + tags: [ ready_ssh ] + tasks: + - import_tasks: tasks/readiness-check/ssh.yml + +- name: Wait for device-specific conditions + hosts: netlab_ready_ansible:!unprovisioned + strategy: "{{ netlab_strategy|default('linear') }}" + gather_facts: false + tags: [ ready_ansible ] tasks: - name: Set variables that cannot be set with VARS set_fact: diff --git a/netsim/ansible/initial-config.ansible b/netsim/ansible/initial-config.ansible index 95da69e92d..8ef9e52aec 100755 --- a/netsim/ansible/initial-config.ansible +++ b/netsim/ansible/initial-config.ansible @@ -14,7 +14,6 @@ node_files: "{{ lookup('env','PWD') }}/node_files" tags: [ always ] - - import_tasks: tasks/wait-for-ready.yml - import_tasks: tasks/initial-config.yml # diff --git a/netsim/ansible/tasks/wait-for-ready.yml b/netsim/ansible/tasks/wait-for-ready.yml index b9aaac7041..0fe590687e 100644 --- a/netsim/ansible/tasks/wait-for-ready.yml +++ b/netsim/ansible/tasks/wait-for-ready.yml @@ -3,17 +3,6 @@ # --- - block: - - name: "Generic readiness tests" - include_tasks: "{{ ready_script }}" - vars: - netlab_device_type: "{{ item }}" - params: - paths: "{{ paths_ready.dirs }}" - files: "{{ paths_ready.files }}" - ready_script: "{{ lookup('first_found',params,errors='ignore') }}" - when: ready_script is string and ready_script != '' - loop: "{{ netlab_ready|default([])}}" - - name: Find device readiness script set_fact: ready_script: "{{ lookup('first_found',params,errors='ignore') }}" diff --git a/netsim/cli/initial/__init__.py b/netsim/cli/initial/__init__.py index 4af3d51849..8a4f0b0b5e 100644 --- a/netsim/cli/initial/__init__.py +++ b/netsim/cli/initial/__init__.py @@ -10,7 +10,7 @@ from ...utils import log from ...utils import status as _status from .. import ansible, error_and_exit, external_commands, get_message, lab_status_change, load_snapshot -from . import configs, deploy, utils +from . import configs, deploy, ready, utils def run_initial(cli_args: typing.List[str]) -> None: @@ -30,13 +30,8 @@ def run_initial(cli_args: typing.List[str]) -> None: configs.run(topology,args,cwd) return elif args.ready: - rest += utils.ansible_args(args) - ansible.check_version() - ansible.playbook('device-ready.ansible',rest) - if topology: - lab_status_change(topology,'devices are ready') + ready.run(topology,args,rest) else: - rest += utils.ansible_args(args) ansible.check_version() deploy.run(topology,args,rest) diff --git a/netsim/cli/initial/deploy.py b/netsim/cli/initial/deploy.py index c5f53f63e4..5d15460955 100644 --- a/netsim/cli/initial/deploy.py +++ b/netsim/cli/initial/deploy.py @@ -14,7 +14,7 @@ from ...data import get_empty_box from ...providers import execute_node from ...utils import log, strings -from .. import _nodeset, ansible, error_and_exit, external_commands, get_message, lab_status_change +from .. import ansible, error_and_exit, external_commands, get_message, lab_status_change from . import configs, ready, utils @@ -100,16 +100,14 @@ def run(topology: Box, args: argparse.Namespace, rest: list) -> None: deploy_parts = utils.get_deploy_parts(args) deploy_text = ", ".join(deploy_parts) or "complete configuration" - devices.process_config_sw_check(topology) - lab_status_change(topology, f"deploying configuration: {deploy_text}") - - nodeset = _nodeset.parse_nodeset(args.limit, topology) if args.limit else list(topology.nodes.keys()) - nodeset = utils.filter_unprovisioned(nodeset, topology) + nodeset = utils.get_deploy_nodeset(args,topology) if not nodeset: error_and_exit("The specified nodeset is empty, there are no nodes to configure") + devices.process_config_sw_check(topology) + if not args.deploy: - log.info(text="Creating configuration snippets") + log.section_header('Creating',f'Device configuration snippets') configs.create_node_configs( topology=topology, nodeset=nodeset, @@ -121,18 +119,21 @@ def run(topology: Box, args: argparse.Namespace, rest: list) -> None: ) log.exit_on_error() - ready.device_ready(nodeset,topology) - (used_internal, status_internal) = deploy_provider_config(nodeset, topology, args) - if used_internal: - print() + ready.run(topology,args,rest) + log.exit_on_error() + + log.section_header('Config',f'Deploying device configurations') + lab_status_change(topology, f"deploying configuration: {deploy_text}") + + (used_internal, status_internal) = deploy_provider_config(nodeset, topology, args) ansible_skip_list = utils.nodeset_ansible_skip(nodeset, topology, args) if len(ansible_skip_list) != len(nodeset): utils.ansible_skip_group(ansible_skip_list) if used_internal: log.info("Starting Ansible playbook to deploy the rest of the configurations") - status_ansible = deploy_ansible_playbook(topology, rest) + status_ansible = deploy_ansible_playbook(topology,rest + utils.ansible_args(args)) utils.ansible_skip_group([]) else: status_ansible = True diff --git a/netsim/cli/initial/ready.py b/netsim/cli/initial/ready.py index ec719aae04..db30b94f35 100644 --- a/netsim/cli/initial/ready.py +++ b/netsim/cli/initial/ready.py @@ -1,6 +1,7 @@ # # netlab initial -- implement standard device readiness checks # +import argparse import concurrent.futures import subprocess import time @@ -11,7 +12,8 @@ from ...augment import devices as a_devices from ...data import append_to_list, get_empty_box from ...utils import log, strings -from .. import error_and_exit, external_commands +from .. import ansible, error_and_exit, external_commands, lab_status_change +from . import utils """ Prepare for SSH readiness check -- copy timeouts and retry counters, check for "sshpass", set up the SSH command @@ -97,7 +99,7 @@ def wait_for_ssh(n_name: str) -> bool: # Try out SSH server o r_data.ssh_ready = True # No errors, we're ready to roll now = time.time() if now > start_time + 5 or log.VERBOSE: # Report progress only if it's worth reporting - strings.print_colored_text('[SSH] ','green') + strings.print_colored_text('[SSH] ','green') print(f'SSH server on node {n_name} (device {n_data.device}) ' +\ f'is ready after {round(now - start_time,1)} seconds',flush=True) return True @@ -112,7 +114,7 @@ def wait_for_ssh(n_name: str) -> bool: # Try out SSH server o now = time.time() if now > start_time + r_data.wait: # Have we exceeded the wait period? r_data.ssh_failed = True - strings.print_colored_text('[SSH] ','red') + strings.print_colored_text('[SSH] ','red') print(f'SSH server on node {n_name} (device {n_data.device}) ' +\ f'is not ready after {round(now - start_time,1)} seconds',flush=True) if log.debug_active('ssh'): # Do we need to report SSH status periodically? @@ -134,7 +136,7 @@ def wait_for_ssh(n_name: str) -> bool: # Try out SSH server o setup_ssh_ready_parameters(waitset,topology) log.exit_on_error() start_time = time.time() - log.info(text=f'Checking SSH servers on {",".join(waitset)}') + log.info(text=f'Checking SSH server(s) on {",".join(waitset)}') with concurrent.futures.ThreadPoolExecutor() as executor: if strings.rich_color: @@ -157,8 +159,23 @@ def wait_for_ssh(n_name: str) -> bool: # Try out SSH server o Execute all "wait for device to be ready" steps recognized by "netlab initial". Further steps might have to be executed by Ansible playbooks """ -def device_ready(nodeset: list, topology: Box) -> None: +def internal_device_ready(waitlists: Box, topology: Box) -> Box: global READY_ACTIONS + + # Iterate over known steps, check whether any device needs that, and execute + # the corresponding ready function + for r_step in READY_ACTIONS.keys(): + if r_step not in waitlists: # Nobody asked for this step, move on + continue + if topology.defaults.netlab.initial.ready[r_step] != 'internal': + continue # We're not using the internal code for this step + + READY_ACTIONS[r_step](waitlists[r_step],topology) + waitlists.pop(r_step) + + return waitlists + +def get_waitlists(nodeset: list, topology: Box) -> Box: waitlists = get_empty_box() defaults = topology.defaults @@ -172,9 +189,21 @@ def device_ready(nodeset: list, topology: Box) -> None: for step in ready_steps: append_to_list(waitlists,step,n_name) - # Iterate over known steps, check whether any device needs that, and execute - # the corresponding ready function - for r_step in READY_ACTIONS.keys(): - if r_step not in waitlists: # Nobody asked for this step, move on - continue - READY_ACTIONS[r_step](waitlists[r_step],topology) + return waitlists + +def run(topology: Box, args: argparse.Namespace, rest: list) -> None: + nodeset = utils.get_deploy_nodeset(args,topology) + node_waits = get_waitlists(nodeset,topology) + if node_waits and not log.QUIET: + log.section_header('Checking',f'Are lab devices ready to be configured?') + + ansible_waits = internal_device_ready(node_waits,topology) + if ansible_waits: + log.info(text='Checking lab devices with an Ansible playbook') + ansible.check_version() + ansible_limit = ['--tag',','.join(['ready_'+wl for wl in ansible_waits.keys()])] + ansible.playbook('device-ready.ansible',rest + utils.min_ansible_args(args) + ansible_limit) + + if args.ready: + lab_status_change(topology,'devices are ready') + log.info(text='Lab devices are ready to be configured') diff --git a/netsim/cli/initial/utils.py b/netsim/cli/initial/utils.py index d8f7376c5f..923d200c45 100644 --- a/netsim/cli/initial/utils.py +++ b/netsim/cli/initial/utils.py @@ -13,7 +13,7 @@ from ...data import global_vars from ...utils import files as _files from ...utils import log, strings -from .. import common_parse_args, parser_lab_location +from .. import _nodeset, common_parse_args, parser_lab_location # @@ -91,11 +91,17 @@ def common_ansible_args() -> list: """ Build Ansible arguments based on 'netlab initial' parameters """ -def ansible_args(args: argparse.Namespace) -> list: +def min_ansible_args(args: argparse.Namespace) -> list: rest = common_ansible_args() - if args.limit: rest = ['--limit',args.limit] + rest + if args.fast or os.environ.get('NETLAB_FAST_CONFIG',None): + rest = ['-e','netlab_strategy=free'] + rest + + return rest + +def ansible_args(args: argparse.Namespace) -> list: + rest = min_ansible_args(args) if args.initial: rest = ['-t','initial'] + rest @@ -111,9 +117,6 @@ def ansible_args(args: argparse.Namespace) -> list: if args.custom: rest = ['-t','custom'] + rest - if args.fast or os.environ.get('NETLAB_FAST_CONFIG',None): - rest = ['-e','netlab_strategy=free'] + rest - return rest """ @@ -205,6 +208,13 @@ def filter_unprovisioned(nodeset: typing.List[str], topology: Box) -> typing.Lis unprovisioned_members = groups.group_members(topology, 'unprovisioned') return [node for node in nodeset if node not in unprovisioned_members] +""" +Get deployment nodeset from args.limit +""" +def get_deploy_nodeset(args: argparse.Namespace, topology: Box) -> list: + nodeset = _nodeset.parse_nodeset(args.limit, topology) if args.limit else list(topology.nodes.keys()) + return filter_unprovisioned(nodeset, topology) + """ ansible_skip_group: Modify Ansible inventory to include _grp_config_skip listing all the hosts that do not need Ansible deployment diff --git a/netsim/defaults/netlab.yml b/netsim/defaults/netlab.yml index db3a31c436..6f5f1f356c 100644 --- a/netsim/defaults/netlab.yml +++ b/netsim/defaults/netlab.yml @@ -12,3 +12,7 @@ create: pickle: tools: ansible: dirs + +initial: + ready: + ssh: internal diff --git a/netsim/devices/cumulus_nvue.yml b/netsim/devices/cumulus_nvue.yml index 6bf7f1a9f8..1a25624846 100644 --- a/netsim/devices/cumulus_nvue.yml +++ b/netsim/devices/cumulus_nvue.yml @@ -29,6 +29,7 @@ group_vars: ansible_network_os: cumulus_nvue ansible_connection: paramiko ansible_python_interpreter: auto_silent + netlab_ready: [ ansible ] features: initial: system_mtu: True diff --git a/netsim/devices/fortios.yml b/netsim/devices/fortios.yml index 433cf4c386..d7137bf24d 100644 --- a/netsim/devices/fortios.yml +++ b/netsim/devices/fortios.yml @@ -32,6 +32,7 @@ group_vars: ansible_httpapi_validate_certs: no ansible_httpapi_port: 80 netlab_console_connection: ssh + netlab_ready: [ ansible ] external: image: none features: diff --git a/netsim/devices/junos.yml b/netsim/devices/junos.yml index 7b604d5600..5929ff7199 100644 --- a/netsim/devices/junos.yml +++ b/netsim/devices/junos.yml @@ -13,6 +13,7 @@ group_vars: ansible_network_os: junos ansible_connection: netconf netlab_console_connection: ssh + netlab_ready: [ ansible ] features: initial: @@ -73,3 +74,7 @@ features: external: image: none + +clab: + group_vars: + netlab_ready: [ ssh, ansible ] diff --git a/netsim/devices/netscaler.yml b/netsim/devices/netscaler.yml index 18ab4f7aa6..60e8021c17 100644 --- a/netsim/devices/netscaler.yml +++ b/netsim/devices/netscaler.yml @@ -24,8 +24,7 @@ clab: ansible_ssh_pass: clab@123 ansible_connection: docker netlab_show_command: [ nscli, -U, '127.0.0.1:clab:clab@123', 'show $@' ] -# netlab_ready: [ ssh ] -# netlab_check_command: who + netlab_ready: [ ansible ] external: image: none diff --git a/netsim/devices/nxos.yml b/netsim/devices/nxos.yml index e9cc951e10..741d279823 100644 --- a/netsim/devices/nxos.yml +++ b/netsim/devices/nxos.yml @@ -11,7 +11,7 @@ clab: group_vars: ansible_ssh_pass: admin ansible_user: admin - netlab_ready: [ ssh ] + netlab_ready: [ ssh, ansible ] image: vrnetlab/vr-n9kv:9.3.8 node: kind: cisco_n9kv @@ -24,6 +24,7 @@ group_vars: ansible_connection: network_cli netlab_check_retries: 50 netlab_check_delay: 10 + netlab_ready: [ ansible ] # yamllint disable-line rule:line-length netlab_ssh_args: "-o KexAlgorithms=+diffie-hellman-group14-sha1 -o PubkeyAcceptedKeyTypes=+ssh-rsa -o HostKeyAlgorithms=+ssh-rsa" bfd: # NXOS requires lower default timer values diff --git a/netsim/devices/sros.yml b/netsim/devices/sros.yml index 5e6c495027..f1a2b5f166 100644 --- a/netsim/devices/sros.yml +++ b/netsim/devices/sros.yml @@ -14,6 +14,7 @@ group_vars: ansible_network_os: sros ansible_connection: paramiko_ssh netlab_console_connection: ssh + netlab_ready: [ ansible ] sros_use_openconfig: False netlab_match_protomap: ospf: [ ospf, ospf3 ] diff --git a/netsim/devices/vjunos-router.yml b/netsim/devices/vjunos-router.yml index 7134ab94a9..000976a7c0 100644 --- a/netsim/devices/vjunos-router.yml +++ b/netsim/devices/vjunos-router.yml @@ -27,6 +27,5 @@ clab: ansible_ssh_pass: admin@123 netlab_check_retries: 60 netlab_check_delay: 10 - netlab_ready: [ ssh ] graphite.icon: router diff --git a/netsim/devices/vjunos-switch.yml b/netsim/devices/vjunos-switch.yml index d7d6acc5bc..3bdc63fa93 100644 --- a/netsim/devices/vjunos-switch.yml +++ b/netsim/devices/vjunos-switch.yml @@ -33,6 +33,5 @@ clab: ansible_ssh_pass: admin@123 netlab_check_retries: 40 netlab_check_delay: 10 - netlab_ready: [ ssh ] graphite.icon: switch diff --git a/netsim/devices/vmx.yml b/netsim/devices/vmx.yml index 4e5b078957..1ef07b4a15 100644 --- a/netsim/devices/vmx.yml +++ b/netsim/devices/vmx.yml @@ -23,8 +23,6 @@ features: clab: image: vrnetlab/vr-vmx:18.2R1.9 build: https://containerlab.dev/manual/kinds/vr-vmx/ - group_vars: - netlab_ready: [ ssh ] node: kind: vr-vmx interface: diff --git a/netsim/devices/vptx.yml b/netsim/devices/vptx.yml index c97a6b2c17..280cf1dc3f 100644 --- a/netsim/devices/vptx.yml +++ b/netsim/devices/vptx.yml @@ -36,6 +36,5 @@ clab: ansible_ssh_pass: admin@123 netlab_check_retries: 40 netlab_check_delay: 10 - netlab_ready: [ ssh ] graphite.icon: switch diff --git a/netsim/devices/vsrx.yml b/netsim/devices/vsrx.yml index f1ca8c4e61..019e245004 100644 --- a/netsim/devices/vsrx.yml +++ b/netsim/devices/vsrx.yml @@ -44,6 +44,5 @@ clab: ansible_ssh_pass: "admin@123" netlab_check_retries: 60 netlab_check_delay: 10 - netlab_ready: [ ssh ] graphite.icon: firewall diff --git a/netsim/devices/vyos.yml b/netsim/devices/vyos.yml index 8442e0ea38..05f77833fb 100644 --- a/netsim/devices/vyos.yml +++ b/netsim/devices/vyos.yml @@ -94,6 +94,7 @@ clab: group_vars: ansible_connection: docker ansible_user: vyos + netlab_ready: [ ansible ] features: stub_loopback: True diff --git a/netsim/outputs/ansible.py b/netsim/outputs/ansible.py index 338b35af71..cd9a9b547a 100644 --- a/netsim/outputs/ansible.py +++ b/netsim/outputs/ansible.py @@ -6,7 +6,7 @@ from box import Box -from ..augment import nodes, plugin +from ..augment import devices, nodes, plugin from ..utils import files as _files from ..utils import log, strings, templates from . import _TopologyOutput, check_writeable @@ -84,6 +84,10 @@ def create(topology: Box) -> Box: if node.get(xg,False): # Add device to the extra group if it has the corresponding attribute set inventory[extra_groups[xg]].hosts[name] = {} + ready = devices.get_node_group_var(node,'netlab_ready',topology.defaults) or [] + for r_item in ready: + inventory[f'netlab_ready_{r_item}'].hosts[name] = {} + if 'devices' in defaults: for group in inventory.keys(): if group in defaults.devices: