Skip to content

Commit a16e6d7

Browse files
committed
[os_must_gather] Add exit rc to os-must-gather main task
This change allows the rescue block to run. Previously, any timeout in the main task would return the exit code of the echo, which was always success. We've also removed the always block from the rescue block, and we've created an always section for finding existing os-must-gather directories and creating the symlink. We've also changed the dest-dir of the generic fallback command to match the folder used for the symlink. Removed oc inspect, as we're not getting many errors from oc adm must-gather, so it probably wouldn't be useful. Finally, we've parameterized SOS_EDPM as cifmw_os_must_gather_sos_edpm and given it a default value of "all". Signed-off-by: Enrique Vallespi Gil <evallesp@redhat.com>
1 parent 070c58c commit a16e6d7

3 files changed

Lines changed: 71 additions & 87 deletions

File tree

roles/os_must_gather/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ testing the new changes.
1212
* `cifmw_os_must_gather_output_dir`: (String) Directory to store logs generated by must-gather tool
1313
* `cifmw_os_must_gather_repo_path`: (string) Path to local clone of openstack-must-gather git repo
1414
* `cifmw_os_must_gather_timeout`: (String) Timeout for must-gather command
15+
* `cifmw_os_must_gather_sos_edpm`: (String) Indicates where to run the SOS report. Defaults to `all`.
1516
* `cifmw_os_must_gather_host_network`: (Bool) Flag to gather host network data
1617
* `cifmw_os_must_gather_namespaces`: (List) List of namespaces required by the gather task in case of failure
1718
* `cifmw_os_must_gather_additional_namespaces`: (String) List of comma separated additional namespaces. Defaults to `kuttl,openshift-storage,sushy-emulator`

roles/os_must_gather/defaults/main.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ cifmw_os_must_gather_image_registry: "quay.rdoproject.org/openstack-k8s-operator
2323
cifmw_os_must_gather_output_dir: "{{ cifmw_basedir }}"
2424
cifmw_os_must_gather_output_log_dir: "{{ cifmw_os_must_gather_output_dir }}/logs/openstack-must-gather"
2525
cifmw_os_must_gather_repo_path: "{{ ansible_user_dir }}/src/github.com/openstack-k8s-operators/openstack-must-gather"
26+
cifmw_os_must_gather_sos_edpm: "all"
2627
cifmw_os_must_gather_timeout: "30m"
2728
cifmw_os_must_gather_volume_percentage: 80
2829
cifmw_os_must_gather_additional_namespaces: "kuttl,openshift-storage,openshift-marketplace,openshift-operators,sushy-emulator,tobiko"

roles/os_must_gather/tasks/main.yml

Lines changed: 69 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
environment:
6262
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
6363
PATH: "{{ cifmw_path }}"
64-
SOS_EDPM: "all"
64+
SOS_EDPM: "{{ cifmw_os_must_gather_sos_edpm }}"
6565
SOS_DECOMPRESS: "0"
6666
OPENSTACK_DATABASES: "{{ cifmw_os_must_gather_dump_db }}"
6767
OMC: "{{ cifmw_os_must_gather_omc }}"
@@ -86,99 +86,81 @@
8686
echo "The must gather command did not finish on time!"
8787
echo "{{ shell_cmd_timeout }} seconds was not enough to finish the task."
8888
fi
89+
exit $rc
8990
}
91+
register: _must_gather_result
9092

93+
rescue:
94+
- name: Log openstack-must-gather failure
95+
ansible.builtin.debug:
96+
msg: "OpenStack must-gather failed, running fallback generic must-gather"
97+
98+
- name: Run fallback generic must-gather command without SOS report when timed out
99+
when:
100+
- _must_gather_result is defined
101+
- _must_gather_result.rc == 124
102+
environment:
103+
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
104+
PATH: "{{ cifmw_path }}"
105+
ansible.builtin.command:
106+
cmd: >-
107+
timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }}
108+
oc adm must-gather
109+
--dest-dir {{ cifmw_os_must_gather_output_log_dir }}
110+
--timeout {{ cifmw_os_must_gather_timeout }}
111+
--volume-percentage={{ cifmw_os_must_gather_volume_percentage }}
112+
113+
always:
91114
- name: Find existing os-must-gather directories
92115
ansible.builtin.find:
93116
paths: "{{ cifmw_os_must_gather_output_log_dir }}"
94117
file_type: directory
95118
depth: 1
96119
register: _os_gather_latest_dir
97120

98-
- name: Create a symlink to newest os-must-gather directory
99-
ansible.builtin.file:
100-
src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}"
101-
dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest"
102-
state: link
103-
104-
# Collect pod usage
105-
- name: Find all namespaces directories
106-
ansible.builtin.find:
107-
paths: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces"
108-
file_type: directory
109-
depth: 1
110-
register: _os_gather_namespaces
111-
112-
- name: Get resource usage by pods per namespace
113-
when: _os_gather_namespaces.files | length > 1
114-
vars:
115-
namespace_dir: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces/{{ _namespace_path.path | basename }}"
116-
ansible.builtin.shell: |
117-
oc adm top pods -n {{ _namespace_path.path | basename }} > {{ namespace_dir }}/pods-top.log
118-
loop: "{{ _os_gather_namespaces.files }}"
119-
loop_control:
120-
loop_var: _namespace_path
121-
environment:
122-
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
123-
124-
- name: Get node resource usage
125-
ansible.builtin.shell: |
126-
oc adm top nodes > {{ cifmw_os_must_gather_output_log_dir }}/latest/openstack-nodes-top.log
127-
environment:
128-
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
129-
130-
- name: Get all containers usage - sort by cpu
131-
ansible.builtin.shell: |
132-
oc adm top pods --all-namespaces --sort-by=cpu --containers > {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-cpu-top.log
133-
environment:
134-
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
135-
136-
- name: Get all containers usage - sort by memory
137-
ansible.builtin.shell: |
138-
oc adm top pods --all-namespaces --sort-by=memory --containers > {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-memory-top.log
139-
environment:
140-
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
141-
142-
rescue:
143-
- name: Openstack-must-gather failure
121+
- name: Symlink to newest log folder and run top commands
122+
when: _os_gather_latest_dir.files | length > 0
144123
block:
145-
- name: Log openstack-must-gather failure
146-
ansible.builtin.debug:
147-
msg: "OpenStack must-gather failed, running fallback generic must-gather"
148-
149-
- name: Run fallback generic must-gather command
150-
environment:
151-
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
152-
PATH: "{{ cifmw_path }}"
153-
ansible.builtin.command:
154-
cmd: >-
155-
timeout {{ (cifmw_os_must_gather_timeout | community.general.to_seconds) + 120 }}
156-
oc adm must-gather
157-
--dest-dir {{ ansible_user_dir }}/ci-framework-data/must-gather
158-
--timeout {{ cifmw_os_must_gather_timeout }}
159-
--volume-percentage={{ cifmw_os_must_gather_volume_percentage }}
160-
always:
161-
- name: Create oc_inspect log directory
162-
ansible.builtin.file:
163-
path: "{{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect"
164-
state: directory
165-
mode: "0755"
166-
167-
- name: Inspect the cluster after must-gather failure
168-
ignore_errors: true # noqa: ignore-errors
169-
environment:
170-
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
171-
PATH: "{{ cifmw_path }}"
172-
cifmw.general.ci_script:
173-
output_dir: "{{ cifmw_os_must_gather_output_dir }}/artifacts"
174-
script: |
175-
oc adm inspect namespace/{{ item }} --dest-dir={{ cifmw_os_must_gather_output_dir }}/logs/oc_inspect
176-
loop: >-
177-
{{
178-
(
179-
cifmw_os_must_gather_namespaces | default([]) +
180-
(
181-
cifmw_os_must_gather_additional_namespaces | split(',') | list
182-
)
183-
) | unique
184-
}}
124+
- name: Create a symlink to newest os-must-gather directory
125+
ansible.builtin.file:
126+
src: "{{ (_os_gather_latest_dir.files | sort(attribute='mtime', reverse=True) | first).path | basename }}"
127+
dest: "{{ cifmw_os_must_gather_output_log_dir }}/latest"
128+
state: link
129+
130+
# Collect pod usage
131+
- name: Find all namespaces directories
132+
ansible.builtin.find:
133+
paths: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces"
134+
file_type: directory
135+
depth: 1
136+
register: _os_gather_namespaces
137+
138+
- name: Get resource usage by pods per namespace
139+
when: _os_gather_namespaces.files | length > 1
140+
vars:
141+
namespace_dir: "{{ cifmw_os_must_gather_output_log_dir }}/latest/namespaces/{{ _namespace_path.path | basename }}"
142+
ansible.builtin.shell: |
143+
oc adm top pods -n {{ _namespace_path.path | basename }} > {{ namespace_dir }}/pods-top.log
144+
loop: "{{ _os_gather_namespaces.files }}"
145+
loop_control:
146+
loop_var: _namespace_path
147+
environment:
148+
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
149+
150+
- name: Get node resource usage
151+
ansible.builtin.shell: |
152+
oc adm top nodes > {{ cifmw_os_must_gather_output_log_dir }}/latest/openstack-nodes-top.log
153+
environment:
154+
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
155+
156+
- name: Get all containers usage - sort by cpu
157+
ansible.builtin.shell: |
158+
oc adm top pods --all-namespaces --sort-by=cpu --containers > {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-cpu-top.log
159+
environment:
160+
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"
161+
162+
- name: Get all containers usage - sort by memory
163+
ansible.builtin.shell: |
164+
oc adm top pods --all-namespaces --sort-by=memory --containers > {{ cifmw_os_must_gather_output_log_dir }}/latest/all-containers-memory-top.log
165+
environment:
166+
KUBECONFIG: "{{ cifmw_openshift_kubeconfig | default(cifmw_os_must_gather_kubeconfig) }}"

0 commit comments

Comments
 (0)