diff --git a/cfe_internal/enterprise/mission_portal.cf b/cfe_internal/enterprise/mission_portal.cf index 4b75aab016..c9f8402768 100644 --- a/cfe_internal/enterprise/mission_portal.cf +++ b/cfe_internal/enterprise/mission_portal.cf @@ -288,9 +288,10 @@ bundle agent mission_portal_apache_from_stage(config, staged_config) string => "Configure apache based on successfully staged config"; classes: - "systemd_supervised" - expression => returnszero("$(paths.systemctl) -q is-active cf-apache > /dev/null 2>&1", "useshell"), - if => fileexists( $(paths.systemctl) ); + "systemd_supervised" -> { "ENT-11189" } + expression => returnszero("$(paths.systemctl) cat cf-apache > /dev/null 2>&1", "useshell"), + if => fileexists( $(paths.systemctl) ), + comment => "Set when cf-apache.service is a unit known to systemd"; "httpd_config_validated" expression => strcmp("$(validate_result[exit_code])", "0"); @@ -367,6 +368,13 @@ bundle agent mission_portal_apache_from_stage(config, staged_config) contain => in_shell, comment => "We restart apache after the new valid config is in place"; + methods: + systemd_supervised:: + "Reset cf-apache failed state" -> { "ENT-11189" } + usebundle => cf_apache_reset_failed_state, + if => "mission_portal_apache_config_repaired", + comment => "Clear any latched failed state before restarting cf-apache"; + services: systemd_supervised:: "cf-apache" @@ -540,3 +548,15 @@ bundle agent cfe_enterprise_selfsigned_cert "DEBUG $(this.bundle): No Certificate Generation Requested" if => "!_cfe_enterprise_selfsigned_cert_regenerate_certificate"; } + +bundle agent cf_apache_reset_failed_state +# @brief Clear any latched 'failed' state on cf-apache.service so subsequent +# service operations are not refused by systemd's start rate limiter +# (StartLimitBurst). Safe no-op when the unit is not in a failed state. +{ + commands: + "$(paths.systemctl) reset-failed cf-apache" -> { "ENT-11189" } + contain => in_shell, + handle => "cf_apache_systemctl_reset_failed", + comment => "Reset latched failed state on cf-apache.service"; +} diff --git a/templates/cf-apache.service.mustache b/templates/cf-apache.service.mustache index 9169be66b4..b0029646b0 100644 --- a/templates/cf-apache.service.mustache +++ b/templates/cf-apache.service.mustache @@ -18,6 +18,11 @@ ExecStart={{{vars.sys.workdir}}}/httpd/bin/apachectl start ExecStop={{{vars.sys.workdir}}}/httpd/bin/apachectl stop ExecReload={{{vars.sys.workdir}}}/httpd/bin/apachectl graceful PIDFile={{{vars.sys.workdir}}}/httpd/httpd.pid +# ENT-11189: apachectl writes the PID file shortly after fork. On a busy host +# (e.g. mid-upgrade with SELinux relabel, cf-postgres and cf-php-fpm churning) +# the default 90s start timeout has been observed to fire while apache is still +# coming up, leaving worker children bound to :80 and the unit in a restart loop. +TimeoutStartSec=300 Restart=always RestartSec=10 UMask=0177