@@ -1031,7 +1031,6 @@ static int service_run_script(svc_t *svc, char *script)
10311031{
10321032 const char * id = svc_ident (svc , NULL , 0 );
10331033 pid_t pid = service_fork (svc );
1034- int status , rc ;
10351034
10361035 if (pid < 0 ) {
10371036 err (1 , "%s: failed forking off script %s" , id , script );
@@ -1047,6 +1046,8 @@ static int service_run_script(svc_t *svc, char *script)
10471046 };
10481047 char pidbuf [16 ];
10491048
1049+ redirect (svc );
1050+
10501051 snprintf (pidbuf , sizeof (pidbuf ), "%d" , svc -> pid );
10511052 setenv ("MAINPID" , pidbuf , 1 );
10521053
@@ -1056,23 +1057,7 @@ static int service_run_script(svc_t *svc, char *script)
10561057 }
10571058
10581059 dbg ("%s: script '%s' started as PID %d" , id , script , pid );
1059- if (waitpid (pid , & status , 0 ) == -1 ) {
1060- warn ("%s: failed calling script %s" , id , script );
1061- return -1 ;
1062- }
1063-
1064- rc = WEXITSTATUS (status );
1065- if (WIFEXITED (status )) {
1066- dbg ("%s: script '%s' exited without signal, status: %d" , id , script , rc );
1067- } else if (WIFSIGNALED (status )) {
1068- dbg ("%s: script '%s' terminated by signal %d" , id , script , WTERMSIG (status ));
1069- if (!rc )
1070- rc = 1 ;
1071- } else {
1072- dbg ("%s: script '%s' exited with status: %d" , id , script , rc );
1073- }
1074-
1075- return rc ;
1060+ return service_script_add (svc , pid , svc -> killdelay );
10761061}
10771062
10781063/* Ensure we don't have any notify socket lingering */
@@ -1093,15 +1078,31 @@ static void service_notify_stop(svc_t *svc)
10931078 */
10941079static void service_cleanup (svc_t * svc )
10951080{
1096- char * fn ;
1081+ char cond [ MAX_COND_LEN ] ;
10971082
10981083 /* PID collected, cancel any pending SIGKILL */
10991084 service_timeout_cancel (svc );
11001085
1101- fn = pid_file (svc );
1102- if (fn && remove (fn ) && errno != ENOENT )
1103- logit (LOG_CRIT , "Failed removing service %s pidfile %s" ,
1104- svc_ident (svc , NULL , 0 ), fn );
1086+ /* Only clean up process' pidfile if managed by us */
1087+ if (svc_has_pidfile (svc )) {
1088+ char * fn = pid_file (svc );
1089+
1090+ if (remove (fn ) && errno != ENOENT )
1091+ logit (LOG_CRIT , "Failed removing service %s pidfile %s" ,
1092+ svc_ident (svc , NULL , 0 ), fn );
1093+ }
1094+
1095+ /*
1096+ * Invalidate the pid/ condition for this service to ensure
1097+ * dependent services are properly stopped and restarted.
1098+ * Without this, the condition is only cleared asynchronously
1099+ * via inotify on pidfile removal, which may not trigger when
1100+ * the daemon fails to clean up its own pidfile, or when the
1101+ * service dies during a reload cycle and goes directly from
1102+ * RUNNING to HALTED (skipping STOPPING where cond_clear()
1103+ * is normally called).
1104+ */
1105+ cond_clear (mkcond (svc , cond , sizeof (cond )));
11051106
11061107 service_notify_stop (svc );
11071108
@@ -1140,7 +1141,7 @@ int service_stop(svc_t *svc)
11401141 service_timeout_cancel (svc );
11411142
11421143 if (svc -> stop_script [0 ]) {
1143- logit (LOG_CONSOLE | LOG_NOTICE , "%s[%d], calling stop:%s ..." , id , svc -> pid , svc -> stop_script );
1144+ logit (LOG_CONSOLE | LOG_NOTICE , "Stopping %s[%d], calling stop:%s ..." , id , svc -> pid , svc -> stop_script );
11441145 } else if (!svc_is_sysv (svc )) {
11451146 char * nm = pid_get_name (svc -> pid , NULL , 0 );
11461147 const char * sig = sig_name (svc -> sighalt );
@@ -1155,10 +1156,10 @@ int service_stop(svc_t *svc)
11551156 }
11561157
11571158 dbg ("Sending %s to pid:%d name:%s(%s)" , sig , svc -> pid , id , nm );
1158- logit (LOG_CONSOLE | LOG_NOTICE , "%s[%d], stopping , sending %s ..." , id , svc -> pid , sig );
1159+ logit (LOG_CONSOLE | LOG_NOTICE , "Stopping %s[%d], sending %s ..." , id , svc -> pid , sig );
11591160 } else {
11601161 compose_cmdline (svc , cmdline , sizeof (cmdline ));
1161- logit (LOG_CONSOLE | LOG_NOTICE , "%s[%d], calling '%s stop' ..." , id , svc -> pid , cmdline );
1162+ logit (LOG_CONSOLE | LOG_NOTICE , "Stopping %s[%d], calling '%s stop' ..." , id , svc -> pid , cmdline );
11621163 }
11631164
11641165 /*
@@ -1278,16 +1279,16 @@ static int service_reload(svc_t *svc)
12781279 print_desc ("Restarting " , svc -> desc );
12791280
12801281 if (svc -> reload_script [0 ]) {
1281- logit (LOG_CONSOLE | LOG_NOTICE , "%s[%d], calling reload:%s ..." , id , svc -> pid , svc -> reload_script );
1282+ logit (LOG_CONSOLE | LOG_NOTICE , "Reloading %s[%d], calling reload:%s ..." , id , svc -> pid , svc -> reload_script );
12821283 rc = service_run_script (svc , svc -> reload_script );
12831284 } else if (svc -> sighup ) {
12841285 if (svc -> pid <= 1 ) {
12851286 dbg ("%s[%d]: bad PID, cannot reload service" , id , svc -> pid );
12861287 svc -> start_time = svc -> pid = 0 ;
12871288 goto done ;
12881289 }
1289- dbg ("%s[%d], sending SIGHUP" , id , svc -> pid );
1290- logit (LOG_CONSOLE | LOG_NOTICE , "%s[%d], sending SIGHUP ..." , id , svc -> pid );
1290+ dbg ("Reloading %s[%d], sending SIGHUP" , id , svc -> pid );
1291+ logit (LOG_CONSOLE | LOG_NOTICE , "Reloading %s[%d], sending SIGHUP ..." , id , svc -> pid );
12911292 rc = kill (svc -> pid , SIGHUP );
12921293 if (rc == -1 && (errno == ESRCH || errno == ENOENT )) {
12931294 /* nobody home, reset internal state machine */
@@ -3134,6 +3135,19 @@ int service_step(svc_t *svc)
31343135 case COND_ON :
31353136 kill (svc -> pid , SIGCONT );
31363137 svc_set_state (svc , SVC_RUNNING_STATE );
3138+
3139+ /*
3140+ * Propagate reload (~): upstream reloaded, so
3141+ * we must also reload/restart, not just resume.
3142+ */
3143+ if (svc -> flux_reload && !svc_is_changed (svc )) {
3144+ if (svc_is_noreload (svc ))
3145+ service_stop (svc );
3146+ else
3147+ service_reload (svc );
3148+ break ;
3149+ }
3150+
31373151 /* Reassert condition if we go from waiting and no change */
31383152 if (!svc_is_changed (svc )) {
31393153 if (svc -> notify == SVC_NOTIFY_PID ) {
0 commit comments