Skip to content

Commit 3c9c0da

Browse files
authored
Merge pull request #3863 from nrwahl2/nrwahl2-fencing2
Some fencer device registration refactors
2 parents 4bff815 + fd4669a commit 3c9c0da

12 files changed

Lines changed: 354 additions & 375 deletions

File tree

cts/cts-fencing.in

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -570,34 +570,6 @@ class FenceTests(Tests):
570570
test.add_log_pattern("Delaying 'off' action targeting node3 using true3",
571571
negative=True)
572572

573-
def build_nodeid_tests(self):
574-
"""Register tests that use a corosync node id."""
575-
our_uname = localname()
576-
577-
# verify nodeid is supplied when nodeid is in the metadata parameters
578-
test = self.new_test("supply_nodeid",
579-
"Verify nodeid is given when fence agent has nodeid as parameter")
580-
581-
test.add_cmd("stonith_admin",
582-
args=f'--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname}"')
583-
test.add_cmd("stonith_admin", args=f"--output-as=xml -F {our_uname} -t 3")
584-
test.add_log_pattern(f"as nodeid with fence action 'off' targeting {our_uname}")
585-
586-
# verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters
587-
test = self.new_test("do_not_supply_nodeid",
588-
"Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter")
589-
590-
# use a host name that won't be in corosync.conf
591-
test.add_cmd("stonith_admin",
592-
args='--output-as=xml -R true1 -a fence_dummy_no_nodeid '
593-
f'-o mode=pass -o pcmk_host_list="regr-test {our_uname}"')
594-
test.add_cmd("stonith_admin", args="--output-as=xml -F regr-test -t 3")
595-
test.add_log_pattern("as nodeid with fence action 'off' targeting regr-test",
596-
negative=True)
597-
test.add_cmd("stonith_admin", args=f"--output-as=xml -F {our_uname} -t 3")
598-
test.add_log_pattern("as nodeid with fence action 'off' targeting {our_uname}",
599-
negative=True)
600-
601573
def build_unfence_tests(self):
602574
"""Register tests that verify unfencing."""
603575
our_uname = localname()
@@ -917,7 +889,6 @@ def main():
917889
tests.build_fence_no_merge_tests()
918890
tests.build_unfence_tests()
919891
tests.build_unfence_on_target_tests()
920-
tests.build_nodeid_tests()
921892
tests.build_remap_tests()
922893
tests.build_query_tests()
923894
tests.build_metadata_tests()

cts/support/fence_dummy.in

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!@PYTHON@
22
"""Dummy fence agent for testing."""
33

4-
__copyright__ = "Copyright 2012-2024 the Pacemaker project contributors"
4+
__copyright__ = "Copyright 2012-2025 the Pacemaker project contributors"
55
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
66

77
import io
@@ -159,14 +159,6 @@ ALL_OPT = {
159159
"shortdesc": "Ignored",
160160
"order": 4
161161
},
162-
"nodeid": {
163-
"getopt": "i:",
164-
"longopt": "nodeid",
165-
"help": "-i, --nodeid Corosync id of fence target (ignored)",
166-
"required": "0",
167-
"shortdesc": "Ignored",
168-
"order": 4
169-
},
170162
"uuid": {
171163
"getopt": "U:",
172164
"longopt": "uuid",
@@ -446,8 +438,6 @@ def main():
446438
no_reboot = True
447439
elif sys.argv[0].endswith("_no_on"):
448440
no_on = True
449-
elif sys.argv[0].endswith("_no_nodeid"):
450-
del ALL_OPT["nodeid"]
451441

452442
device_opt = ALL_OPT.keys()
453443

daemons/fenced/fenced_cib.c

Lines changed: 100 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -194,27 +194,60 @@ update_stonith_watchdog_timeout_ms(xmlNode *cib)
194194
stonith_watchdog_timeout_ms = timeout_ms;
195195
}
196196

197+
/*!
198+
* \internal
199+
* \brief Mark a fence device dirty if its \c cib_registered flag is \c TRUE
200+
*
201+
* \param[in] key Ignored
202+
* \param[in,out] value Fence device (<tt>fenced_device_t *</tt>)
203+
* \param[in] user_data Ignored
204+
*
205+
* \note This function is suitable for use with \c g_hash_table_foreach().
206+
*/
207+
static void
208+
mark_dirty_if_cib_registered(gpointer key, gpointer value, gpointer user_data)
209+
{
210+
fenced_device_t *device = value;
211+
212+
if (device->cib_registered) {
213+
device->dirty = TRUE;
214+
}
215+
}
216+
217+
/*!
218+
* \internal
219+
* \brief Return the value of a fence device's \c dirty flag
220+
*
221+
* \param[in] key Ignored
222+
* \param[in] value Fence device (<tt>fenced_device_t *</tt>)
223+
* \param[in] user_data Ignored
224+
*
225+
* \return \c dirty flag of \p value
226+
*
227+
* \note This function is suitable for use with
228+
* \c g_hash_table_foreach_remove().
229+
*/
230+
static gboolean
231+
device_is_dirty(gpointer key, gpointer value, gpointer user_data)
232+
{
233+
fenced_device_t *device = value;
234+
235+
return device->dirty;
236+
}
237+
197238
/*!
198239
* \internal
199240
* \brief Update all STONITH device definitions based on current CIB
200241
*/
201242
static void
202243
cib_devices_update(void)
203244
{
204-
GHashTableIter iter;
205-
stonith_device_t *device = NULL;
206-
207245
crm_info("Updating devices to version %s.%s.%s",
208246
crm_element_value(local_cib, PCMK_XA_ADMIN_EPOCH),
209247
crm_element_value(local_cib, PCMK_XA_EPOCH),
210248
crm_element_value(local_cib, PCMK_XA_NUM_UPDATES));
211249

212-
g_hash_table_iter_init(&iter, device_list);
213-
while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
214-
if (device->cib_registered) {
215-
device->dirty = TRUE;
216-
}
217-
}
250+
fenced_foreach_device(mark_dirty_if_cib_registered, NULL);
218251

219252
/* have list repopulated if cib has a watchdog-fencing-resource
220253
TODO: keep a cached list for queries happening while we are refreshing
@@ -224,78 +257,78 @@ cib_devices_update(void)
224257

225258
fenced_scheduler_run(local_cib);
226259

227-
g_hash_table_iter_init(&iter, device_list);
228-
while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
229-
if (device->dirty) {
230-
g_hash_table_iter_remove(&iter);
231-
}
232-
}
260+
fenced_foreach_device_remove(device_is_dirty);
233261
}
234262

263+
#define PRIMITIVE_ID_XP_FRAGMENT "/" PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "='"
264+
235265
static void
236-
update_cib_stonith_devices(const char *event, xmlNode * msg)
266+
update_cib_stonith_devices(const xmlNode *patchset)
237267
{
238-
int format = 1;
239-
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT,
240-
NULL, NULL);
241-
xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
242268
char *reason = NULL;
243269

244-
CRM_CHECK(patchset != NULL, return);
245-
crm_element_value_int(patchset, PCMK_XA_FORMAT, &format);
246-
247-
if (format != 2) {
248-
crm_warn("Unknown patch format: %d", format);
249-
return;
250-
}
251-
252-
for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL);
270+
for (const xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL,
271+
NULL);
253272
change != NULL; change = pcmk__xe_next(change, NULL)) {
254273

255274
const char *op = crm_element_value(change, PCMK_XA_OPERATION);
256275
const char *xpath = crm_element_value(change, PCMK_XA_PATH);
257-
const char *shortpath = NULL;
276+
const char *primitive_xpath = NULL;
258277

259278
if (pcmk__str_eq(op, PCMK_VALUE_MOVE, pcmk__str_null_matches)
260279
|| (strstr(xpath, "/" PCMK_XE_STATUS) != NULL)) {
261280
continue;
262281
}
263282

264-
if (pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)
265-
&& (strstr(xpath, "/" PCMK_XE_PRIMITIVE) != NULL)) {
283+
primitive_xpath = strstr(xpath, PRIMITIVE_ID_XP_FRAGMENT);
284+
if ((primitive_xpath != NULL)
285+
&& pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)) {
286+
266287
const char *rsc_id = NULL;
267-
char *search = NULL;
268-
char *mutable = NULL;
288+
const char *end_quote = NULL;
269289

270-
if ((strstr(xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL)
271-
|| (strstr(xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) {
290+
if ((strstr(primitive_xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL)
291+
|| (strstr(primitive_xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) {
272292

273293
reason = pcmk__str_copy("(meta) attribute deleted from "
274294
"resource");
275295
break;
276296
}
277-
mutable = pcmk__str_copy(xpath);
278-
rsc_id = strstr(mutable, PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'");
279-
if (rsc_id != NULL) {
280-
rsc_id += strlen(PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'");
281-
search = strchr(rsc_id, '\'');
297+
298+
rsc_id = primitive_xpath + sizeof(PRIMITIVE_ID_XP_FRAGMENT) - 1;
299+
end_quote = strchr(rsc_id, '\'');
300+
301+
CRM_LOG_ASSERT(end_quote != NULL);
302+
if (end_quote == NULL) {
303+
crm_err("Bug: Malformed item in Pacemaker-generated patchset");
304+
continue;
282305
}
283-
if (search != NULL) {
284-
*search = 0;
285-
stonith_device_remove(rsc_id, true);
306+
307+
if (strchr(end_quote, '/') == NULL) {
308+
/* The primitive element itself was deleted. If this was a
309+
* fencing resource, it's faster to remove it directly than to
310+
* run the scheduler and update all device registrations.
311+
*/
312+
char *copy = strndup(rsc_id, end_quote - rsc_id);
313+
314+
pcmk__assert(copy != NULL);
315+
stonith_device_remove(copy, true);
316+
286317
/* watchdog_device_update called afterwards
287318
to fall back to implicit definition if needed */
288-
} else {
289-
crm_warn("Ignoring malformed CIB update (resource deletion)");
319+
320+
free(copy);
321+
continue;
290322
}
291-
free(mutable);
292-
293-
} else if (strstr(xpath, "/" PCMK_XE_RESOURCES)
294-
|| strstr(xpath, "/" PCMK_XE_CONSTRAINTS)
295-
|| strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) {
296-
shortpath = strrchr(xpath, '/');
297-
pcmk__assert(shortpath != NULL);
298-
reason = crm_strdup_printf("%s %s", op, shortpath+1);
323+
}
324+
325+
if (strstr(xpath, "/" PCMK_XE_RESOURCES)
326+
|| strstr(xpath, "/" PCMK_XE_CONSTRAINTS)
327+
|| strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) {
328+
329+
const char *shortpath = strrchr(xpath, '/');
330+
331+
reason = crm_strdup_printf("%s %s", op, shortpath + 1);
299332
break;
300333
}
301334
}
@@ -313,8 +346,8 @@ static void
313346
watchdog_device_update(void)
314347
{
315348
if (stonith_watchdog_timeout_ms > 0) {
316-
if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
317-
!stonith_watchdog_targets) {
349+
if (!fenced_has_watchdog_device()
350+
&& (stonith_watchdog_targets == NULL)) {
318351
/* getting here watchdog-fencing enabled, no device there yet
319352
and reason isn't stonith_watchdog_targets preventing that
320353
*/
@@ -325,23 +358,22 @@ watchdog_device_update(void)
325358
STONITH_WATCHDOG_ID,
326359
st_namespace_internal,
327360
STONITH_WATCHDOG_AGENT,
328-
NULL, /* stonith_device_register will add our
361+
NULL, /* fenced_device_register() will add our
329362
own name as PCMK_STONITH_HOST_LIST param
330363
so we can skip that here
331364
*/
332365
NULL);
333-
rc = stonith_device_register(xml, TRUE);
366+
rc = fenced_device_register(xml, true);
334367
pcmk__xml_free(xml);
335-
if (rc != pcmk_ok) {
336-
rc = pcmk_legacy2rc(rc);
368+
if (rc != pcmk_rc_ok) {
337369
exit_code = CRM_EX_FATAL;
338370
crm_crit("Cannot register watchdog pseudo fence agent: %s",
339371
pcmk_rc_str(rc));
340372
stonith_shutdown(0);
341373
}
342374
}
343375

344-
} else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) {
376+
} else if (fenced_has_watchdog_device()) {
345377
/* be silent if no device - todo parameter to stonith_device_remove */
346378
stonith_device_remove(STONITH_WATCHDOG_ID, true);
347379
}
@@ -465,6 +497,7 @@ update_fencing_topology(const char *event, xmlNode *msg)
465497
static void
466498
update_cib_cache_cb(const char *event, xmlNode * msg)
467499
{
500+
xmlNode *patchset = NULL;
468501
long long timeout_ms_saved = stonith_watchdog_timeout_ms;
469502
bool need_full_refresh = false;
470503

@@ -483,7 +516,6 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
483516
if (local_cib != NULL) {
484517
int rc = pcmk_ok;
485518
xmlNode *wrapper = NULL;
486-
xmlNode *patchset = NULL;
487519

488520
crm_element_value_int(msg, PCMK__XA_CIB_RC, &rc);
489521
if (rc != pcmk_ok) {
@@ -498,6 +530,11 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
498530
switch (rc) {
499531
case pcmk_ok:
500532
case -pcmk_err_old_data:
533+
/* @TODO Full refresh (with or without query) in case of
534+
* -pcmk_err_old_data? It seems wrong to call
535+
* stonith_device_remove() based on primitive deletion in an
536+
* old diff.
537+
*/
501538
break;
502539
case -pcmk_err_diff_resync:
503540
case -pcmk_err_diff_failed:
@@ -532,7 +569,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
532569
} else {
533570
// Partial refresh
534571
update_fencing_topology(event, msg);
535-
update_cib_stonith_devices(event, msg);
572+
update_cib_stonith_devices(patchset);
536573
}
537574

538575
watchdog_device_update();

0 commit comments

Comments
 (0)