From 6a67816c7b25fba8c8756f5cdac7fd34777d3eb2 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 26 Feb 2024 18:42:50 -0500 Subject: [PATCH 001/110] Refactoring director function to generalize (first step to define API for director function) --- .gitignore | 2 +- codes/surrogate/switch.h | 2 +- src/surrogate/init.c | 2 +- src/surrogate/switch.c | 175 ++++++++++++++++++++++----------------- 4 files changed, 102 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index 51a9c2eb..70223fe9 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,4 @@ ross.csv install-mastiff/include/codes/model-net-method.h # commonly used building folder -/build/ +/build*/ diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h index 553f3a11..3a56360a 100644 --- a/codes/surrogate/switch.h +++ b/codes/surrogate/switch.h @@ -61,7 +61,7 @@ extern struct switch_at_struct switch_at; // Switch -void director_switch(tw_pe * pe, tw_event_sig gvt_sig); +void director_call(tw_pe * pe, tw_event_sig gvt_sig); #ifdef __cplusplus } diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 79bb7e71..16772fc2 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -64,7 +64,7 @@ void surrogate_configure( PRINTF_ONCE("\n"); // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT - g_tw_gvt_arbitrary_fun = director_switch; + g_tw_gvt_arbitrary_fun = director_call; #ifdef USE_RAND_TIEBREAKER tw_event_sig time_stamp = {0}; diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 4b29ab18..32086f46 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -382,11 +382,92 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) { } +// This is an impure function, calling it twice WILL give different results. Only call it once! 
+bool hit_trigger(tw_stime gvt) { + if ( switch_at.current_i < switch_at.total + && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) { + double const switch_time = switch_at.time_stampts[switch_at.current_i]; +#ifdef USE_RAND_TIEBREAKER + assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); +#else + assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]); +#endif + assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it + + // Activating next switch + if (++switch_at.current_i < switch_at.total) { + double const next_switch = switch_at.time_stampts[switch_at.current_i]; + // Setting trigger for next switch + #ifdef USE_RAND_TIEBREAKER + tw_event_sig time_stamp = {0}; + time_stamp.recv_ts = next_switch; + //printf("Adding a trigger to activate next switch!\n"); + tw_trigger_arbitrary_fun_at(time_stamp); + #else + //printf("Adding a trigger to activate next switch!\n"); + tw_trigger_arbitrary_fun_at(next_switch); + #endif + } + // + return true; + } else { + return false; + } +} + + +#ifdef USE_RAND_TIEBREAKER +void switch_model(tw_pe * pe, tw_event_sig gvt_sig) { +#else +void switch_model(tw_pe * pe, tw_stime gvt) { +#endif + // Rollback if in optimistic mode +#ifdef USE_RAND_TIEBREAKER + if (g_tw_synchronization_protocol == OPTIMISTIC) { + assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + rollback_and_cancel_events_pe(pe, gvt_sig); + //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0); + assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + } +#else + if (g_tw_synchronization_protocol == OPTIMISTIC) { + assert(pe->GVT == gvt); + rollback_and_cancel_events_pe(pe, gvt); + //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0); + assert(pe->GVT == gvt); + } +#endif + surr_config.director.switch_surrogate(); + if (DEBUG_DIRECTOR && g_tw_mynode == 0) { + printf("Switching to %s\n", 
surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); + } + + // "Freezing" network events and activating LP's switch functions + if (freeze_network_on_switch) { + if (surr_config.director.is_surrogate_on()) { + model_net_method_switch_to_surrogate(); +#ifdef USE_RAND_TIEBREAKER + events_high_def_to_surrogate_switch(pe, gvt_sig); +#else + events_high_def_to_surrogate_switch(pe, gvt); +#endif + } else { + model_net_method_switch_to_highdef(); #ifdef USE_RAND_TIEBREAKER -void director_switch(tw_pe * pe, tw_event_sig gvt_sig) { + events_surrogate_to_high_def_switch(pe, gvt_sig); +#else + events_surrogate_to_high_def_switch(pe, gvt); +#endif + } + } +} + + +#ifdef USE_RAND_TIEBREAKER +void director_call(tw_pe * pe, tw_event_sig gvt_sig) { tw_stime const gvt = gvt_sig.recv_ts; #else -void director_switch(tw_pe * pe, tw_stime gvt) { +void director_call(tw_pe * pe, tw_stime gvt) { #endif assert(is_surrogate_configured); @@ -431,83 +512,24 @@ void director_switch(tw_pe * pe, tw_stime gvt) { } // Detecting if we are going to switch - if (switch_at.current_i < switch_at.total - && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) { - double const switch_time = switch_at.time_stampts[switch_at.current_i]; -#ifdef USE_RAND_TIEBREAKER - assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); -#else - assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]); -#endif - assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it - } else { + if (! 
hit_trigger(gvt)) { return; } - // ---- Past this means that we are in fact switching ---- + bool const pre_switch_status = surr_config.director.is_surrogate_on(); - double const start = tw_clock_read(); // Asking the director/model to switch if (DEBUG_DIRECTOR && g_tw_mynode == 0) { if (DEBUG_DIRECTOR == 2) { printf("\n"); } - printf("Switching at %f", gvt); - } - // Rollback if in optimistic mode -#ifdef USE_RAND_TIEBREAKER - if (g_tw_synchronization_protocol == OPTIMISTIC) { - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); - rollback_and_cancel_events_pe(pe, gvt_sig); - //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0); - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); - } -#else - if (g_tw_synchronization_protocol == OPTIMISTIC) { - assert(pe->GVT == gvt); - rollback_and_cancel_events_pe(pe, gvt); - //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0); - assert(pe->GVT == gvt); - } -#endif - surr_config.director.switch_surrogate(); - if (DEBUG_DIRECTOR && g_tw_mynode == 0) { - printf(" to %s\n", surr_config.director.is_surrogate_on() ? 
"surrogate" : "high-fidelity"); + printf("Switching at %f\n", gvt); } - // "Freezing" network events and activating LP's switch functions - if (freeze_network_on_switch) { - if (surr_config.director.is_surrogate_on()) { - model_net_method_switch_to_surrogate(); -#ifdef USE_RAND_TIEBREAKER - events_high_def_to_surrogate_switch(pe, gvt_sig); -#else - events_high_def_to_surrogate_switch(pe, gvt); -#endif - } else { - model_net_method_switch_to_highdef(); -#ifdef USE_RAND_TIEBREAKER - events_surrogate_to_high_def_switch(pe, gvt_sig); -#else - events_surrogate_to_high_def_switch(pe, gvt); -#endif - } - } - - // Activating next switch - if (++switch_at.current_i < switch_at.total) { - double const next_switch = switch_at.time_stampts[switch_at.current_i]; - // Setting trigger for next switch -#ifdef USE_RAND_TIEBREAKER - tw_event_sig time_stamp = {0}; - time_stamp.recv_ts = next_switch; - //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(time_stamp); -#else - //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(next_switch); -#endif - } + double const start = tw_clock_read(); + switch_model(pe, gvt_sig); + double const end = tw_clock_read(); + surrogate_switching_time += end - start; if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) { printf("Switch completed!\n"); @@ -515,17 +537,18 @@ void director_switch(tw_pe * pe, tw_stime gvt) { if (DEBUG_DIRECTOR > 1) { printf("PE %lu: Switch completed!\n", g_tw_mynode); } - double const end = tw_clock_read(); - surrogate_switching_time += end - start; // Determining time in surrogate - if (surr_config.director.is_surrogate_on()) { - // Start tracking time spent in surrogate mode - surrogate_time_last = end; - } else { - // We are done tracking time spent in surrogate mode - time_in_surrogate += start - surrogate_time_last; + if (pre_switch_status != surr_config.director.is_surrogate_on()) { + if (surr_config.director.is_surrogate_on()) { + // Start tracking time 
spent in surrogate mode + surrogate_time_last = end; + } else { + // We are done tracking time spent in surrogate mode + time_in_surrogate += start - surrogate_time_last; + } } } // // === END OF Director functionality +// vim: set tabstop=4 shiftwidth=4 expandtab : From 9b32a71cf69a79e3a28cee0f02aecf41dd26d5e7 Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 11 Apr 2024 12:42:09 -0400 Subject: [PATCH 002/110] Hardcoded example skipping iterations for TWO applications (MILC and Jacobi) --- .../print-iterations.py | 142 ++++++++++++++++++ src/network-workloads/model-net-mpi-replay.c | 63 +++++++- 2 files changed, 197 insertions(+), 8 deletions(-) create mode 100644 scripts/workload-iteration-times/print-iterations.py diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py new file mode 100644 index 00000000..a5e88010 --- /dev/null +++ b/scripts/workload-iteration-times/print-iterations.py @@ -0,0 +1,142 @@ +# Adapted from example from matplotlib lib + +from typing import Any, TextIO +import argparse +import pathlib + +import matplotlib.pyplot as plt +import matplotlib +import numpy as np + + +def plot_sequence(ax: Any, seq: Any, names: Any, height: Any, color: str = 'red', print_names: bool = True): + ax.vlines(seq, 0, height, color=f"tab:{color}") # The vertical stems. 
+ ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w") + + # annotate lines + if print_names: + for d, h, r in zip(seq, height, names): + ax.annotate(r, xy=(d, h), + xytext=(3, np.sign(h)*3), textcoords="offset points", + horizontalalignment="right", + verticalalignment="bottom" if h > 0 else "top") + + +# hardcoded data +def iterations_count_example(): + iterations = np.array([5700202, 11141148, 16735521, 22248304, 28018657, 33344653, 39131394, 44535575, 49924184, 55265978, 60797003, 65999354, 71477966, 77089252, 82388323, 87510575, 92672984, 97968684, 103413575, 108791049, 114191370, 119281369, 124947369, 130269516, 135814413, 140706572, 146191543, 152244928, 157549505, 163252774]) + names = np.arange(iterations.size) + # height = np.ones_like(names) + height = iterations.astype(np.float64) + height[1:] -= iterations[:-1] + # mean_height = height.mean() + # height /= mean_height + + iterations2 = np.array([4475938, 8527507, 12500772, 16932824, 21122232, 24629352, 28727112, 32812390, 37119760, 40873748, 44831210, 49236742, 53495581, 57186915, 61102874, 65089296, 69034116, 72827668, 77306215, 81505333, 84962239, 88817963, 92788913, 97258245, 101298185, 105234798, 109230081, 113176951, 117033360, 120922482, 125158680, 129445759, 132927795, 136967719, 140707240, 144980904, 148570317, 152949619, 157429076, 161858572, 165599534, 169169124, 172576205, 176267989, 179822127, 183531146, 187147511, 190685445, 194270774, 197863388, 201349592, 204959427, 208557228, 212286717, 215720477, 219201662, 222629090, 226452092, 230156036, 233856397, 237545455, 241265332, 245016561, 248662995, 252212229, 255620388, 259105490, 262543988, 266118703, 269713894, 273230378, 276923706, 280425248, 284046990, 287508037, 291266834, 294812966, 298512239, 302113836, 305636975, 309307151, 312842662, 316463094, 320055020, 323542940, 327139573, 330811189, 334388299, 337788549, 341498322, 345104703, 348880050, 352448690, 356106442, 359506153, 363094952, 366703208, 370233755, 
373770752, 377222496]) + names2 = np.arange(iterations2.size) + # height2 = -1 * np.ones_like(names2) + height2 = iterations2.astype(np.float64) + height2[1:] -= iterations2[:-1] + # height2 /= mean_height + height2 *= -1 + + return (iterations, names, height), (iterations2, names2, height2) + + +# class JobAvgIterations(TypedDict): +# iterations: + + +# typing cannot be done for structured arrays :S +def parse_iteration_log(log_file: TextIO): + log_pattern = r'ITERATION (\d+) node \d+ job (\d+) rank \d+ time (\d*\.?\d+)\n' + log_iters = np.fromregex(log_file, log_pattern, [('iter', np.int64), ('job', np.int64), ('time', np.float64)]) + + def get_avg_for_iters(job: np.int64): + def avg(it: np.int64) -> np.float64: + matched_iters = log_iters[np.bitwise_and(log_iters['job'] == job, log_iters['iter'] == it)] + return np.mean(matched_iters['time'].astype(np.float64)) + return avg + + jobs: dict[int, np.ndarray[Any, Any]] = {} + for job in np.unique(log_iters['job']): + iterations = np.unique(log_iters[log_iters['job'] == job]['iter']) + # avg_timestamp = np.vectorize(get_avg_for_iters(job), otypes=(np.float64,))(iterations) + avg_timestamp = np.array([get_avg_for_iters(job)(it) for it in iterations]) + assert(iterations.size == avg_timestamp.size) + + # finding time that each iteration took + avg_iter_time = avg_timestamp.copy() + avg_iter_time[1:] -= avg_timestamp[:-1] + # "removing" iterations for which we don't know how much they actually took + to_rem = iterations.copy() + to_rem[1:] -= to_rem[:-1] + 1 + to_rem[0] = 0 # Assuming the first value hasn't been skipped + avg_iter_time[to_rem != 0] = 0 + + combined = np.zeros_like(iterations, dtype=[('iter', np.int64), ('time', np.float64), ('iter_time', np.float64)]) + combined['iter'] = iterations + combined['time'] = avg_timestamp + combined['iter_time'] = avg_iter_time + jobs[int(job)] = combined + + return jobs + + +# if __name__ == "__main__": +# (iterations, names, height), (iterations2, names2, height2) = 
iterations_count_example() +# fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained") +# plot_sequence(ax, iterations, names, height, 'blue') +# plot_sequence(ax, iterations2, names2, height2, 'red') +# plt.setp(ax.get_xticklabels(), rotation=30, ha="right") +# plt.show() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + _ = parser.add_argument('file', type=argparse.FileType('r')) + _ = parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', default=None) + _ = parser.add_argument('--no-iter-count', dest='iter_count', action='store_false') + args = parser.parse_args() + + if args.output: + matplotlib.use("pgf") + matplotlib.rcParams.update({ + "pgf.texsystem": "pdflatex", + 'font.family': 'serif', + 'font.size': 16, + 'text.usetex': True, + 'pgf.rcfonts': False, + }) + + parsed_logs = parse_iteration_log(args.file) + + # Creating plot with data + fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained") + ax.set_xlabel("Total virtual time (ns)") + ax.set_ylabel("Virtual time per iteration (ns)") + #ax.set(title="") + smallest_timestamp = list(parsed_logs.values())[0]['time'][0] + ax.plot([0, smallest_timestamp], [0, 0], "-", color="k", markerfacecolor="w") + + color_table = ['red', 'blue', 'green', 'black'] + for i, job in enumerate(parsed_logs.keys()): + # Flipping second sequence if there are only two jobs + mul = -1 if len(parsed_logs) == 2 and i == 1 else 1 + plot_sequence( + ax, + parsed_logs[job]['time'], + parsed_logs[job]['iter'], + mul * parsed_logs[job]['iter_time'], + color=color_table[i], + print_names=args.iter_count) + + plt.setp(ax.get_xticklabels(), rotation=30, ha="right") + + #ax.margins(y=0.1) + if args.output: + plt.tight_layout() + plt.savefig(f'{args.output}.pgf', bbox_inches='tight') + plt.savefig(f'{args.output}.pdf', bbox_inches='tight') + else: + plt.show() diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 
1433b2a3..b58f572c 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -1117,24 +1117,60 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) } } -void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { +// Surrogate switiching structure +struct AvgSurrogateSwitchingTimesForApp { + int app_id; + int skip_at_iter; + int resume_at_iter; + double time_per_iter; + bool done[72]; // This is a flag to indicate whethe we already completed this skipping stage +}; + +static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) { + return avgSur->resume_at_iter - avgSur->skip_at_iter; +} + +static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = { + // done, app_id, skip_at_iter, resume_at_iter, time_per_iter + {0, 3, 21, 14403235, {false}}, + {1, 7, 59, 4982017, {false}}, + {1, 79, 195, 3581337, {false}}, +}; + +struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { + int n_jumps = (sizeof(skip_iter_config)/sizeof(skip_iter_config[0])); + for (int i=0; i < n_jumps; i++) { + struct AvgSurrogateSwitchingTimesForApp * jump = &skip_iter_config[i]; + if (!jump->done[s->local_rank] && jump->app_id == s->app_id) { + return jump; + } + } + return NULL; +} + +static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { // TODO: implement!! 
} -void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) +static void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { struct codes_workload_op * mpi_op = (struct codes_workload_op*) malloc(sizeof(struct codes_workload_op)); m->mpi_op = mpi_op; - // consuming all events until iteration 95 from iteration 4 + struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); + assert(switch_config != NULL); + int const resume_at_iter = switch_config->resume_at_iter; + + // consuming all events until indicated iteration is reached bool reached_end = false; while (!reached_end) { codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op); switch (mpi_op->op_type) { case CODES_WK_MARK: - if (mpi_op->u.send.tag == 95) { + if (mpi_op->u.send.tag == resume_at_iter) { reached_end = true; + codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op); } break; // If we reach the end of simulation, rollback once to allow the operation to be processed normally @@ -1147,17 +1183,28 @@ void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) } } + switch_config->done[s->local_rank] = true; + tw_event *e = tw_event_new(lp->gid, 0.0, lp); nw_message* msg = (nw_message*) tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; tw_event_send(e); } -bool have_we_hit_surrogate_switch(struct codes_workload_op * mpi_op) { - //return mpi_op->u.send.tag == 4; +static bool have_we_hit_surrogate_switch(struct nw_state* s, struct codes_workload_op * mpi_op) { + struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); + if (switch_config != NULL) { + return mpi_op->u.send.tag == switch_config->skip_at_iter; + } return false; } +static double time_to_skip_iterations(struct nw_state* s, struct codes_workload_op * mpi_op) { + struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); + assert(switch_config != NULL); + return switch_config->time_per_iter * 
iters_skipped(switch_config); +} + /* Debugging functions, may generate unused function warning */ /*static void print_waiting_reqs(uint32_t * reqs, int count) { @@ -2994,8 +3041,8 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l m->rc.saved_marker_time = tw_now(lp); // If we have reached the surrogate switch time, skip next iteration(s) - if (have_we_hit_surrogate_switch(mpi_op)) { - tw_event *e = tw_event_new(lp->gid, 2076575.16 * 91, lp); + if (have_we_hit_surrogate_switch(s, mpi_op)) { + tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s, mpi_op), lp); nw_message* msg = (nw_message*) tw_event_data(e); msg->msg_type = SURR_SKIP_ITERATION; tw_event_send(e); From 42f7cd57ea11726f8396693c97d81413e593c7fb Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 18 Apr 2024 09:53:54 -0400 Subject: [PATCH 003/110] Improving figure generation script --- .gitignore | 5 +- scripts/workload-iteration-times/README.txt | 6 ++ .../print-iterations.py | 78 +++++++++++++++---- src/network-workloads/model-net-mpi-replay.c | 8 +- 4 files changed, 79 insertions(+), 18 deletions(-) create mode 100644 scripts/workload-iteration-times/README.txt diff --git a/.gitignore b/.gitignore index 70223fe9..b023d553 100644 --- a/.gitignore +++ b/.gitignore @@ -39,5 +39,8 @@ ross.csv install-mastiff/include/codes/model-net-method.h -# commonly used building folder +# commonly used building stuff /build*/ +/build* +.cache +compile_commands.json diff --git a/scripts/workload-iteration-times/README.txt b/scripts/workload-iteration-times/README.txt new file mode 100644 index 00000000..bc2d5a16 --- /dev/null +++ b/scripts/workload-iteration-times/README.txt @@ -0,0 +1,6 @@ +To reproduce figures: + +```bash +python print-iterations.py /home/helq/Research/HPC/code/kronos/2024-feb-22/experiments/union/milc-jacobi/results/exp-007/iteration-logs --output figures/milc-jacobi-hf --legends Jacobi MILC +python print-iterations.py 
/home/helq/Research/HPC/code/kronos/2024-feb-22/experiments/union/milc-jacobi/results/exp-003/iteration-logs --output figures/milc-jacobi-surrogate --legends Jacobi MILC +``` diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py index a5e88010..b60bc5ae 100644 --- a/scripts/workload-iteration-times/print-iterations.py +++ b/scripts/workload-iteration-times/print-iterations.py @@ -3,19 +3,59 @@ from typing import Any, TextIO import argparse import pathlib +import colorsys import matplotlib.pyplot as plt import matplotlib +from matplotlib.patches import Rectangle +from matplotlib.lines import Line2D import numpy as np - - -def plot_sequence(ax: Any, seq: Any, names: Any, height: Any, color: str = 'red', print_names: bool = True): - ax.vlines(seq, 0, height, color=f"tab:{color}") # The vertical stems. - ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w") +import matplotlib.colors as mc + + +def adjust_lightness(color: str | tuple[float, float, float], amount: float = 0.5): + """ + Taken from: https://stackoverflow.com/a/49601444 + Smaller than 1 amounts darkness, larger than 1 lightens + Examples: + >> adjust_lightness('g', 1.3) + >> adjust_lightness('#F034A3', 0.6) + >> adjust_lightness((.3,.55,.1), 1.5) + """ + try: + c = mc.cnames[color] # type: ignore[reportArgumentType] + except: + c = color + c = colorsys.rgb_to_hls(*mc.to_rgb(c)) + return colorsys.hls_to_rgb(c[0], max(0, min(1, amount * c[1])), c[2]) + + +def plot_sequence( + ax: Any, + seq: Any, + names: Any, + height: Any, + color: str = 'red', + print_names: bool = True +): + box = Rectangle((0, 0), seq[0], height[0], color=adjust_lightness(color, 1.7)) + ax.add_patch(box) + for start, end, heit in zip(seq, height[1:], height[1:]): + box = Rectangle((start, 0), end, heit, color=adjust_lightness(color, 1.7)) + ax.add_patch(box) + + ax.vlines(seq, 0, height, color=adjust_lightness(color, 1.3)) + + non_zero_height = height != 
0 + cleaned_seq = seq[non_zero_height] + cleaned_height = height[non_zero_height] + ax.scatter(cleaned_seq, cleaned_height, marker='.', color=color) + # ax.plot(seq, np.zeros_like(seq), "-o", color="k", markerfacecolor="w") # annotate lines if print_names: - for d, h, r in zip(seq, height, names): + cleaned_names = names[non_zero_height] + for d, h, r in zip(cleaned_seq, cleaned_height, cleaned_names): ax.annotate(r, xy=(d, h), xytext=(3, np.sign(h)*3), textcoords="offset points", horizontalalignment="right", @@ -96,7 +136,8 @@ def avg(it: np.int64) -> np.float64: parser = argparse.ArgumentParser() _ = parser.add_argument('file', type=argparse.FileType('r')) _ = parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', default=None) - _ = parser.add_argument('--no-iter-count', dest='iter_count', action='store_false') + _ = parser.add_argument('--iter-count', dest='iter_count', action='store_true') + _ = parser.add_argument('--legends', nargs='+', help='Application names', required=False) args = parser.parse_args() if args.output: @@ -112,17 +153,18 @@ def avg(it: np.int64) -> np.float64: parsed_logs = parse_iteration_log(args.file) # Creating plot with data - fig, ax = plt.subplots(figsize=(8.8, 4), layout="constrained") + fig, ax = plt.subplots(figsize=(6, 3), layout="constrained") ax.set_xlabel("Total virtual time (ns)") - ax.set_ylabel("Virtual time per iteration (ns)") + ax.set_ylabel("Virtual time \nper iteration (ns)") #ax.set(title="") - smallest_timestamp = list(parsed_logs.values())[0]['time'][0] - ax.plot([0, smallest_timestamp], [0, 0], "-", color="k", markerfacecolor="w") + largest_timestamp = max(v['time'].max() for v in parsed_logs.values()) + ax.plot([0, largest_timestamp], [0, 0], "-", color="k", markerfacecolor="w") - color_table = ['red', 'blue', 'green', 'black'] + color_table = ['tab:red', 'tab:blue', 'tab:green', 'tab:black'] for i, job in enumerate(parsed_logs.keys()): # Flipping second sequence if there are only two 
jobs - mul = -1 if len(parsed_logs) == 2 and i == 1 else 1 + # mul = -1 if len(parsed_logs) == 2 and i == 1 else 1 + mul = 1 plot_sequence( ax, parsed_logs[job]['time'], @@ -132,6 +174,16 @@ def avg(it: np.int64) -> np.float64: print_names=args.iter_count) plt.setp(ax.get_xticklabels(), rotation=30, ha="right") + + if args.legends: + custom_lines = [] + legends = [] + for legend, color in zip(args.legends, color_table): + # Finding legend for application with ID i + legend: str + legends.append(legend) + custom_lines.append(Line2D([0], [0], color=color)) + ax.legend(custom_lines, legends) #ax.margins(y=0.1) if args.output: diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index b58f572c..7f9f554b 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -1131,10 +1131,10 @@ static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) { } static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = { - // done, app_id, skip_at_iter, resume_at_iter, time_per_iter - {0, 3, 21, 14403235, {false}}, - {1, 7, 59, 4982017, {false}}, - {1, 79, 195, 3581337, {false}}, + // app_id, skip_at_iter, resume_at_iter, time_per_iter, done + //{0, 3, 21, 14403235, {false}}, + //{1, 7, 59, 4982017, {false}}, + //{1, 79, 195, 3581337, {false}}, }; struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { From 44d5f69acad51f74a2ec8406068f7b62fb9fad10 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 29 Apr 2024 12:42:22 -0400 Subject: [PATCH 004/110] Fix: replacing O(n) table lookup for O(1) When calling the function `jobmap_list_to_local`, this would go through the entire list of IDs until it finds a matching ID. This is O(n) in the average and worst cases. For small networks, this won't take much time, so it never flared up as an issue. When running larger network simulations, at 8K nodes, there was a significant slowdown. 
This function was found, after extensive profiling, to be the principal culprit. The fix is simple: make a table where looking for an ID is O(1). A simple array does the trick. After running some experiments, there's a significant speedup of 30% for a network of 8448 nodes with a job using all nodes. The job was uniform random and the simulation was run for 10ms (virtual time). --- src/util/jobmap-impl/jobmap-list.c | 56 +++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/src/util/jobmap-impl/jobmap-list.c b/src/util/jobmap-impl/jobmap-list.c index 7876727f..5ab3abf4 100644 --- a/src/util/jobmap-impl/jobmap-list.c +++ b/src/util/jobmap-impl/jobmap-list.c @@ -31,6 +31,12 @@ struct jobmap_list { int num_jobs; int *rank_counts; int **global_ids; + + // This is a look up table containing the same info as above, but with O(1) access. + // It is used by `jobmap_list_to_local`. This solves a scalibility bug that appears + // when all jobs combined have many nodes (> 8K nodes) + int highest_global_id; + struct codes_jobmap_id * id_to_jobmap; }; #define COND_REALLOC(_len_expr, _cap_var, _buf_var) \ @@ -148,6 +154,37 @@ static int jobmap_list_configure(void const * params, void ** ctx) } } while (!feof(f)); + // === Building id_to_jobmap lookup table === + // There's some room for improvement (we can probably loop fewer times and fuze some + // loops together), but they are relatively inexpensive when done once at the start + // of the simulation, so this acceptable + + // Finding highest global id. 
Although we should be able to get this from the network + // configuration file, we look it up in here to keep different parts of CODES separated/modularized + lst->highest_global_id = -1; + for(int i=0; inum_jobs; i++) { + for(int j=0; j < lst->rank_counts[i]; j++) { + if(lst->highest_global_id < lst->global_ids[i][j]) { + lst->highest_global_id = lst->global_ids[i][j]; + } + } + } + lst->id_to_jobmap = calloc(lst->highest_global_id + 1, sizeof(*lst->id_to_jobmap)); + for (int i=0; i<=lst->highest_global_id; i++) { + lst->id_to_jobmap[i].job = -1; + lst->id_to_jobmap[i].rank = -1; + } + // Finally, filling up the table + for(int i=0; inum_jobs; i++) { + for(int j=0; j < lst->rank_counts[i]; j++) { + int const id = lst->global_ids[i][j]; + lst->id_to_jobmap[id].job = i; + lst->id_to_jobmap[id].rank = j; + } + } + // === === + + // returning if everything went alright if (rc == 0) { fclose(f); free(line_buf); @@ -160,6 +197,7 @@ static int jobmap_list_configure(void const * params, void ** ctx) } free(lst->global_ids); free(lst->rank_counts); + free(lst->id_to_jobmap); free(lst); *ctx = NULL; return -1; @@ -168,23 +206,14 @@ static int jobmap_list_configure(void const * params, void ** ctx) static struct codes_jobmap_id jobmap_list_to_local(int id, void const * ctx) { - struct codes_jobmap_id rtn; - rtn.job = -1; - rtn.rank = -1; - struct jobmap_list const *lst = (struct jobmap_list const *)ctx; - for(int i=0; inum_jobs; i++) { - for(int j=0; j < lst->rank_counts[i]; j++) { - if(id == lst->global_ids[i][j]) { - rtn.job = i; - rtn.rank = j; - return rtn; - } - } + // invalid id from what we got in the config + if (id < 0 || lst->highest_global_id < id) { + return (struct codes_jobmap_id) { .job = -1, .rank = -1 }; } - return rtn; + return lst->id_to_jobmap[id]; } static int jobmap_list_to_global(struct codes_jobmap_id id, void const * ctx) @@ -221,6 +250,7 @@ static void jobmap_list_destroy(void * ctx) free(lst->global_ids); free(lst->rank_counts); + 
free(lst->id_to_jobmap); free(ctx); } From c589d49a7996f03765cf7baabaadbbcd59b4e443 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 8 May 2024 18:48:26 -0400 Subject: [PATCH 005/110] Injecting iteration time as an argument --- .../workload-iteration-times/print-iterations.py | 7 +++++++ src/network-workloads/model-net-mpi-replay.c | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/scripts/workload-iteration-times/print-iterations.py b/scripts/workload-iteration-times/print-iterations.py index b60bc5ae..efb637e5 100644 --- a/scripts/workload-iteration-times/print-iterations.py +++ b/scripts/workload-iteration-times/print-iterations.py @@ -138,6 +138,7 @@ def avg(it: np.int64) -> np.float64: _ = parser.add_argument('--output', type=pathlib.Path, help='Name of output figure', default=None) _ = parser.add_argument('--iter-count', dest='iter_count', action='store_true') _ = parser.add_argument('--legends', nargs='+', help='Application names', required=False) + _ = parser.add_argument('--no-show-plot', dest='show_plot', action='store_false') args = parser.parse_args() if args.output: @@ -152,6 +153,12 @@ def avg(it: np.int64) -> np.float64: parsed_logs = parse_iteration_log(args.file) + final_timestamp = float(max(job['time'].max() for job in parsed_logs.values())) + print("Simulation end =", final_timestamp) + + if not args.show_plot: + exit(0) + # Creating plot with data fig, ax = plt.subplots(figsize=(6, 3), layout="constrained") ax.set_xlabel("Total virtual time (ns)") diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 7f9f554b..6a063d6e 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -158,6 +158,10 @@ static double sampling_interval = 5000000; static double sampling_end_time = 3000000000; static int enable_debug = 0; +// More hardcoded values for surrogate switch +static int start_iter_skip_app = -1; +static 
double avg_time_app = -1.0; + /* set group context */ struct codes_mctx mapping_context; enum MAPPING_CONTEXTS @@ -1132,9 +1136,9 @@ static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) { static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = { // app_id, skip_at_iter, resume_at_iter, time_per_iter, done - //{0, 3, 21, 14403235, {false}}, - //{1, 7, 59, 4982017, {false}}, - //{1, 79, 195, 3581337, {false}}, + {0, 3, 21, 14403235, {false}}, + {1, 7, 59, 4982017, {false}}, + {1, 79, 195, 3581337, {false}}, }; struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { @@ -3327,6 +3331,8 @@ const tw_optdef app_opt [] = TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"), TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"), TWOPT_CHAR("offset_file", offset_file, "offset file name"), + TWOPT_UINT("start-iter-skip-app", start_iter_skip_app, "Hardcoded value to indicate when to switch to surrogate for app 1"), + TWOPT_STIME("avg-time-app", avg_time_app, "Hardcoded value for Avg. 
iteration time for app 1"), #ifdef ENABLE_CORTEX_PYTHON TWOPT_CHAR("cortex-file", cortex_file, "Python file (without .py) containing the CoRtEx translation class"), TWOPT_CHAR("cortex-class", cortex_class, "Python class implementing the CoRtEx translator"), @@ -3470,6 +3476,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_opt_add(app_opt); tw_opt_add(cc_app_opt); tw_init(argc, argv); + skip_iter_config[2].skip_at_iter = start_iter_skip_app; + skip_iter_config[2].time_per_iter = avg_time_app; + #ifdef USE_RDAMARIS if(g_st_ross_rank) { // keep damaris ranks from running code between here up until tw_end() From 9f605f09421a8c74da2e308aad9c2fb4f9d71a10 Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 7 Jul 2024 19:26:37 -0400 Subject: [PATCH 006/110] Fixing compilation warning `incompatible-pointer-types` This bug was introduced when building the network surrogate. To build the surrogate, we need to track the input queue "size" (the input message queue to the routers from the workloads). If the network surrogate wouldn't live down in specific network models (it has been implemented right now only on dragonfly-dally), it should actually reside within the model-net layer, and thus, individual models shouldn't need to track the state of the input queue. Hopefully, we can move the network surrogate from dragonfly-dally into model-net. 
--- src/network-workloads/model-net-mpi-replay.c | 1 + src/networks/model-net/dragonfly.c | 3 ++- src/networks/model-net/fattree.c | 3 ++- src/networks/model-net/loggp.c | 6 ++++-- src/networks/model-net/simplenet-upd.c | 7 +++++-- src/networks/model-net/simplep2p.c | 6 ++++-- src/networks/model-net/slimfly.c | 3 ++- src/networks/model-net/torus.c | 3 ++- 8 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 1433b2a3..26d31694 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -18,6 +18,7 @@ #include "codes/quickhash.h" #include "codes/codes-jobmap.h" #include "codes/congestion-controller-core.h" +#include "codes/surrogate/init.h" /* turning on track lp will generate a lot of output messages */ #define DBG_COMM 1 diff --git a/src/networks/model-net/dragonfly.c b/src/networks/model-net/dragonfly.c index eb5e81bb..faee79d8 100644 --- a/src/networks/model-net/dragonfly.c +++ b/src/networks/model-net/dragonfly.c @@ -1087,7 +1087,8 @@ static tw_stime dragonfly_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/fattree.c b/src/networks/model-net/fattree.c index eb1c49b5..e7db6c61 100644 --- a/src/networks/model-net/fattree.c +++ b/src/networks/model-net/fattree.c @@ -1527,7 +1527,8 @@ static tw_stime fattree_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { #if DEBUG_RC packet_event_f++; diff --git a/src/networks/model-net/loggp.c b/src/networks/model-net/loggp.c index 22904287..def3eb22 100644 --- a/src/networks/model-net/loggp.c +++ b/src/networks/model-net/loggp.c @@ -119,7 +119,8 @@ static tw_stime 
loggp_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void loggp_packet_event_rc(tw_lp *sender); tw_stime loggp_recv_msg_event( @@ -611,7 +612,8 @@ static tw_stime loggp_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; tw_event * e_new; diff --git a/src/networks/model-net/simplenet-upd.c b/src/networks/model-net/simplenet-upd.c index 5b5edc78..5955f228 100644 --- a/src/networks/model-net/simplenet-upd.c +++ b/src/networks/model-net/simplenet-upd.c @@ -100,7 +100,8 @@ static tw_stime simplenet_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void simplenet_packet_event_rc(tw_lp *sender); @@ -523,7 +524,8 @@ static tw_stime simplenet_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; // unused... (void)sched_params; // unused... 
@@ -549,6 +551,7 @@ static tw_stime simplenet_packet_event( msg->event_type = SN_MSG_START; msg->is_pull = req->is_pull; msg->pull_size = req->pull_size; + //msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue; /*Fill in simplenet information*/ if(is_last_pckt) /* Its the last packet so pass in remote event information*/ diff --git a/src/networks/model-net/simplep2p.c b/src/networks/model-net/simplep2p.c index e7609870..6eb9ac0d 100644 --- a/src/networks/model-net/simplep2p.c +++ b/src/networks/model-net/simplep2p.c @@ -132,7 +132,8 @@ static tw_stime simplep2p_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void simplep2p_packet_event_rc(tw_lp *sender); @@ -807,7 +808,8 @@ static tw_stime simplep2p_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c index da122ec6..94188942 100644 --- a/src/networks/model-net/slimfly.c +++ b/src/networks/model-net/slimfly.c @@ -1158,7 +1158,8 @@ static tw_stime slimfly_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { // printf("slim packet event\n"); diff --git a/src/networks/model-net/torus.c b/src/networks/model-net/torus.c index 7db338e6..6ae6c7e6 100644 --- a/src/networks/model-net/torus.c +++ b/src/networks/model-net/torus.c @@ -498,7 +498,8 @@ static tw_stime torus_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; // not using atm... (void)sched_params; // not using atm... 
From 1df7bb7ce933119f5b54b8a9aa43a6ce6d6acc7d Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 8 Jul 2024 22:17:38 -0400 Subject: [PATCH 007/110] Updating code after ROSS change on gvt hook --- codes/surrogate/switch.h | 2 +- src/surrogate/init.c | 12 ++--- src/surrogate/switch.c | 97 ++++++++++++++++------------------------ 3 files changed, 42 insertions(+), 69 deletions(-) diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h index 3a56360a..c538e769 100644 --- a/codes/surrogate/switch.h +++ b/codes/surrogate/switch.h @@ -61,7 +61,7 @@ extern struct switch_at_struct switch_at; // Switch -void director_call(tw_pe * pe, tw_event_sig gvt_sig); +void director_call(tw_pe * pe); #ifdef __cplusplus } diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 16772fc2..4ed587c6 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -64,15 +64,9 @@ void surrogate_configure( PRINTF_ONCE("\n"); // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT - g_tw_gvt_arbitrary_fun = director_call; - -#ifdef USE_RAND_TIEBREAKER - tw_event_sig time_stamp = {0}; - time_stamp.recv_ts = switch_at.time_stampts[0]; - tw_trigger_arbitrary_fun_at(time_stamp); -#else - tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]); -#endif + g_tw_gvt_hook = director_call; + + tw_trigger_gvt_hook_at(switch_at.time_stampts[0]); // freeing timestamps before it dissapears for (size_t i = 0; i < len; i++) { diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 32086f46..f88df3ed 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -74,8 +74,9 @@ static inline bool does_any_pe(bool val) { //} +static void rollback_and_cancel_events_pe(tw_pe * pe) { #ifdef USE_RAND_TIEBREAKER -static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) { + tw_event_sig const gvt_sig = pe->GVT_sig; tw_stime const gvt = gvt_sig.recv_ts; // Backtracking the simulation to GVT for (unsigned int i = 0; i < g_tw_nkp; i++) { 
@@ -84,7 +85,7 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) { assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); assert(pe->GVT_sig.recv_ts == gvt); // redundant but needed because compiler cries that gvt is never used #else -static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) { + tw_stime const gvt = pe->GVT; // Backtracking the simulation to GVT for (unsigned int i = 0; i < g_tw_nkp; i++) { tw_kp_rollback_to(g_tw_kp[i], gvt); @@ -115,11 +116,12 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) { } } +static void shift_events_to_future_pe(tw_pe * pe) { #ifdef USE_RAND_TIEBREAKER -static void shift_events_to_future_pe(tw_pe * pe, tw_event_sig gvt_sig) { - tw_stime gvt = gvt_sig.recv_ts; // pe->GVT_sig.recv_ts; + tw_event_sig gvt_sig = pe->GVT_sig; + tw_stime gvt = gvt_sig.recv_ts; #else -static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { + tw_stime gvt = pe->GVT; #endif tw_event * next_event = tw_pq_dequeue(pe->pq); @@ -165,11 +167,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { next_event->recv_ts += switch_offset; next_event->sig.recv_ts = next_event->recv_ts; } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts); + assert(next_event->recv_ts >= g_tw_trigger_gvt_hook.sig_at.recv_ts); #else next_event->recv_ts += switch_offset; } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at); + assert(next_event->recv_ts >= g_tw_trigger_gvt_hook.at); #endif // store event in deque_events to inject immediately back to the queue @@ -272,17 +274,18 @@ static tw_event *** order_events_per_lps(tw_pe * pe) { // - Looking at all events in the PE, "freezing" those in the network model // and letting the workload events be processed further // - Going through every LP and calling their respective functions +static void events_high_def_to_surrogate_switch(tw_pe * pe) { #ifdef USE_RAND_TIEBREAKER -static void 
events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) { + tw_event_sig gvt_sig = pe->GVT_sig; #else -static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) { + tw_stime gvt = pe->GVT; #endif if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) { tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode"); } tw_event *** lps_events = order_events_per_lps(pe); - shift_events_to_future_pe(pe, gvt); + shift_events_to_future_pe(pe); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -293,7 +296,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) { // coincide with current GVT (the current GVT often does not // correspond to the (last) time stored in KPs). #ifdef USE_RAND_TIEBREAKER - lp->kp->last_sig = gvt; + lp->kp->last_sig = gvt_sig; #else lp->kp->last_time = gvt; #endif @@ -321,7 +324,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) { // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process) if (g_tw_synchronization_protocol == OPTIMISTIC) { - rollback_and_cancel_events_pe(pe, gvt); + rollback_and_cancel_events_pe(pe); } assert(lps_events[0] != NULL); @@ -330,12 +333,12 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) { } +static void events_surrogate_to_high_def_switch(tw_pe * pe) { #ifdef USE_RAND_TIEBREAKER -static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_event_sig gvt) { + tw_event_sig gvt_sig = pe->GVT_sig; #else -static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) { + tw_stime gvt = pe->GVT; #endif - (void) pe; // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -347,7 +350,7 @@ static 
void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) { // correspond to the (last) time stored in KPs). #ifdef USE_RAND_TIEBREAKER tw_event_sig const previous_sig = lp->kp->last_sig; - lp->kp->last_sig = gvt; + lp->kp->last_sig = gvt_sig; #else tw_stime const previous_time = lp->kp->last_time; lp->kp->last_time = gvt; @@ -385,12 +388,12 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) { // This is an impure function, calling it twice WILL give different results. Only call it once! bool hit_trigger(tw_stime gvt) { if ( switch_at.current_i < switch_at.total - && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) { + && g_tw_trigger_gvt_hook.active == GVT_HOOK_triggered) { double const switch_time = switch_at.time_stampts[switch_at.current_i]; #ifdef USE_RAND_TIEBREAKER - assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); + assert(g_tw_trigger_gvt_hook.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); #else - assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]); + assert(g_tw_trigger_gvt_hook.at == switch_at.time_stampts[switch_at.current_i]); #endif assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it @@ -398,15 +401,8 @@ bool hit_trigger(tw_stime gvt) { if (++switch_at.current_i < switch_at.total) { double const next_switch = switch_at.time_stampts[switch_at.current_i]; // Setting trigger for next switch - #ifdef USE_RAND_TIEBREAKER - tw_event_sig time_stamp = {0}; - time_stamp.recv_ts = next_switch; //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(time_stamp); - #else - //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(next_switch); - #endif + tw_trigger_gvt_hook_at(next_switch); } // return true; @@ -416,25 +412,15 @@ bool hit_trigger(tw_stime gvt) { } -#ifdef USE_RAND_TIEBREAKER -void 
switch_model(tw_pe * pe, tw_event_sig gvt_sig) { -#else -void switch_model(tw_pe * pe, tw_stime gvt) { -#endif +void switch_model(tw_pe * pe) { // Rollback if in optimistic mode #ifdef USE_RAND_TIEBREAKER if (g_tw_synchronization_protocol == OPTIMISTIC) { - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); - rollback_and_cancel_events_pe(pe, gvt_sig); - //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0); - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + rollback_and_cancel_events_pe(pe); } #else if (g_tw_synchronization_protocol == OPTIMISTIC) { - assert(pe->GVT == gvt); - rollback_and_cancel_events_pe(pe, gvt); - //assert(tw_event_sig_compare(pe->GVT_sig, gvt) <= 0); - assert(pe->GVT == gvt); + rollback_and_cancel_events_pe(pe); } #endif surr_config.director.switch_surrogate(); @@ -446,30 +432,23 @@ void switch_model(tw_pe * pe, tw_stime gvt) { if (freeze_network_on_switch) { if (surr_config.director.is_surrogate_on()) { model_net_method_switch_to_surrogate(); -#ifdef USE_RAND_TIEBREAKER - events_high_def_to_surrogate_switch(pe, gvt_sig); -#else - events_high_def_to_surrogate_switch(pe, gvt); -#endif + events_high_def_to_surrogate_switch(pe); } else { model_net_method_switch_to_highdef(); -#ifdef USE_RAND_TIEBREAKER - events_surrogate_to_high_def_switch(pe, gvt_sig); -#else - events_surrogate_to_high_def_switch(pe, gvt); -#endif + events_surrogate_to_high_def_switch(pe); } } } +void director_call(tw_pe * pe) { + assert(is_surrogate_configured); + #ifdef USE_RAND_TIEBREAKER -void director_call(tw_pe * pe, tw_event_sig gvt_sig) { - tw_stime const gvt = gvt_sig.recv_ts; + tw_stime gvt = pe->GVT_sig.recv_ts; #else -void director_call(tw_pe * pe, tw_stime gvt) { + tw_stime gvt = pe->GVT; #endif - assert(is_surrogate_configured); static int i = 0; if (g_tw_mynode == 0) { @@ -481,14 +460,14 @@ void director_call(tw_pe * pe, tw_stime gvt) { printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt, surr_config.director.is_surrogate_on() ? 
"surrogate-mode" : "high-definition"); - switch (g_tw_trigger_arbitrary_fun.active) { - case ARBITRARY_FUN_enabled: + switch (g_tw_trigger_gvt_hook.active) { + case GVT_HOOK_enabled: printf("enabled\n"); break; - case ARBITRARY_FUN_disabled: + case GVT_HOOK_disabled: printf("disabled\n"); break; - case ARBITRARY_FUN_triggered: + case GVT_HOOK_triggered: printf("triggered\n"); break; } @@ -527,7 +506,7 @@ void director_call(tw_pe * pe, tw_stime gvt) { } double const start = tw_clock_read(); - switch_model(pe, gvt_sig); + switch_model(pe); double const end = tw_clock_read(); surrogate_switching_time += end - start; From 6af7eb115ab996ead941a231a759f2a09684c71b Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 7 Jul 2024 19:26:37 -0400 Subject: [PATCH 008/110] Fixing compilation warning `incompatible-pointer-types` This bug was introduced when building the network surrogate. To build the surrogate, we need to track the input queue "size" (the input message queue to the routers from the workloads). If the network surrogate wouldn't live down in specific network models (it has been implemented right now only on dragonfly-dally), it should actually reside within the model-net layer, and thus, individual models shouldn't need to track the state of the input queue. Hopefully, we can move the network surrogate from dragonfly-dally into model-net. 
--- src/network-workloads/model-net-mpi-replay.c | 1 + src/networks/model-net/dragonfly.c | 3 ++- src/networks/model-net/fattree.c | 3 ++- src/networks/model-net/loggp.c | 6 ++++-- src/networks/model-net/simplenet-upd.c | 7 +++++-- src/networks/model-net/simplep2p.c | 6 ++++-- src/networks/model-net/slimfly.c | 3 ++- src/networks/model-net/torus.c | 3 ++- 8 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 6a063d6e..bf6860b9 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -18,6 +18,7 @@ #include "codes/quickhash.h" #include "codes/codes-jobmap.h" #include "codes/congestion-controller-core.h" +#include "codes/surrogate/init.h" /* turning on track lp will generate a lot of output messages */ #define DBG_COMM 1 diff --git a/src/networks/model-net/dragonfly.c b/src/networks/model-net/dragonfly.c index eb5e81bb..faee79d8 100644 --- a/src/networks/model-net/dragonfly.c +++ b/src/networks/model-net/dragonfly.c @@ -1087,7 +1087,8 @@ static tw_stime dragonfly_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/fattree.c b/src/networks/model-net/fattree.c index eb1c49b5..e7db6c61 100644 --- a/src/networks/model-net/fattree.c +++ b/src/networks/model-net/fattree.c @@ -1527,7 +1527,8 @@ static tw_stime fattree_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { #if DEBUG_RC packet_event_f++; diff --git a/src/networks/model-net/loggp.c b/src/networks/model-net/loggp.c index 22904287..def3eb22 100644 --- a/src/networks/model-net/loggp.c +++ b/src/networks/model-net/loggp.c @@ -119,7 +119,8 @@ static tw_stime 
loggp_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void loggp_packet_event_rc(tw_lp *sender); tw_stime loggp_recv_msg_event( @@ -611,7 +612,8 @@ static tw_stime loggp_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; tw_event * e_new; diff --git a/src/networks/model-net/simplenet-upd.c b/src/networks/model-net/simplenet-upd.c index 5b5edc78..5955f228 100644 --- a/src/networks/model-net/simplenet-upd.c +++ b/src/networks/model-net/simplenet-upd.c @@ -100,7 +100,8 @@ static tw_stime simplenet_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void simplenet_packet_event_rc(tw_lp *sender); @@ -523,7 +524,8 @@ static tw_stime simplenet_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; // unused... (void)sched_params; // unused... 
@@ -549,6 +551,7 @@ static tw_stime simplenet_packet_event( msg->event_type = SN_MSG_START; msg->is_pull = req->is_pull; msg->pull_size = req->pull_size; + //msg->is_there_another_pckt_in_queue = is_there_another_pckt_in_queue; /*Fill in simplenet information*/ if(is_last_pckt) /* Its the last packet so pass in remote event information*/ diff --git a/src/networks/model-net/simplep2p.c b/src/networks/model-net/simplep2p.c index e7609870..6eb9ac0d 100644 --- a/src/networks/model-net/simplep2p.c +++ b/src/networks/model-net/simplep2p.c @@ -132,7 +132,8 @@ static tw_stime simplep2p_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt); + int is_last_pckt, + bool is_there_another_pckt_in_queue); static void simplep2p_packet_event_rc(tw_lp *sender); @@ -807,7 +808,8 @@ static tw_stime simplep2p_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; (void)sched_params; diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c index da122ec6..94188942 100644 --- a/src/networks/model-net/slimfly.c +++ b/src/networks/model-net/slimfly.c @@ -1158,7 +1158,8 @@ static tw_stime slimfly_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { // printf("slim packet event\n"); diff --git a/src/networks/model-net/torus.c b/src/networks/model-net/torus.c index 7db338e6..6ae6c7e6 100644 --- a/src/networks/model-net/torus.c +++ b/src/networks/model-net/torus.c @@ -498,7 +498,8 @@ static tw_stime torus_packet_event( void const * remote_event, void const * self_event, tw_lp *sender, - int is_last_pckt) + int is_last_pckt, + bool is_there_another_pckt_in_queue) { (void)message_offset; // not using atm... (void)sched_params; // not using atm... 
From 472cc5ae7f99012661f62333c195bd55d0951b2a Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 23 Jan 2025 15:01:46 -0500 Subject: [PATCH 009/110] Removing hardcoded test and we can pass a config file now The configuration file should be of the form: > %d %d %d %f where each value corresponds to > job_id skip_at_iter resume_at_iter time_per_iter The configuration file is passed through the --skipping-iterations-file parameter. --- src/network-workloads/model-net-mpi-replay.c | 158 ++++++++++++++++--- 1 file changed, 137 insertions(+), 21 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index bf6860b9..ab5d7e62 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include "codes/codes-workload.h" @@ -101,6 +102,7 @@ int period_count[MAX_JOBS]; long period_time[MAX_JOBS][64]; float period_interval[MAX_JOBS][64]; char file_name_of_job[MAX_JOBS][8192]; +char skipping_iterations_file[8192]; tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0}; @@ -159,9 +161,9 @@ static double sampling_interval = 5000000; static double sampling_end_time = 3000000000; static int enable_debug = 0; -// More hardcoded values for surrogate switch -static int start_iter_skip_app = -1; -static double avg_time_app = -1.0; +// We can skip multiple iterations using an average as our predicted iteration time. 
This will skip ahead to a future step in the simulation +static struct AvgSurrogateSwitchingTimesForApp *skip_iter_config; +static size_t skip_iter_config_size = 0; /* set group context */ struct codes_mctx mapping_context; @@ -371,6 +373,10 @@ struct nw_state char output_buf[512]; char col_stats[64]; struct ross_model_sample ross_sample; + + // Configuration to tell the node when to skip some iterations + struct AvgSurrogateSwitchingTimesForApp *switch_config; + size_t switch_config_size; }; /* data for handling reverse computation. @@ -1128,25 +1134,41 @@ struct AvgSurrogateSwitchingTimesForApp { int skip_at_iter; int resume_at_iter; double time_per_iter; - bool done[72]; // This is a flag to indicate whethe we already completed this skipping stage + bool done; // This is a flag to indicate whethe we already completed this skipping stage }; +static int comp_AvgSurrogateSwitchingTimesForApp( + struct AvgSurrogateSwitchingTimesForApp *left, + struct AvgSurrogateSwitchingTimesForApp *right +) { + if (left->app_id < right->app_id) { + return -1; + } + if (left->app_id > right->app_id) { + return 1; + } + // else: left->app_id == right->app_id + + if (left->skip_at_iter < right->skip_at_iter) { + return -1; + } + if (left->skip_at_iter > right->skip_at_iter) { + return 1; + } + + return 0; +} + static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) { return avgSur->resume_at_iter - avgSur->skip_at_iter; } -static struct AvgSurrogateSwitchingTimesForApp skip_iter_config[] = { - // app_id, skip_at_iter, resume_at_iter, time_per_iter, done - {0, 3, 21, 14403235, {false}}, - {1, 7, 59, 4982017, {false}}, - {1, 79, 195, 3581337, {false}}, -}; - -struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { - int n_jumps = (sizeof(skip_iter_config)/sizeof(skip_iter_config[0])); - for (int i=0; i < n_jumps; i++) { - struct AvgSurrogateSwitchingTimesForApp * jump = &skip_iter_config[i]; - if (!jump->done[s->local_rank] && jump->app_id 
== s->app_id) { +static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { + assert(s->switch_config != NULL); + for (int i=0; i < s->switch_config_size; i++) { + struct AvgSurrogateSwitchingTimesForApp * jump = &s->switch_config[i]; + assert(jump->app_id == s->app_id); + if (!jump->done) { return jump; } } @@ -1188,7 +1210,7 @@ static void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) } } - switch_config->done[s->local_rank] = true; + switch_config->done = true; tw_event *e = tw_event_new(lp->gid, 0.0, lp); nw_message* msg = (nw_message*) tw_event_data(e); @@ -2669,6 +2691,32 @@ void nw_test_init(nw_state* s, tw_lp* lp) " num_sends num_bytes_sent sample_end_time"); } } + + if (skip_iter_config_size > 0) { + size_t size = 0; + // Finding number of times to skip for this job + for (size_t i = 0; i < skip_iter_config_size; i++) { + if (lid.job == skip_iter_config[i].app_id) { + size++; + } + } + // Constructing switch_config + s->switch_config_size = size; + if (size > 0) { + s->switch_config = malloc(size * sizeof(struct AvgSurrogateSwitchingTimesForApp)); + size_t j = 0; + for (size_t i = 0; i < skip_iter_config_size; i++) { + if (lid.job == skip_iter_config[i].app_id) { + s->switch_config[j] = skip_iter_config[i]; + j++; + } + } + } + } else { + s->switch_config = NULL; + s->switch_config_size = 0; + } + return; } @@ -2810,6 +2858,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) case SURR_SKIP_ITERATION: skip_iteration(s, lp, bf, m); + break; } } @@ -3192,6 +3241,10 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) // rc_stack_destroy(s->indices); rc_stack_destroy(s->processed_ops); rc_stack_destroy(s->processed_wait_op); + + if (s->switch_config != NULL) { + free(s->switch_config); + } } void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) @@ -3261,6 +3314,10 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l 
case CLI_OTHER_FINISH: handle_other_finish_rc(s, lp, bf, m); break; + + case SURR_SKIP_ITERATION: + skip_iteration_rc(s, lp, bf, m); + break; } } @@ -3332,13 +3389,12 @@ const tw_optdef app_opt [] = TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"), TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"), TWOPT_CHAR("offset_file", offset_file, "offset file name"), - TWOPT_UINT("start-iter-skip-app", start_iter_skip_app, "Hardcoded value to indicate when to switch to surrogate for app 1"), - TWOPT_STIME("avg-time-app", avg_time_app, "Hardcoded value for Avg. iteration time for app 1"), #ifdef ENABLE_CORTEX_PYTHON TWOPT_CHAR("cortex-file", cortex_file, "Python file (without .py) containing the CoRtEx translation class"), TWOPT_CHAR("cortex-class", cortex_class, "Python class implementing the CoRtEx translator"), TWOPT_CHAR("cortex-gen", cortex_gen, "Python function to pre-generate MPI events"), #endif + TWOPT_CHAR("skipping-iterations-file", skipping_iterations_file, "Configuration file name for which steps to skip"), TWOPT_END() }; @@ -3477,8 +3533,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_opt_add(app_opt); tw_opt_add(cc_app_opt); tw_init(argc, argv); - skip_iter_config[2].skip_at_iter = start_iter_skip_app; - skip_iter_config[2].time_per_iter = avg_time_app; #ifdef USE_RDAMARIS if(g_st_ross_rank) @@ -3629,6 +3683,64 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) } + // Loading surrogacy configuration + if(strlen(skipping_iterations_file) > 0) { + FILE *file = fopen(skipping_iterations_file, "r"); + if(!file) { + tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file); + } + + // Finding number of skipping iteration rows + int i = 0; + for(; !feof(file); i++) { + struct AvgSurrogateSwitchingTimesForApp skip_row; + + int ref = fscanf(file, "%d %d %d %lf", &skip_row.app_id, &skip_row.skip_at_iter, 
&skip_row.resume_at_iter, &skip_row.time_per_iter); + + if (ref != 4) { // We couldn't read all four values + fprintf(stderr, "Warning: Couldn't read a row of 'skipping-iterations-file'. Stopping after reading %d rows.\n", i); + break; + } + } + + skip_iter_config_size = i; + + skip_iter_config = malloc(skip_iter_config_size * sizeof(struct AvgSurrogateSwitchingTimesForApp)); + + fseek(file, 0, SEEK_SET); + for(i = 0; !feof(file); i++) { + struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i]; + + int ref = fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter); + + skip_row->done = false; + + if (ref != 4) { // We couldn't read all four values + break; + } + } + assert(i == skip_iter_config_size); + fclose(file); + + // Sorting. To skip iterations we asume that all skips for a specific job appear in increasing order + qsort( + skip_iter_config, + skip_iter_config_size, + sizeof(struct AvgSurrogateSwitchingTimesForApp), + (int (*)(const void *, const void *)) comp_AvgSurrogateSwitchingTimesForApp); + + // Printing configuration + if(!g_tw_mynode && skip_iter_config_size) { + printf("\n\nConfiguration for skipping selected iterations of one or more jobs has been loaded.\n"); + printf("| job_id skip_at_iter resume_at_iter time_per_iter\n"); + for (size_t i=0; iapp_id, skip_row->skip_at_iter, skip_row->resume_at_iter, skip_row->time_per_iter); + } + printf("\n"); + } + } + MPI_Comm_rank(MPI_COMM_CODES, &rank); MPI_Comm_size(MPI_COMM_CODES, &nprocs); @@ -3813,6 +3925,10 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) if(alloc_spec) codes_jobmap_destroy(jobmap_ctx); + if (skip_iter_config != NULL) { + free(skip_iter_config); + } + print_surrogate_stats(); #ifdef USE_RDAMARIS From 57fc7e3aa03dd4221d5c2393c0b34b17a49a8edf Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 24 Jan 2025 07:05:29 -0500 Subject: [PATCH 010/110] Fixing a memory bug when reading from file 
Reading data from `skipping_iterations_file` happens in two stages: first we find how much data to load into memory, then we malloc the space and load the data. One extra row of data had been loaded, which overwrote a couple of bytes for some other structure. This would occasionally cause a segfault (which only showed up when running the simulation in parallel). --- src/network-workloads/model-net-mpi-replay.c | 40 ++++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index ab5d7e62..d0dea3ec 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -426,6 +426,7 @@ struct nw_message int saved_syn_length; unsigned long saved_prev_switch; double saved_prev_max_time; + struct AvgSurrogateSwitchingTimesForApp * switch_config_used; } rc; }; @@ -1176,33 +1177,33 @@ static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_sta } static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { - // TODO: implement!! 
+ m->rc.switch_config_used->done = false; } -static void skip_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) +static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { - struct codes_workload_op * mpi_op = (struct codes_workload_op*) malloc(sizeof(struct codes_workload_op)); - m->mpi_op = mpi_op; + struct codes_workload_op mpi_op; struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); assert(switch_config != NULL); int const resume_at_iter = switch_config->resume_at_iter; + m->rc.switch_config_used = switch_config; // consuming all events until indicated iteration is reached bool reached_end = false; while (!reached_end) { - codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op); + codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op); - switch (mpi_op->op_type) { + switch (mpi_op.op_type) { case CODES_WK_MARK: - if (mpi_op->u.send.tag == resume_at_iter) { + if (mpi_op.u.send.tag == resume_at_iter) { reached_end = true; - codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op); + codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op); } break; // If we reach the end of simulation, rollback once to allow the operation to be processed normally case CODES_WK_END: - codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, mpi_op); + codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op); reached_end = true; break; default: @@ -1226,7 +1227,7 @@ static bool have_we_hit_surrogate_switch(struct nw_state* s, struct codes_worklo return false; } -static double time_to_skip_iterations(struct nw_state* s, struct codes_workload_op * mpi_op) { +static double time_to_skip_iterations(struct nw_state* s) { struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); assert(switch_config != NULL); return switch_config->time_per_iter * iters_skipped(switch_config); @@ -2857,7 +2858,7 @@ void 
nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) break; case SURR_SKIP_ITERATION: - skip_iteration(s, lp, bf, m); + skip_to_iteration(s, lp, bf, m); break; } } @@ -3096,7 +3097,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l // If we have reached the surrogate switch time, skip next iteration(s) if (have_we_hit_surrogate_switch(s, mpi_op)) { - tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s, mpi_op), lp); + tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp); nw_message* msg = (nw_message*) tw_event_data(e); msg->msg_type = SURR_SKIP_ITERATION; tw_event_send(e); @@ -3356,6 +3357,8 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp free(m->mpi_op); break; + case SURR_SKIP_ITERATION: + break; } } @@ -3683,7 +3686,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) } - // Loading surrogacy configuration + // Loading skipping iterations configuration if(strlen(skipping_iterations_file) > 0) { FILE *file = fopen(skipping_iterations_file, "r"); if(!file) { @@ -3707,19 +3710,14 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) skip_iter_config = malloc(skip_iter_config_size * sizeof(struct AvgSurrogateSwitchingTimesForApp)); + // Loading in memory all times to skip iterations fseek(file, 0, SEEK_SET); - for(i = 0; !feof(file); i++) { + for(i = 0; i < skip_iter_config_size; i++) { struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i]; - int ref = fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter); - + fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter); skip_row->done = false; - - if (ref != 4) { // We couldn't read all four values - break; - } } - assert(i == skip_iter_config_size); fclose(file); // Sorting. 
To skip iterations we asume that all skips for a specific job appear in increasing order From 2711b6bcb71281930401b62eedff27287c3c5152 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 24 Jan 2025 08:30:25 -0500 Subject: [PATCH 011/110] Allowing to run without skipping configuration file --- src/network-workloads/model-net-mpi-replay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index d0dea3ec..b62715c3 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -1165,7 +1165,9 @@ static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) { } static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { - assert(s->switch_config != NULL); + if (s->switch_config == NULL) { + return NULL; + } for (int i=0; i < s->switch_config_size; i++) { struct AvgSurrogateSwitchingTimesForApp * jump = &s->switch_config[i]; assert(jump->app_id == s->app_id); From 1412a4e9a943a0df40cdc6f0af9c4ac92c4bf026 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 24 Jan 2025 15:50:55 -0500 Subject: [PATCH 012/110] Saving apps iteration logs into single files per PE --- src/network-workloads/model-net-mpi-replay.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index b62715c3..d5d1b8b1 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -3760,7 +3760,21 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) modelnet_mpi_replay_read_config(); //Xin: output iteration time into log file - iteration_log = fopen("iteration-logs", "w+"); + + char const iteration_dir[] = "iteration-logs"; + if (!g_tw_mynode) { + int ret = mkdir("iteration-logs", 0775); + if(ret != 0) + { + tw_error(TW_LOC, 
"mkdir(\"%s/\")", iteration_dir); + } + } + MPI_Barrier(MPI_COMM_CODES); + int buffer_size = snprintf(NULL, 0, "%s/pe=%d.txt", iteration_dir, g_tw_mynode) + 1; + char *iteration_log_path = malloc(buffer_size); + snprintf(iteration_log_path, buffer_size, "%s/pe=%d.txt", iteration_dir, g_tw_mynode); + iteration_log = fopen(iteration_log_path, "w+"); + free(iteration_log_path); if(!iteration_log) { printf("\n Error logging iteration times... quitting "); From bb5b369fe11280afa0a9a00fb71c707370581793 Mon Sep 17 00:00:00 2001 From: helq Date: Sat, 25 Jan 2025 15:28:38 -0500 Subject: [PATCH 013/110] Guaranteeing that "workload period" config works in parallel --- src/network-workloads/model-net-mpi-replay.c | 25 ++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index d5d1b8b1..364707f6 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -37,6 +37,7 @@ #define BAR_TAG 1234 #define PRINT_SYNTH_TRAFFIC 1 #define MAX_JOBS 64 +#define MAX_PERIODS_PER_APP 512 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine #define OUTPUT_MARKS 0 @@ -99,8 +100,8 @@ float mean_interval_of_job[MAX_JOBS]; long job_timer1[MAX_JOBS]; long job_timer2[MAX_JOBS]; int period_count[MAX_JOBS]; -long period_time[MAX_JOBS][64]; -float period_interval[MAX_JOBS][64]; +long period_time[MAX_JOBS][MAX_PERIODS_PER_APP]; +float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP]; char file_name_of_job[MAX_JOBS][8192]; char skipping_iterations_file[8192]; @@ -2672,8 +2673,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) e2 = tw_event_new(lp->gid, ts2, lp); m_new2 = (nw_message*)tw_event_data(e2); m_new2->msg_type = CLI_BCKGND_CHANGE; - m_new2->fwd.msg_send_time = period_interval[lid.job][k]; - m_new2->rc.saved_send_time = 
mean_interval_of_job[s->app_id]; + m_new2->fwd.msg_send_time = period_interval[lid.job][k]; // Warning: this is overwriting a variable meant for message type MPI_SEND_ARRIVED_CB tw_event_send(e2); } } @@ -2839,9 +2839,10 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) break; case CLI_BCKGND_CHANGE: - mean_interval_of_job[s->app_id] = m->fwd.msg_send_time; - printf("======== CHANGE [now: %lf] App:%d | Interval: %f\n", tw_now(lp), s->app_id, mean_interval_of_job[s->app_id]); - break; + m->rc.saved_send_time = mean_interval_of_job[s->app_id]; // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE + mean_interval_of_job[s->app_id] = m->fwd.msg_send_time; + m->rc.saved_marker_time = tw_now(lp); + break; case CLI_BCKGND_ARRIVE: arrive_syn_tr(s, bf, m, lp); @@ -3361,6 +3362,10 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp break; case SURR_SKIP_ITERATION: break; + + case CLI_BCKGND_CHANGE: + printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.saved_marker_time, s->app_id, m->fwd.msg_send_time); + break; } } @@ -3651,7 +3656,13 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) char ref2 = '\n'; while(!feof(period_file)) { + if (j >= MAX_JOBS) { + tw_error(TW_LOC, "Exceeded number of max workloads in workloads period file. 
Max: %d", MAX_JOBS); + } ref2 = fscanf(period_file, "%d", &period_count[j]); + if (period_count[j] > MAX_PERIODS_PER_APP) { + tw_error(TW_LOC, "Too many periods for workload app %d", period_count[j]); + } if(ref2 != EOF){ printf("======== [ID: %d] Period count: %d\n", j, period_count[j]); for(int k = 0; k < period_count[j]; k++){ From a4e052a4f483064d7a9ebf071b2ce1592b94fb41 Mon Sep 17 00:00:00 2001 From: helq Date: Sat, 25 Jan 2025 16:22:34 -0500 Subject: [PATCH 014/110] Changing time in period file to double (from long) --- src/network-workloads/model-net-mpi-replay.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 364707f6..a68bc0c4 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -100,7 +100,7 @@ float mean_interval_of_job[MAX_JOBS]; long job_timer1[MAX_JOBS]; long job_timer2[MAX_JOBS]; int period_count[MAX_JOBS]; -long period_time[MAX_JOBS][MAX_PERIODS_PER_APP]; +double period_time[MAX_JOBS][MAX_PERIODS_PER_APP]; float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP]; char file_name_of_job[MAX_JOBS][8192]; char skipping_iterations_file[8192]; @@ -3666,8 +3666,8 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) if(ref2 != EOF){ printf("======== [ID: %d] Period count: %d\n", j, period_count[j]); for(int k = 0; k < period_count[j]; k++){ - fscanf(period_file, "%ld:%f", &period_time[j][k], &period_interval[j][k]); - printf("======== [ID: %d] Period time and interval: %ld and %f\n", j, period_time[j][k], period_interval[j][k]); + fscanf(period_file, "%lf:%f", &period_time[j][k], &period_interval[j][k]); + printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]); } } j++; From 795628ddafb8873ac81c39d2503218411dbda02d Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 18 Feb 2025 14:02:32 -0500 Subject: [PATCH 015/110] 
Stdout for surrogate only from PE 0 --- src/network-workloads/model-net-mpi-replay.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index a68bc0c4..19724ad8 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -3656,18 +3656,22 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) char ref2 = '\n'; while(!feof(period_file)) { - if (j >= MAX_JOBS) { + if (j >= MAX_JOBS && !g_tw_mynode) { tw_error(TW_LOC, "Exceeded number of max workloads in workloads period file. Max: %d", MAX_JOBS); } ref2 = fscanf(period_file, "%d", &period_count[j]); - if (period_count[j] > MAX_PERIODS_PER_APP) { + if (period_count[j] > MAX_PERIODS_PER_APP && !g_tw_mynode) { tw_error(TW_LOC, "Too many periods for workload app %d", period_count[j]); } if(ref2 != EOF){ - printf("======== [ID: %d] Period count: %d\n", j, period_count[j]); + if (!g_tw_mynode) { + printf("======== [ID: %d] Period count: %d\n", j, period_count[j]); + } for(int k = 0; k < period_count[j]; k++){ fscanf(period_file, "%lf:%f", &period_time[j][k], &period_interval[j][k]); - printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]); + if (!g_tw_mynode) { + printf("======== [ID: %d] Period time and interval: %lf and %f\n", j, period_time[j][k], period_interval[j][k]); + } } } j++; From a7121ec6643811e0b3e424a9080800957e2606cd Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 18 Feb 2025 17:58:40 -0500 Subject: [PATCH 016/110] Implementing custom LP status printing for model-net-lps --- src/networks/model-net/core/model-net-lp.c | 111 +++++++++++++++++++++ src/util/rc-stack.c | 23 ++++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 3ff97f37..e49035e3 100644 --- 
a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -131,6 +131,22 @@ tw_lptype model_net_base_lp = { sizeof(model_net_base_state), }; +// Functionality to check for correct implementation of reverse event handler +static void print_model_net_state(FILE * out, model_net_base_state * state); +static void print_event_state(FILE * out, model_net_wrap_msg * state); + +// ROSS function pointer table to check reverse event handler +crv_checkpointer model_net_chkptr = { + &model_net_base_lp, + 0, + (save_checkpoint_state_f) NULL, + (clean_checkpoint_state_f) NULL, + (check_states_f) NULL, + (print_lpstate_f) print_model_net_state, + (print_checkpoint_state_f) print_model_net_state, + (print_event_f) print_event_state, +}; + static void model_net_commit_event(model_net_base_state * ns, tw_bf *b, model_net_wrap_msg * m, tw_lp * lp) { if(m->h.event_type == MN_BASE_PASS) @@ -268,6 +284,7 @@ void model_net_base_register(int *do_config_nets){ } } } + crv_add_custom_state_checkpoint(&model_net_chkptr); } static void base_read_config(const char * anno, model_net_base_params *p){ @@ -1117,6 +1134,100 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid, } +/* START Checking reverse handler functionality */ +static void print_model_net_state(FILE * out, model_net_base_state * state) { + fprintf(out, " net_id = %d\n", state->net_id); + fprintf(out, " nics_per_router = %d\n", state->nics_per_router); + fprintf(out, "*in_sched_send_loop = %p\n", state->in_sched_send_loop); + fprintf(out, " in_sched_recv_loop = %d\n", state->in_sched_recv_loop); + fprintf(out, " msg_id = %lu\n", state->msg_id); + fprintf(out, "** sched_send = %p\n", state->sched_send); + fprintf(out, "* sched_recv = %p\n", state->sched_recv); + fprintf(out, "* params = %p\n", state->params); + fprintf(out, "* sub_type = %p\n", state->sub_type); + fprintf(out, "* sub_model_type = %p\n", state->sub_model_type); + fprintf(out, "* sub_state = %p\n", state->sub_state); 
+ fprintf(out, "next_available_time = %f\n", state->next_available_time); + fprintf(out, "*node_copy_next_available_time = %p\n", state->node_copy_next_available_time); + fprintf(out, "*sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate); + fprintf(out, "sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate); +} + +static void print_type(FILE * out, enum model_net_base_event_type type) { + switch (type) { + case MN_BASE_NEW_MSG: + fprintf(out, "MN_BASE_NEW_MSG"); + break; + case MN_BASE_SCHED_NEXT: + fprintf(out, "MN_BASE_SCHED_NEXT"); + break; + case MN_BASE_SAMPLE: + fprintf(out, "MN_BASE_SAMPLE"); + break; + case MN_BASE_PASS: + fprintf(out, "MN_BASE_PASS"); + break; + case MN_BASE_END_NOTIF: + fprintf(out, "MN_BASE_END_NOTIF"); + break; + case MN_CONGESTION_EVENT: + fprintf(out, "MN_CONGESTION_EVENT"); + break; + } +} + +static void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) { + fprintf(out, "%sfinal_dest_lp = %ld\n", starts_with, req->final_dest_lp); + fprintf(out, "%sdest_mn_lp = %ld\n", starts_with, req->dest_mn_lp); + fprintf(out, "%ssrc_lp = %ld\n", starts_with, req->src_lp); + fprintf(out, "%smsg_start_time = %f\n", starts_with, req->msg_start_time); + fprintf(out, "%smsg_new_mn_event = %f\n", starts_with, req->msg_new_mn_event); + fprintf(out, "%smsg_size = %ld\n", starts_with, req->msg_size); + fprintf(out, "%spull_size = %ld\n", starts_with, req->pull_size); + fprintf(out, "%spacket_size = %ld\n", starts_with, req->packet_size); + fprintf(out, "%smsg_id = %ld\n", starts_with, req->msg_id); + fprintf(out, "%snet_id = %d\n", starts_with, req->net_id); + fprintf(out, "%sis_pull = %d\n", starts_with, req->is_pull); + fprintf(out, "%squeue_offset = %d\n", starts_with, req->queue_offset); + fprintf(out, "%sremote_event_size = %d\n", starts_with, req->remote_event_size); + fprintf(out, "%sself_event_size = %d\n", starts_with, req->self_event_size); + fprintf(out, "%scategory = 
'%s'\n", starts_with, req->category); + fprintf(out, "%sapp_id = %d\n", starts_with, req->app_id); +} + +static void print_event_state(FILE * out, model_net_wrap_msg * msg) { + fprintf(out, "h\n"); + fprintf(out, "|.src = %lu\n", msg->h.src); + fprintf(out, "|.event_type = %d (", msg->h.event_type); + print_type(out, msg->h.event_type); + fprintf(out, ")\n"); + fprintf(out, "|.magic = %d\n", msg->h.magic); + switch (msg->h.event_type) { + case MN_BASE_NEW_MSG: + case MN_BASE_SCHED_NEXT: + // We can check m_base values + fprintf(out, "m_base\n"); + fprintf(out, " |.req\n"); + print_model_net_request(out, " | |.", &msg->msg.m_base.req); + fprintf(out, " |.is_from_remote = %d\n", msg->msg.m_base.is_from_remote); + fprintf(out, " |.isQueueReq = %d\n", msg->msg.m_base.isQueueReq); + fprintf(out, " |.save_ts = %f\n", msg->msg.m_base.save_ts); + fprintf(out, " |.sched_params.prio = %d\n", msg->msg.m_base.sched_params.prio); + fprintf(out, " |.rc\n"); + fprintf(out, " | |.req\n"); + print_model_net_request(out, " | | |.", &msg->msg.m_base.rc.req); + fprintf(out, " | |.sched_params.prio = %d\n", msg->msg.m_base.rc.sched_params.prio); + fprintf(out, " | |.rtn = %d\n", msg->msg.m_base.rc.rtn); + fprintf(out, " | |.prio = %d\n", msg->msg.m_base.rc.prio); + fprintf(out, " |.created_in_surrogate = %d\n", msg->msg.m_base.created_in_surrogate); + break; + default: + fprintf(out, "The content of this message cannot be deciphered yet with the information given\n"); + } +} + +/* END checking reverse handler functionality */ + void model_net_method_switch_to_surrogate(void) { is_freezing_on = true; } diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c index ebb2131f..7b0540e7 100644 --- a/src/util/rc-stack.c +++ b/src/util/rc-stack.c @@ -12,7 +12,8 @@ enum rc_stack_mode { RC_NONOPT, // not in optimistic mode RC_OPT, // optimistic mode - RC_OPT_DBG // optimistic *debug* mode (requires special handling) + RC_OPT_DBG, // optimistic *debug* mode (requires special handling) + 
RC_SEQ_RV_DBG, // sequential rollback chek, a *debug* mode that requires special handling }; typedef struct rc_entry_s { @@ -40,11 +41,12 @@ void rc_stack_create(struct rc_stack **s){ } switch (g_tw_synchronization_protocol) { case OPTIMISTIC: - ss->mode = RC_OPT; - break; case OPTIMISTIC_REALTIME: ss->mode = RC_OPT; break; + case SEQUENTIAL_ROLLBACK_CHECK: + ss->mode = RC_SEQ_RV_DBG; + break; case OPTIMISTIC_DEBUG: ss->mode = RC_OPT_DBG; break; @@ -103,6 +105,21 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) { if (s->mode == RC_OPT_DBG) return; + // rollback until only one event is left + if (s->mode == RC_SEQ_RV_DBG) { + struct qlist_head *ent = s->head.next; + while (ent->next != &s->head) { + rc_entry *r = qlist_entry(ent, rc_entry, ql); + qlist_del(ent); + if (r->free_fn) r->free_fn(r->data); + free(r); + s->count--; + ent = s->head.next; + } + return; + } + + // Removing all stored rollback events from stack struct qlist_head *ent = s->head.next; while (ent != &s->head) { rc_entry *r = qlist_entry(ent, rc_entry, ql); From ca303200d57d6ce0c459b55eda17b6c23d92ecaa Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 18 Feb 2025 18:06:05 -0500 Subject: [PATCH 017/110] Fixing small bug found when rollbacking model-net-event --- src/networks/model-net/core/model-net-lp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index e49035e3..8a52c7da 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -788,6 +788,7 @@ void handle_new_msg( // don't forget to set packet size, now that we're responsible for it! 
r->msg_new_mn_event = tw_now(lp); r->packet_size = ns->params->packet_size; + b->c30 = 1; r->msg_id = ns->msg_id++; void * m_data = m+1; void *remote = NULL, *local = NULL; @@ -881,6 +882,10 @@ void handle_new_msg_rc( *in_sched_loop = 0; } model_net_sched_add_rc(ss, &m->msg.m_base.rc, lp); + + if (b->c30) { + ns->msg_id--; + } } /// bitfields used From c2afcd1f6415dd0ac4866af25eacebffdebad7f2 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 24 Feb 2025 11:02:33 -0500 Subject: [PATCH 018/110] Cleaning up some structs and fixing a reverse handler case --- src/network-workloads/model-net-mpi-replay.c | 21 +++++++------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 19724ad8..34c6a61d 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -228,7 +228,6 @@ struct mpi_msgs_queue int source_rank; int dest_rank; int64_t num_bytes; - int64_t seq_id; tw_stime req_init_time; dumpi_req_id req_id; struct qlist_head ql; @@ -238,8 +237,8 @@ struct mpi_msgs_queue struct completed_requests { unsigned int req_id; + int index; // for rollbacking struct qlist_head ql; - int index; }; /* for wait operations, store the pending operation and number of completed waits so far. 
*/ @@ -250,7 +249,6 @@ struct pending_waits int num_completed; int count; tw_stime start_time; - struct qlist_head ql; }; struct msg_size_info @@ -387,7 +385,7 @@ struct nw_state struct nw_message { // forward message handler - int msg_type; + enum MPI_NW_EVENTS msg_type; int op_type; int num_rngs; model_net_event_return event_rc; @@ -399,7 +397,6 @@ struct nw_message int dest_rank; int64_t num_bytes; int num_matched; - int data_type; double sim_start_time; // for callbacks - time message was received double msg_send_time; @@ -919,7 +916,6 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l { // printf("%d - %d >= %d\n",s->gen_data,s->prev_switch,perm_switch_thresh); bf->c2 = 1; - m->rc.saved_prev_switch = s->prev_switch; s->prev_switch = s->gen_data; //Amount of data pushed at time when switch initiated dest_svr[0] = tw_rand_integer(lp->rng, 0, num_clients - 1); if(dest_svr[0] == s->local_rank) @@ -1352,7 +1348,6 @@ static int notify_posted_wait(nw_state* s, if(op_type == CODES_WK_WAIT && (wait_elem->req_ids[0] == completed_req)) { - m->fwd.wait_completed = 1; wait_completed = 1; } else if(op_type == CODES_WK_WAITALL @@ -1365,6 +1360,7 @@ static int notify_posted_wait(nw_state* s, if(wait_elem->req_ids[i] == completed_req) { wait_elem->num_completed++; + m->fwd.wait_completed++; //This is just the individual request handle - not the entire wait. if(wait_elem->num_completed > wait_elem->count) printf("\n Num completed %d count %d LP %llu ", wait_elem->num_completed, @@ -1383,7 +1379,6 @@ static int notify_posted_wait(nw_state* s, } wait_completed = 1; } - m->fwd.wait_completed = 1; //This is just the individual request handle - not the entire wait. 
} } } @@ -1827,8 +1822,6 @@ static void codes_exec_mpi_recv_rc( if(m->fwd.found_match >= 0) { - ns->recv_time = m->rc.saved_recv_time; - ns->ross_sample.recv_time = m->rc.saved_recv_time_sample; //int queue_count = qlist_count(&ns->arrival_queue); mpi_msgs_queue * qi = (mpi_msgs_queue*)rc_stack_pop(ns->processed_ops); @@ -1880,7 +1873,6 @@ static void codes_exec_mpi_recv( m->rc.saved_recv_time = s->recv_time; m->rc.saved_recv_time_sample = s->ross_sample.recv_time; - m->rc.saved_num_bytes = mpi_op->u.recv.num_bytes; mpi_msgs_queue * recv_op = (mpi_msgs_queue*) malloc(sizeof(mpi_msgs_queue)); recv_op->req_init_time = tw_now(lp); @@ -2199,8 +2191,9 @@ static void update_completed_queue_rc(nw_state * s, tw_bf * bf, nw_message * m, add_completed_reqs(s, lp, m->fwd.num_matched); codes_issue_next_event_rc(lp); } - if(m->fwd.wait_completed > 0) - s->wait_op->num_completed--; + if(m->fwd.wait_completed > 0) { + s->wait_op->num_completed -= m->fwd.wait_completed; + } } static void update_completed_queue(nw_state* s, @@ -2733,7 +2726,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) rc_stack_gc(lp, s->processed_ops); rc_stack_gc(lp, s->processed_wait_op); - switch(m->msg_type) + switch((enum MPI_NW_EVENTS) m->msg_type) { case MPI_SEND_ARRIVED: update_arrival_queue(s, bf, m, lp); From c4c1491317b06f527dcc39ef0673a5ec5f47f05d Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 24 Feb 2025 14:17:34 -0500 Subject: [PATCH 019/110] Refactoring struct in model-net-mpi-replay The struct nw_message was messy. It kept on getting longer and longer as more and more values were stored in the struct to use later for rollback. Now, it is more manageable and it uses less memory than before. 
--- src/network-workloads/model-net-mpi-replay.c | 208 ++++++++++++------- 1 file changed, 130 insertions(+), 78 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 34c6a61d..be15fa30 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -408,23 +408,73 @@ struct nw_message short wait_completed; short rend_send; } fwd; - struct - { - int saved_perm; - double saved_send_time; - double saved_send_time_sample; - double saved_recv_time; - double saved_recv_time_sample; - double saved_wait_time; - double saved_wait_time_sample; - double saved_delay; - double saved_delay_sample; - double saved_marker_time; - int64_t saved_num_bytes; - int saved_syn_length; - unsigned long saved_prev_switch; - double saved_prev_max_time; - struct AvgSurrogateSwitchingTimesForApp * switch_config_used; + + // A different struct for each type of MPI_NW_EVENTS + union { + // For CLI_BCKGND_GEN + struct { + int saved_syn_length; + int saved_perm; // Used by PERMUTATION + unsigned long saved_prev_switch; // Used by PERMUTATION + } gen; + + // For CLI_BCKGND_ARRIVE and MPI_SEND_ARRIVED_CB + struct { + double saved_prev_max_time; + double saved_send_time; + double saved_send_time_sample; + } arrive; + + // For CLI_BCKGND_CHANGE + struct { + double saved_send_time; + double saved_marker_time; + } change; + + // For MPI_OP_GET_NEXT there are also different types + struct { + double saved_elapsed_time; + union { + // CODES_WK_ALLREDUCE + struct { + double saved_send_time; + double saved_delay; + } all_reduce; + // CODES_WK_RECV and CODES_WK_IRECV + struct { + double saved_recv_time; + double saved_recv_time_sample; + } recv; + // CODES_WK_DELAY + struct { + double saved_delay; + double saved_delay_sample; + } delay; + // CODES_WK_END and CODES_WK_MARK + struct { + double saved_marker_time; + } mark; + }; + } mpi_next; + + // For MPI_SEND_ARRIVED and MPI_REND_ARRIVED and 
MPI_SEND_POSTED + struct { + double saved_wait_time; + double saved_wait_time_sample; + double saved_recv_time; + double saved_recv_time_sample; + int64_t saved_num_bytes; + } mpi_send; + + // For MPI_REND_ACK_ARRIVED + struct { + int64_t saved_num_bytes; + } mpi_ack; + + // Surrogate variables + struct { + struct AvgSurrogateSwitchingTimesForApp * config_used; + } surr; } rc; }; @@ -838,12 +888,12 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp } if(bf->c2) { - s->prev_switch = m->rc.saved_prev_switch; - s->saved_perm_dest = m->rc.saved_perm; + s->prev_switch = m->rc.gen.saved_prev_switch; + s->saved_perm_dest = m->rc.gen.saved_perm; tw_rand_reverse_unif(lp->rng); } int i; - for (i=0; i < m->rc.saved_syn_length; i++){ + for (i=0; i < m->rc.gen.saved_syn_length; i++){ model_net_event_rc2(lp, &m->event_rc); s->gen_data -= payload_sz; num_syn_bytes_sent -= payload_sz; @@ -856,8 +906,10 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp if(bf->c5) finish_bckgnd_traffic_rc(s, bf, m, lp); - if(bf->c7) + if(bf->c7) { + s->saved_perm_dest = m->rc.gen.saved_perm; tw_rand_reverse_unif(lp->rng); + } } /* generate synthetic traffic */ @@ -897,8 +949,8 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l case PERMUTATION: { - m->rc.saved_prev_switch = s->prev_switch; //for reverse computation - m->rc.saved_perm = s->saved_perm_dest; + m->rc.gen.saved_prev_switch = s->prev_switch; //for reverse computation + m->rc.gen.saved_perm = s->saved_perm_dest; length = 1; dest_svr = (int*) calloc(1, sizeof(int)); @@ -984,7 +1036,7 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l tw_error(TW_LOC, "Undefined traffic pattern"); } /* Record length for reverse handler*/ - m->rc.saved_syn_length = length; + m->rc.gen.saved_syn_length = length; char prio[12]; switch(s->qos_level){ @@ -1075,23 +1127,23 @@ void arrive_syn_tr_rc(nw_state * s, tw_bf * bf, nw_message * 
m, tw_lp * lp) num_syn_bytes_recvd -= data; s->num_bytes_recvd -= data; s->ross_sample.num_bytes_recvd -= data; - s->send_time = m->rc.saved_send_time; - s->ross_sample.send_time = m->rc.saved_send_time_sample; + s->send_time = m->rc.arrive.saved_send_time; + s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample; if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time) { - s->max_time = m->rc.saved_prev_max_time; - s->ross_sample.max_time = m->rc.saved_prev_max_time; + s->max_time = m->rc.arrive.saved_prev_max_time; + s->ross_sample.max_time = m->rc.arrive.saved_prev_max_time; } } void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) { (void)bf; (void)lp; - m->rc.saved_send_time = s->send_time; - m->rc.saved_send_time_sample = s->ross_sample.send_time; + m->rc.arrive.saved_send_time = s->send_time; + m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time; if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time) { - m->rc.saved_prev_max_time = s->max_time; + m->rc.arrive.saved_prev_max_time = s->max_time; s->max_time = tw_now(lp) - m->fwd.sim_start_time; s->ross_sample.max_time = tw_now(lp) - m->fwd.sim_start_time; } @@ -1176,7 +1228,7 @@ static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_sta } static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { - m->rc.switch_config_used->done = false; + m->rc.surr.config_used->done = false; } static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) @@ -1186,7 +1238,7 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); assert(switch_config != NULL); int const resume_at_iter = switch_config->resume_at_iter; - m->rc.switch_config_used = switch_config; + m->rc.surr.config_used = switch_config; // consuming all events until indicated iteration is reached bool reached_end = false; @@ -1643,8 +1695,8 @@ static 
int rm_matching_rcv(nw_state * ns, else { bf->c12 = 1; - m->rc.saved_recv_time = ns->recv_time; - m->rc.saved_recv_time_sample = ns->ross_sample.recv_time; + m->rc.mpi_send.saved_recv_time = ns->recv_time; + m->rc.mpi_send.saved_recv_time_sample = ns->ross_sample.recv_time; ns->recv_time += (tw_now(lp) - m->fwd.sim_start_time); ns->ross_sample.recv_time += (tw_now(lp) - m->fwd.sim_start_time); } @@ -1711,8 +1763,8 @@ static int rm_matching_send(nw_state * ns, send_ack_back(ns, bf, m, lp, qi, qitem->req_id); } - m->rc.saved_recv_time = ns->recv_time; - m->rc.saved_recv_time_sample = ns->ross_sample.recv_time; + m->rc.mpi_next.recv.saved_recv_time = ns->recv_time; + m->rc.mpi_next.recv.saved_recv_time_sample = ns->ross_sample.recv_time; ns->recv_time += (tw_now(lp) - qitem->req_init_time); ns->ross_sample.recv_time += (tw_now(lp) - qitem->req_init_time); @@ -1774,8 +1826,8 @@ static void codes_exec_comp_delay( tw_stime ts; nw_message* msg; - m->rc.saved_delay = s->compute_time; - m->rc.saved_delay_sample = s->ross_sample.compute_time; + m->rc.mpi_next.delay.saved_delay = s->compute_time; + m->rc.mpi_next.delay.saved_delay_sample = s->ross_sample.compute_time; s->compute_time += (mpi_op->u.delay.nsecs/compute_time_speedup); s->ross_sample.compute_time += (mpi_op->u.delay.nsecs/compute_time_speedup); ts = (mpi_op->u.delay.nsecs/compute_time_speedup); @@ -1811,8 +1863,8 @@ static void codes_exec_mpi_recv_rc( nw_message* m, tw_lp* lp) { - ns->recv_time = m->rc.saved_recv_time; - ns->ross_sample.recv_time = m->rc.saved_recv_time_sample; + ns->recv_time = m->rc.mpi_next.recv.saved_recv_time; + ns->ross_sample.recv_time = m->rc.mpi_next.recv.saved_recv_time_sample; if(bf->c11) codes_issue_next_event_rc(lp); @@ -1871,8 +1923,8 @@ static void codes_exec_mpi_recv( If no matching isend is found, the receive operation is queued in the pending queue of receive operations. 
*/ - m->rc.saved_recv_time = s->recv_time; - m->rc.saved_recv_time_sample = s->ross_sample.recv_time; + m->rc.mpi_next.recv.saved_recv_time = s->recv_time; + m->rc.mpi_next.recv.saved_recv_time_sample = s->ross_sample.recv_time; mpi_msgs_queue * recv_op = (mpi_msgs_queue*) malloc(sizeof(mpi_msgs_queue)); recv_op->req_init_time = tw_now(lp); @@ -1942,7 +1994,7 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_ int indx = s->sampling_indx; s->mpi_wkld_samples[indx].num_sends_sample--; - s->mpi_wkld_samples[indx].num_bytes_sample -= m->rc.saved_num_bytes; + s->mpi_wkld_samples[indx].num_bytes_sample -= m->rc.mpi_ack.saved_num_bytes; if(bf->c1) { @@ -1968,9 +2020,9 @@ static void codes_exec_mpi_send_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_ if(bf->c3) { - s->num_bytes_sent -= m->rc.saved_num_bytes; - s->ross_sample.num_bytes_sent -= m->rc.saved_num_bytes; - num_bytes_sent -= m->rc.saved_num_bytes; + s->num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes; + s->ross_sample.num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes; + num_bytes_sent -= m->rc.mpi_ack.saved_num_bytes; } } /* executes MPI send and isend operations */ @@ -2033,7 +2085,7 @@ static void codes_exec_mpi_send(nw_state* s, if(lp->gid == TRACK_LP) printf("\n Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag); - m->rc.saved_num_bytes = mpi_op->u.send.num_bytes; + m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes; /* model-net event */ tw_lpid dest_rank = codes_mapping_get_lpid_from_relative(global_dest_rank, NULL, "nw-lp", NULL, 0); @@ -2186,8 +2238,8 @@ static void update_completed_queue_rc(nw_state * s, tw_bf * bf, nw_message * m, { struct pending_waits* wait_elem = (struct pending_waits*)rc_stack_pop(s->processed_wait_op); s->wait_op = wait_elem; - s->wait_time = m->rc.saved_wait_time; - s->ross_sample.wait_time = 
m->rc.saved_wait_time_sample; + s->wait_time = m->rc.mpi_send.saved_wait_time; + s->ross_sample.wait_time = m->rc.mpi_send.saved_wait_time_sample; add_completed_reqs(s, lp, m->fwd.num_matched); codes_issue_next_event_rc(lp); } @@ -2228,8 +2280,8 @@ static void update_completed_queue(nw_state* s, bf->c31 = 1; m->fwd.num_matched = clear_completed_reqs(s, lp, s->wait_op->req_ids, s->wait_op->count); - m->rc.saved_wait_time = s->wait_time; - m->rc.saved_wait_time_sample = s->ross_sample.wait_time; + m->rc.mpi_send.saved_wait_time = s->wait_time; + m->rc.mpi_send.saved_wait_time_sample = s->ross_sample.wait_time; s->wait_time += (tw_now(lp) - s->wait_op->start_time); s->ross_sample.wait_time += (tw_now(lp) - s->wait_op->start_time); @@ -2345,8 +2397,8 @@ static void update_arrival_queue_rc(nw_state* s, } if(bf->c12) { - s->recv_time = m->rc.saved_recv_time; - s->ross_sample.recv_time = m->rc.saved_recv_time_sample; + s->recv_time = m->rc.mpi_send.saved_recv_time; + s->ross_sample.recv_time = m->rc.mpi_send.saved_recv_time_sample; } //if(bf->c10) @@ -2374,8 +2426,8 @@ static void update_arrival_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp //if(s->local_rank != m->fwd.dest_rank) // printf("\n Dest rank %d local rank %d ", m->fwd.dest_rank, s->local_rank); - m->rc.saved_recv_time = s->recv_time; - m->rc.saved_recv_time_sample = s->ross_sample.recv_time; + m->rc.mpi_send.saved_recv_time = s->recv_time; + m->rc.mpi_send.saved_recv_time_sample = s->ross_sample.recv_time; s->num_bytes_recvd += m->fwd.num_bytes; s->ross_sample.num_bytes_recvd += m->fwd.num_bytes; num_bytes_recvd += m->fwd.num_bytes; @@ -2438,8 +2490,8 @@ static void update_message_time( (void)bf; (void)lp; - m->rc.saved_send_time = s->send_time; - m->rc.saved_send_time_sample = s->ross_sample.send_time; + m->rc.arrive.saved_send_time = s->send_time; + m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time; s->send_time += m->fwd.msg_send_time; s->ross_sample.send_time += m->fwd.msg_send_time; } 
@@ -2452,8 +2504,8 @@ static void update_message_time_rc( { (void)bf; (void)lp; - s->send_time = m->rc.saved_send_time; - s->ross_sample.send_time = m->rc.saved_send_time_sample; + s->send_time = m->rc.arrive.saved_send_time; + s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample; } /* initializes the network node LP, loads the trace file in the structs, calls the first MPI operation to be executed */ @@ -2772,8 +2824,8 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) codes_issue_next_event(lp); } - m->rc.saved_recv_time = s->recv_time; - m->rc.saved_recv_time_sample = s->ross_sample.recv_time; + m->rc.mpi_send.saved_recv_time = s->recv_time; + m->rc.mpi_send.saved_recv_time_sample = s->ross_sample.recv_time; s->recv_time += (tw_now(lp) - m->fwd.sim_start_time); s->ross_sample.recv_time += (tw_now(lp) - m->fwd.sim_start_time); @@ -2832,9 +2884,9 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) break; case CLI_BCKGND_CHANGE: - m->rc.saved_send_time = mean_interval_of_job[s->app_id]; // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE + m->rc.change.saved_send_time = mean_interval_of_job[s->app_id]; // Warning: this is overwriting a variable meant for message type MPI_OP_GET_NEXT (specifically CODES_WK_ALLREDUCE) and CLI_BCKGND_ARRIVE mean_interval_of_job[s->app_id] = m->fwd.msg_send_time; - m->rc.saved_marker_time = tw_now(lp); + m->rc.change.saved_marker_time = tw_now(lp); break; case CLI_BCKGND_ARRIVE: @@ -2904,8 +2956,8 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t { // if (bf->c28) // tw_rand_reverse_unif(lp->rng); - s->compute_time = m->rc.saved_delay; - s->ross_sample.compute_time = m->rc.saved_delay_sample; + s->compute_time = m->rc.mpi_next.delay.saved_delay; + s->ross_sample.compute_time = m->rc.mpi_next.delay.saved_delay_sample; } } break; @@ -2914,8 +2966,8 
@@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t if(bf->c27) { s->num_all_reduce--; - s->col_time = m->rc.saved_send_time; - s->all_reduce_time = m->rc.saved_delay; + s->col_time = m->rc.mpi_next.all_reduce.saved_send_time; + s->all_reduce_time = m->rc.mpi_next.all_reduce.saved_delay; } else { @@ -2992,7 +3044,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l /* Notify ranks from other job that checkpoint traffic has * completed */ //int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); - m->rc.saved_marker_time = tw_now(lp); + m->rc.mpi_next.mark.saved_marker_time = tw_now(lp); notify_root_rank(s, lp, bf, m); // printf("Client rank %llu completed workload, local rank %d .\n", s->nw_id, s->local_rank); @@ -3060,9 +3112,9 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l if(s->col_time > 0) { bf->c27 = 1; - m->rc.saved_delay = s->all_reduce_time; + m->rc.mpi_next.all_reduce.saved_delay = s->all_reduce_time; s->all_reduce_time += tw_now(lp) - s->col_time; - m->rc.saved_send_time = s->col_time; + m->rc.mpi_next.all_reduce.saved_send_time = s->col_time; s->col_time = 0; s->num_all_reduce++; } @@ -3089,7 +3141,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l case CODES_WK_MARK: { - m->rc.saved_marker_time = tw_now(lp); + m->rc.mpi_next.mark.saved_marker_time = tw_now(lp); // If we have reached the surrogate switch time, skip next iteration(s) if (have_we_hit_surrogate_switch(s, mpi_op)) { @@ -3279,8 +3331,8 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l if(bf->c8) update_completed_queue_rc(s, bf, m, lp); - s->recv_time = m->rc.saved_recv_time; - s->ross_sample.recv_time = m->rc.saved_recv_time_sample; + s->recv_time = m->rc.mpi_send.saved_recv_time; + s->ross_sample.recv_time = m->rc.mpi_send.saved_recv_time_sample; } break; @@ -3293,7 +3345,7 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf 
* bf, nw_message * m, tw_lp * l break; case CLI_BCKGND_CHANGE: - mean_interval_of_job[s->app_id] = m->rc.saved_send_time; + mean_interval_of_job[s->app_id] = m->rc.change.saved_send_time; break; case CLI_BCKGND_ARRIVE: @@ -3325,11 +3377,11 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp case MPI_OP_GET_NEXT: switch (m->mpi_op->op_type) { case CODES_WK_END: - printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.saved_marker_time); + printf("Network node %d Rank %llu App %d finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.mpi_next.mark.saved_marker_time); break; case CODES_WK_MARK: - fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.saved_marker_time); + fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time); if (OUTPUT_MARKS) { @@ -3340,7 +3392,7 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp char tag_line[32]; int written; - written = sprintf(tag_line, "%llu %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.saved_marker_time); + written = sprintf(tag_line, "%llu %d %.5f\n",s->nw_id, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time); lp_io_write(lp->gid, marker_filename, written, tag_line); } break; From 9a5bf98ac37b8631be47c51642acb00fe8494da1 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 24 Feb 2025 14:22:44 -0500 Subject: [PATCH 020/110] Print function for struct codes_workload_op and enum codes_workload_op_type --- codes/codes-workload.h | 4 + src/workload/codes-workload.c | 150 ++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/codes/codes-workload.h b/codes/codes-workload.h index 2361ac4b..5ac6b333 100644 --- a/codes/codes-workload.h +++ 
b/codes/codes-workload.h @@ -381,6 +381,10 @@ void codes_workload_add_method(struct codes_workload_method const * method); * will shut down automatically once they have issued their last event. */ +/* Printing event :) */ +void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin); +char const * const op_type_string(enum codes_workload_op_type op_type); + #ifdef __cplusplus } #endif diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c index aec7108e..a66e517a 100644 --- a/src/workload/codes-workload.c +++ b/src/workload/codes-workload.c @@ -550,6 +550,156 @@ void codes_workload_add_method(struct codes_workload_method const * method) method_array[num_user_methods++] = method; } +char const * const op_type_string(enum codes_workload_op_type op_type) { + switch(op_type) { + case CODES_WK_END: return "CODES_WK_END"; + case CODES_WK_DELAY: return "CODES_WK_DELAY"; + case CODES_WK_BARRIER: return "CODES_WK_BARRIER"; + case CODES_WK_OPEN: return "CODES_WK_OPEN"; + case CODES_WK_CLOSE: return "CODES_WK_CLOSE"; + case CODES_WK_WRITE: return "CODES_WK_WRITE"; + case CODES_WK_READ: return "CODES_WK_READ"; + case CODES_WK_SEND: return "CODES_WK_SEND"; + case CODES_WK_RECV: return "CODES_WK_RECV"; + case CODES_WK_ISEND: return "CODES_WK_ISEND"; + case CODES_WK_IRECV: return "CODES_WK_IRECV"; + case CODES_WK_BCAST: return "CODES_WK_BCAST"; + case CODES_WK_ALLGATHER: return "CODES_WK_ALLGATHER"; + case CODES_WK_ALLGATHERV: return "CODES_WK_ALLGATHERV"; + case CODES_WK_ALLTOALL: return "CODES_WK_ALLTOALL"; + case CODES_WK_ALLTOALLV: return "CODES_WK_ALLTOALLV"; + case CODES_WK_REDUCE: return "CODES_WK_REDUCE"; + case CODES_WK_ALLREDUCE: return "CODES_WK_ALLREDUCE"; + case CODES_WK_COL: return "CODES_WK_COL"; + case CODES_WK_WAITALL: return "CODES_WK_WAITALL"; + case CODES_WK_WAIT: return "CODES_WK_WAIT"; + case CODES_WK_WAITSOME: return "CODES_WK_WAITSOME"; + case CODES_WK_WAITANY: return "CODES_WK_WAITANY"; + case 
CODES_WK_TESTALL: return "CODES_WK_TESTALL"; + case CODES_WK_REQ_FREE: return "CODES_WK_REQ_FREE"; + case CODES_WK_IGNORE: return "CODES_WK_IGNORE"; + case CODES_WK_MPI_OPEN: return "CODES_WK_MPI_OPEN"; + case CODES_WK_MPI_CLOSE: return "CODES_WK_MPI_CLOSE"; + case CODES_WK_MPI_WRITE: return "CODES_WK_MPI_WRITE"; + case CODES_WK_MPI_READ: return "CODES_WK_MPI_READ"; + case CODES_WK_MPI_COLL_OPEN: return "CODES_WK_MPI_COLL_OPEN"; + case CODES_WK_MPI_COLL_WRITE: return "CODES_WK_MPI_COLL_WRITE"; + case CODES_WK_MPI_COLL_READ: return "CODES_WK_MPI_COLL_READ"; + case CODES_WK_MARK: return "CODES_WK_MARK"; + default: return "UNKNOWN!!"; + } +} + +// Initial implementation by Claude.ai +void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin) { + if (op == NULL) { + return; + } + + // Print common fields first + fprintf(out, "%sop_type = %s\n", begin, op_type_string(op->op_type)); + + fprintf(out, "%s start_time = %f\n", begin, op->start_time); + fprintf(out, "%s end_time = %f\n", begin, op->end_time); + fprintf(out, "%s sim_start_time = %f\n", begin, op->sim_start_time); + fprintf(out, "%s sequence_id = %ld\n", begin, op->sequence_id); + + // Print union fields based on op_type + switch(op->op_type) { + case CODES_WK_DELAY: + fprintf(out, "%s delay.seconds = %f\n", begin, op->u.delay.seconds); + fprintf(out, "%s delay.nsecs = %f\n", begin, op->u.delay.nsecs); + break; + + case CODES_WK_BARRIER: + fprintf(out, "%s barrier.count = %d\n", begin, op->u.barrier.count); + fprintf(out, "%s barrier.root = %d\n", begin, op->u.barrier.root); + break; + + case CODES_WK_OPEN: + case CODES_WK_MPI_OPEN: + case CODES_WK_MPI_COLL_OPEN: + fprintf(out, "%s open.file_id = %lu\n", begin, op->u.open.file_id); + fprintf(out, "%s open.create_flag = %d\n", begin, op->u.open.create_flag); + break; + + case CODES_WK_WRITE: + case CODES_WK_MPI_WRITE: + case CODES_WK_MPI_COLL_WRITE: + fprintf(out, "%s write.file_id = %lu\n", begin, op->u.write.file_id); + 
fprintf(out, "%s write.offset = %ld\n", begin, op->u.write.offset); + fprintf(out, "%s write.size = %zu\n", begin, op->u.write.size); + break; + + case CODES_WK_READ: + case CODES_WK_MPI_READ: + case CODES_WK_MPI_COLL_READ: + fprintf(out, "%s read.file_id = %lu\n", begin, op->u.read.file_id); + fprintf(out, "%s read.offset = %ld\n", begin, op->u.read.offset); + fprintf(out, "%s read.size = %zu\n", begin, op->u.read.size); + break; + + case CODES_WK_CLOSE: + case CODES_WK_MPI_CLOSE: + fprintf(out, "%s close.file_id = %lu\n", begin, op->u.close.file_id); + break; + + case CODES_WK_SEND: + case CODES_WK_ISEND: + fprintf(out, "%s send.source_rank = %d\n", begin, op->u.send.source_rank); + fprintf(out, "%s send.dest_rank = %d\n", begin, op->u.send.dest_rank); + fprintf(out, "%s send.num_bytes = %ld\n", begin, op->u.send.num_bytes); + fprintf(out, "%s send.data_type = %d\n", begin, op->u.send.data_type); + fprintf(out, "%s send.count = %d\n", begin, op->u.send.count); + fprintf(out, "%s send.tag = %d\n", begin, op->u.send.tag); + fprintf(out, "%s send.req_id = %u\n", begin, op->u.send.req_id); + break; + + case CODES_WK_RECV: + case CODES_WK_IRECV: + fprintf(out, "%s recv.source_rank = %d\n", begin, op->u.recv.source_rank); + fprintf(out, "%s recv.dest_rank = %d\n", begin, op->u.recv.dest_rank); + fprintf(out, "%s recv.num_bytes = %ld\n", begin, op->u.recv.num_bytes); + fprintf(out, "%s recv.data_type = %d\n", begin, op->u.recv.data_type); + fprintf(out, "%s recv.count = %d\n", begin, op->u.recv.count); + fprintf(out, "%s recv.tag = %d\n", begin, op->u.recv.tag); + fprintf(out, "%s recv.req_id = %u\n", begin, op->u.recv.req_id); + break; + + case CODES_WK_COL: + case CODES_WK_BCAST: + case CODES_WK_ALLGATHER: + case CODES_WK_ALLGATHERV: + case CODES_WK_ALLTOALL: + case CODES_WK_ALLTOALLV: + case CODES_WK_REDUCE: + case CODES_WK_ALLREDUCE: + fprintf(out, "%scollective.num_bytes = %d\n", begin, op->u.collective.num_bytes); + break; + + case CODES_WK_WAITALL: + case 
CODES_WK_WAITSOME: + case CODES_WK_WAITANY: + case CODES_WK_TESTALL: + fprintf(out, "%s waits.count = %d\n", begin, op->u.waits.count); + fprintf(out, "%s waits.req_ids = %p\n", begin, op->u.waits.req_ids); + break; + + case CODES_WK_WAIT: + fprintf(out, "%s wait.req_id = %u\n", begin, op->u.wait.req_id); + break; + + case CODES_WK_REQ_FREE: + fprintf(out, "%s free.req_id = %u\n", begin, op->u.free.req_id); + break; + + case CODES_WK_END: + case CODES_WK_IGNORE: + case CODES_WK_MARK: + break; + } +} + /* * Local variables: * c-indent-level: 4 From 9da3d364c085f59ab67051ee9ee5d2615a2d38a0 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 24 Feb 2025 14:28:22 -0500 Subject: [PATCH 021/110] Implementing deep copy/check/print for LP state: nw_state --- codes/quicklist.h | 10 +- src/network-workloads/model-net-mpi-replay.c | 513 ++++++++++++++++++- 2 files changed, 521 insertions(+), 2 deletions(-) diff --git a/codes/quicklist.h b/codes/quicklist.h index e2647648..5ca78730 100644 --- a/codes/quicklist.h +++ b/codes/quicklist.h @@ -193,6 +193,14 @@ static __inline__ void qlist_splice(struct qlist_head *qlist, struct qlist_head #define qlist_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)((&((type *)0)->member)))) +/** + * QLIST_OFFSET - get offset to the member that holds qlist_header + * @type: the type of the struct this is embedded in. + * @member: the name of the qlist_struct within the struct. + */ +#define QLIST_OFFSET(type, member) \ + (unsigned long)((&((type *)0)->member)) + /** * qlist_for_each - iterate over a qlist * @pos: the &struct qlist_head to use as a loop counter. 
@@ -252,7 +260,7 @@ static inline int qlist_exists(struct qlist_head *list, struct qlist_head *qlink return 0; } -static inline int qlist_count(struct qlist_head *list) +static inline int qlist_count(struct qlist_head const *list) { struct qlist_head *pos; int count = 0; diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index be15fa30..8f944a3c 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "codes/codes-workload.h" @@ -40,6 +41,7 @@ #define MAX_PERIODS_PER_APP 512 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine #define OUTPUT_MARKS 0 +#define LP_DEBUG 1 static int msg_size_hash_compare( void *key, struct qhash_head *link); @@ -286,6 +288,9 @@ typedef struct pending_waits pending_waits; /* state of the network LP. 
It contains the pointers to send/receive lists */ struct nw_state { +#if LP_DEBUG + size_t num_events_processed; +#endif /* if LP_DEBUG */ long num_events_per_lp; tw_lpid nw_id; short wrkld_end; @@ -2771,6 +2776,9 @@ void nw_test_init(nw_state* s, tw_lp* lp) void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) { assert(s->app_id >= 0 && s->local_rank >= 0); +#if LP_DEBUG + s->num_events_processed++; +#endif /* if LP_DEBUG */ //*(int *)bf = (int)0; rc_stack_gc(lp, s->matched_reqs); @@ -3298,6 +3306,10 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) { +#if LP_DEBUG + s->num_events_processed--; +#endif /* if LP_DEBUG */ + switch(m->msg_type) { case MPI_SEND_ARRIVED: @@ -3409,7 +3421,492 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp break; case CLI_BCKGND_CHANGE: - printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.saved_marker_time, s->app_id, m->fwd.msg_send_time); + printf("======== CHANGE [now: %lf] App|Job:%d | Period: %f\n", m->rc.change.saved_marker_time, s->app_id, m->fwd.msg_send_time); + break; + } +} + +static void make_qlist_cpy(struct qlist_head * into, struct qlist_head const * from, unsigned int sizeof_elem, unsigned int offset_ql) { + assert(sizeof_elem > offset_ql); + + int const num_elems = qlist_count(from); + INIT_QLIST_HEAD(into); + if (num_elems) { + char * pending_recvs = malloc(num_elems * sizeof_elem); + if (pending_recvs == NULL) { + tw_error(TW_LOC, "Malloc failed!"); + } + + char * new_entry = pending_recvs; + int i = 0; + struct qlist_head * ent; + qlist_for_each(ent, from) { + char * entry = ((char*)ent) - offset_ql; + + mempcpy(new_entry, entry, sizeof_elem); + struct qlist_head * new_entry_ql = (void*) (new_entry + offset_ql); + new_entry_ql->prev = (void*)(new_entry - sizeof_elem + offset_ql); + new_entry_ql->next = (void*)(new_entry + sizeof_elem + offset_ql); + i++; + 
new_entry += sizeof_elem; + } + assert(i == num_elems); + + struct qlist_head * first_ql = (void*)(pending_recvs + offset_ql); + struct qlist_head * last_ql = (void*)(pending_recvs + (num_elems - 1) * sizeof_elem + offset_ql); + into->next = first_ql; + into->prev = last_ql; + first_ql->prev = into; + last_ql->next = into; + } +} + +static void free_qlist_cpy(struct qlist_head * into, unsigned int offset_ql) { + if (! qlist_empty(into)) { + void * entry = (char *)(into->next) - offset_ql; + free(entry); + } +} + +// Assumes that ql is at the end of entry!! +static bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) { + int const num_elems = qlist_count(left); + if (num_elems != qlist_count(right)) { + return false; + } + + // Checking element by element + int i = 0; + struct qlist_head * elem_left = left->next; + struct qlist_head * elem_right = right->next; + while (elem_left != left) { + char * entry_left = (char *)(elem_left) - offset_ql; + char * entry_right = (char *)(elem_right) - offset_ql; + + if (!cmp(entry_left, entry_right)) { + return false; + } + + elem_left = elem_left->next; + elem_right = elem_right->next; + i++; + } + assert(i == num_elems); + assert(elem_right == right); + + return true; +} + +bool compare_pending_waits(struct pending_waits const * before, struct pending_waits const * after) { + // if one is null and the other isn't, then they're not equal + if ((before == NULL) != (after == NULL)) { + return false; + } + // only check values if they are not nul + if (before == NULL) { + return true; + } + + bool is_same = true; + + is_same &= before->op_type == after->op_type; + is_same &= before->num_completed == after->num_completed; + is_same &= before->count == after->count; + is_same &= before->start_time == after->start_time; + + for (int i=0; icount; i++) { + is_same &= before->req_ids[i] == after->req_ids[i]; + } + + return is_same; +} + +static bool 
compare_mpi_msg_queues(mpi_msgs_queue * left, mpi_msgs_queue * right) { + bool is_same = true; + is_same &= left->op_type == right->op_type; + is_same &= left->tag == right->tag; + is_same &= left->source_rank == right->source_rank; + is_same &= left->dest_rank == right->dest_rank; + is_same &= left->num_bytes == right->num_bytes; + is_same &= left->req_init_time == right->req_init_time; + is_same &= left->req_id == right->req_id; + return is_same; +} + +static bool compare_completed_requests(completed_requests * left, completed_requests * right) { + bool is_same = true; + is_same &= left->req_id == right->req_id; + return is_same; +} + +static bool compare_msg_size_info(struct msg_size_info * left, struct msg_size_info * right) { + bool is_same = true; + is_same &= left->msg_size == right->msg_size; + is_same &= left->num_msgs == right->num_msgs; + is_same &= left->agg_latency == right->agg_latency; + is_same &= left->avg_latency == right->avg_latency; + is_same &= left->hash_link.next == right->hash_link.next; // This is not correct, we have to do deep copy this and chek that it is the same + is_same &= left->hash_link.prev == right->hash_link.prev; + return is_same; +} + +// Deep-copy of nw_state!! 
+// Functionality to check for correct implementation of reverse event handler +static void save_nw_lp_state(nw_state * into, nw_state const * from) { + memcpy(into, from, sizeof(nw_state)); + + make_qlist_cpy(&into->arrival_queue, &from->arrival_queue,sizeof(mpi_msgs_queue), QLIST_OFFSET(mpi_msgs_queue, ql)); + make_qlist_cpy(&into->pending_recvs_queue, &from->pending_recvs_queue, sizeof(mpi_msgs_queue), QLIST_OFFSET(mpi_msgs_queue, ql)); + make_qlist_cpy(&into->completed_reqs, &from->completed_reqs, sizeof(completed_requests), QLIST_OFFSET(completed_requests, ql)); + make_qlist_cpy(&into->msg_sz_list, &from->msg_sz_list, sizeof(struct msg_size_info), QLIST_OFFSET(struct msg_size_info, ql)); + // No need to copy msg_sz_table because all data is also in msg_sz_list + + int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + into->known_completed_jobs = malloc(num_jobs * sizeof(int)); + memcpy(into->known_completed_jobs, from->known_completed_jobs, num_jobs * sizeof(int)); + if (from->wait_op != NULL) { + into->wait_op = malloc(sizeof(pending_waits)); + memcpy(into->wait_op, from->wait_op, sizeof(pending_waits)); + } + + // Don't forget to make deep copies of any new complex data types that nw_state points to +} + +static void print_mpi_msgs_queue(FILE * out, struct qlist_head * head, char const * before) { + mpi_msgs_queue * current = NULL; + qlist_for_each_entry(current, head, ql) { + fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", before, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id); + } +} + +// Cleaning up deep-copy +static void clean_nw_lp_state(nw_state * into) { + free_qlist_cpy(&into->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql)); + free_qlist_cpy(&into->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql)); + free_qlist_cpy(&into->completed_reqs, QLIST_OFFSET(completed_requests, ql)); + 
free_qlist_cpy(&into->msg_sz_list, QLIST_OFFSET(struct msg_size_info, ql)); + free(into->known_completed_jobs); + if (into->wait_op != NULL) { + free(into->wait_op); + } +} + +// Checking that deep-copy is the same as original!! +// Originally filled with a prompt on Claude +static bool check_nw_lp_state(nw_state * before, nw_state const * after) { + bool is_same = true; + + // Basic fields + is_same &= (before->num_events_per_lp == after->num_events_per_lp); + is_same &= (before->nw_id == after->nw_id); + is_same &= (before->wrkld_end == after->wrkld_end); + is_same &= (before->app_id == after->app_id); + is_same &= (before->local_rank == after->local_rank); + is_same &= (before->qos_level == after->qos_level); + + // Pattern and completion flags + is_same &= (before->synthetic_pattern == after->synthetic_pattern); + is_same &= (before->is_finished == after->is_finished); + is_same &= (before->num_own_job_ranks_completed == after->num_own_job_ranks_completed); + + // Operation counts + is_same &= (before->num_sends == after->num_sends); + is_same &= (before->num_recvs == after->num_recvs); + is_same &= (before->num_cols == after->num_cols); + is_same &= (before->num_delays == after->num_delays); + is_same &= (before->num_wait == after->num_wait); + is_same &= (before->num_waitall == after->num_waitall); + is_same &= (before->num_waitsome == after->num_waitsome); + + // Timing information + is_same &= (before->start_time == after->start_time); + is_same &= (before->col_time == after->col_time); + is_same &= (before->reduce_time == after->reduce_time); + is_same &= (before->num_reduce == after->num_reduce); + is_same &= (before->all_reduce_time == after->all_reduce_time); + is_same &= (before->num_all_reduce == after->num_all_reduce); + is_same &= (before->elapsed_time == after->elapsed_time); + is_same &= (before->compute_time == after->compute_time); + is_same &= (before->send_time == after->send_time); + is_same &= (before->max_time == after->max_time); + is_same 
&= (before->recv_time == after->recv_time); + is_same &= (before->wait_time == after->wait_time); + + // Interval and current state + is_same &= (before->cur_interval_end == after->cur_interval_end); + + // Data statistics + is_same &= (before->num_bytes_sent == after->num_bytes_sent); + is_same &= (before->num_bytes_recvd == after->num_bytes_recvd); + is_same &= (before->syn_data == after->syn_data); + is_same &= (before->gen_data == after->gen_data); + + // Switch and routing information + is_same &= (before->prev_switch == after->prev_switch); + is_same &= (before->saved_perm_dest == after->saved_perm_dest); + is_same &= (before->rc_perm == after->rc_perm); + + // Sampling information + is_same &= (before->sampling_indx == after->sampling_indx); + //is_same &= (before->max_arr_size == after->max_arr_size); + + // Compare string buffers + is_same &= (strcmp(before->output_buf, after->output_buf) == 0); + is_same &= (strcmp(before->col_stats, after->col_stats) == 0); + + // Compare switch configuration size + is_same &= (before->switch_config_size == after->switch_config_size); + + // Complex elements + is_same &= are_qlist_equal(&before->arrival_queue, &after->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues); + is_same &= are_qlist_equal(&before->pending_recvs_queue, &after->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues); + is_same &= are_qlist_equal(&before->completed_reqs, &after->completed_reqs, QLIST_OFFSET(completed_requests, ql), (bool (*) (void *, void *)) compare_completed_requests); + is_same &= are_qlist_equal(&before->msg_sz_list, &after->msg_sz_list, QLIST_OFFSET(struct msg_size_info, ql), (bool (*) (void *, void *)) compare_msg_size_info); + + is_same &= !memcmp(&before->ross_sample, &after->ross_sample, sizeof(struct ross_model_sample)); + + int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + is_same &= 
!memcmp(before->known_completed_jobs, after->known_completed_jobs, num_jobs * sizeof(int)); + is_same &= compare_pending_waits(before->wait_op, after->wait_op); + + // Skipped pointer comparisons (used in reverse computation): + // - processed_ops + // - processed_wait_op + // - matched_reqs + // - msg_sz_table + // Pointers used in some data collection (IO) or outside of PDES loop + // - mpi_wkld_samples + // - switch_config + + // There is no need to implement msg_sz_table as all values are already + // accounted for in msg_sz_list. We can safely ignore all values in msg_sz_list + + return is_same; +} + +// Originally implemneted with a prompt on Claude.ai (tedious code, easy to check and produce) +static void print_nw_lp_state(FILE * out, nw_state * state) { + int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + +#if LP_DEBUG + fprintf(out, " num_events_processed = %zu\n", state->num_events_processed); +#endif /* if LP_DEBUG */ + fprintf(out, " num_events_per_lp = %ld\n", state->num_events_per_lp); + fprintf(out, " nw_id = %lu\n", state->nw_id); + fprintf(out, " wrkld_end = %d\n", state->wrkld_end); + fprintf(out, " app_id = %d\n", state->app_id); + fprintf(out, " local_rank = %d\n", state->local_rank); + fprintf(out, " qos_level = %d\n", state->qos_level); + fprintf(out, " synthetic_pattern = %d\n", state->synthetic_pattern); + fprintf(out, " is_finished = %d\n", state->is_finished); + fprintf(out, "num_own_job_ranks_completed = %d\n", state->num_own_job_ranks_completed); + fprintf(out, " known_completed_jobs[%d] = [", num_jobs); + for(int i=0; iknown_completed_jobs[i], i+1==num_jobs ? 
"" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, " *processed_ops = %p\n", state->processed_ops); + fprintf(out, " *processed_wait_op = %p\n", state->processed_wait_op); + fprintf(out, " *matched_reqs = %p\n", state->matched_reqs); + + // Operation counts + fprintf(out, " num_sends = %lu\n", state->num_sends); + fprintf(out, " num_recvs = %lu\n", state->num_recvs); + fprintf(out, " num_cols = %lu\n", state->num_cols); + fprintf(out, " num_delays = %lu\n", state->num_delays); + fprintf(out, " num_wait = %lu\n", state->num_wait); + fprintf(out, " num_waitall = %lu\n", state->num_waitall); + fprintf(out, " num_waitsome = %lu\n", state->num_waitsome); + + // Timing information + fprintf(out, " start_time = %g\n", state->start_time); + fprintf(out, " col_time = %g\n", state->col_time); + fprintf(out, " reduce_time = %g\n", state->reduce_time); + fprintf(out, " num_reduce = %d\n", state->num_reduce); + fprintf(out, " all_reduce_time = %g\n", state->all_reduce_time); + fprintf(out, " num_all_reduce = %d\n", state->num_all_reduce); + fprintf(out, " elapsed_time = %g\n", state->elapsed_time); + fprintf(out, " compute_time = %g\n", state->compute_time); + fprintf(out, " send_time = %g\n", state->send_time); + fprintf(out, " max_time = %g\n", state->max_time); + fprintf(out, " recv_time = %g\n", state->recv_time); + fprintf(out, " wait_time = %g\n", state->wait_time); + + // Queue heads + fprintf(out, " arrival_queue[%d] = [\n", qlist_count(&state->arrival_queue)); + print_mpi_msgs_queue(out, &state->arrival_queue, " "); + fprintf(out, "]\n"); + fprintf(out, " pending_recvs_queue[%d] = [\n", qlist_count(&state->pending_recvs_queue)); + print_mpi_msgs_queue(out, &state->pending_recvs_queue, " "); + fprintf(out, "]\n"); + + fprintf(out, " completed_reqs[%d] = [\n", qlist_count(&state->completed_reqs)); + completed_requests * current = NULL; + qlist_for_each_entry(current, &state->completed_reqs, ql) { + fprintf(out, " Req: req_id: %u\n", current->req_id); + } + 
fprintf(out, "]\n"); + + fprintf(out, " cur_interval_end = %g\n", state->cur_interval_end); + fprintf(out, " *wait_op = %p\n", state->wait_op); + if (state->wait_op != NULL) { + fprintf(out, " |.op_type = %d\n", state->wait_op->op_type); + fprintf(out, " |.req_ids = ["); + for(int i = 0; i < state->wait_op->count; i++) { + fprintf(out, "%d%s", state->wait_op->req_ids[i], i+1==state->wait_op->count ? "" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, " |.num_completed = %d\n", state->wait_op->num_completed); + fprintf(out, " |.count = %d\n", state->wait_op->count); + fprintf(out, " |.start_time = %g\n", state->wait_op->start_time); + } + fprintf(out, " msg_sz_list[%d] = [\n", qlist_count(&state->completed_reqs)); + struct msg_size_info * ms_info = NULL; + qlist_for_each_entry(ms_info, &state->msg_sz_list, ql) { + fprintf(out, " MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p hash_link.prev: %p\n", ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev); + } + fprintf(out, "]\n"); + + // Data statistics + fprintf(out, " num_bytes_sent = %llu\n", state->num_bytes_sent); + fprintf(out, " num_bytes_recvd = %llu\n", state->num_bytes_recvd); + fprintf(out, " syn_data = %llu\n", state->syn_data); + fprintf(out, " gen_data = %llu\n", state->gen_data); + + fprintf(out, " prev_switch = %lu\n", state->prev_switch); + fprintf(out, " saved_perm_dest = %d\n", state->saved_perm_dest); + fprintf(out, " rc_perm = %lu\n", state->rc_perm); + + // Sampling information + fprintf(out, " sampling_indx = %d\n", state->sampling_indx); + fprintf(out, " max_arr_size = %d\n", state->max_arr_size); + fprintf(out, "* mpi_wkld_samples = %p\n", state->mpi_wkld_samples); + fprintf(out, " output_buf = %.512s...\n", state->output_buf); + fprintf(out, " col_stats = %.64s...\n", state->col_stats); + + fprintf(out, "ross_sample.\n"); + fprintf(out, " | .nw_id = %lu\n", 
state->ross_sample.nw_id); + fprintf(out, " | .app_id = %d\n", state->ross_sample.app_id); + fprintf(out, " | .local_rank = %d\n", state->ross_sample.local_rank); + fprintf(out, " | .num_sends = %lu\n", state->ross_sample.num_sends); + fprintf(out, " | .num_recvs = %lu\n", state->ross_sample.num_recvs); + fprintf(out, " | .num_bytes_sent = %llu\n", state->ross_sample.num_bytes_sent); + fprintf(out, " |.num_bytes_recvd = %llu\n", state->ross_sample.num_bytes_recvd); + fprintf(out, " | .send_time = %g\n", state->ross_sample.send_time); + fprintf(out, " | .recv_time = %g\n", state->ross_sample.recv_time); + fprintf(out, " | .wait_time = %g\n", state->ross_sample.wait_time); + fprintf(out, " | .compute_time = %g\n", state->ross_sample.compute_time); + fprintf(out, " | .comm_time = %g\n", state->ross_sample.comm_time); + fprintf(out, " | .max_time = %g\n", state->ross_sample.max_time); + fprintf(out, " | .avg_msg_time = %g\n", state->ross_sample.avg_msg_time); + + // Configuration + fprintf(out, "* switch_config = %p\n", state->switch_config); + fprintf(out, " switch_config_size = %zu\n", state->switch_config_size); +} + +static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) { + + switch (event_type) { + case MPI_OP_GET_NEXT: return "MPI_OP_GET_NEXT"; + case MPI_SEND_ARRIVED: return "MPI_SEND_ARRIVED"; + case MPI_SEND_ARRIVED_CB: return "MPI_SEND_ARRIVED_CB"; + case MPI_SEND_POSTED: return "MPI_SEND_POSTED"; + case MPI_REND_ARRIVED: return "MPI_REND_ARRIVED"; + case MPI_REND_ACK_ARRIVED: return "MPI_REND_ACK_ARRIVED"; + case CLI_BCKGND_FIN: return "CLI_BCKGND_FIN"; + case CLI_BCKGND_ARRIVE: return "CLI_BCKGND_ARRIVE"; + case CLI_BCKGND_GEN: return "CLI_BCKGND_GEN"; + case CLI_BCKGND_CHANGE: return "CLI_BCKGND_CHANGE"; + case CLI_NBR_FINISH: return "CLI_NBR_FINISH"; + case CLI_OTHER_FINISH: return "CLI_OTHER_FINISH"; + case SURR_SKIP_ITERATION: return "SURR_SKIP_ITERATION"; + default: return "UNKNOWN!!"; + } + +} + +// Original printing 
function from Claude.ai +static void print_nw_message(FILE * out, struct nw_message * msg) { + // Print main fields + fprintf(out, "msg_type = %s\n", MPI_NW_EVENTS_to_string(msg->msg_type)); + fprintf(out, " op_type = %s\n", op_type_string(msg->op_type)); + fprintf(out, "num_rngs = %d\n", msg->num_rngs); + fprintf(out, "event_rc = %d\n", msg->event_rc); + fprintf(out, " mpi_op = %p\n", msg->mpi_op); + fprint_codes_workload_op(out, msg->mpi_op, " |"); + + fprintf(out, "fwd\n"); + fprintf(out, " | .src_rank = %lu\n", msg->fwd.src_rank); + fprintf(out, " | .dest_rank = %d\n", msg->fwd.dest_rank); + fprintf(out, " | .num_bytes = %ld\n", msg->fwd.num_bytes); + fprintf(out, " | .num_matched = %d\n", msg->fwd.num_matched); + fprintf(out, " |.sim_start_time = %g\n", msg->fwd.sim_start_time); + fprintf(out, " | .msg_send_time = %g\n", msg->fwd.msg_send_time); + fprintf(out, " | .req_id = %u\n", msg->fwd.req_id); + fprintf(out, " | .matched_req = %d\n", msg->fwd.matched_req); + fprintf(out, " | .tag = %d\n", msg->fwd.tag); + fprintf(out, " | .app_id = %d\n", msg->fwd.app_id); + fprintf(out, " | .found_match = %d\n", msg->fwd.found_match); + fprintf(out, " |.wait_completed = %d\n", msg->fwd.wait_completed); + fprintf(out, " | .rend_send = %d\n", msg->fwd.rend_send); + + fprintf(out, "rc\n"); + switch(msg->msg_type) { + case CLI_BCKGND_GEN: + fprintf(out, " |.gen\n"); + fprintf(out, " | .saved_syn_length = %d\n", msg->rc.gen.saved_syn_length); + fprintf(out, " | .saved_perm = %d\n", msg->rc.gen.saved_perm); + fprintf(out, " |.saved_prev_switch = %lu\n", msg->rc.gen.saved_prev_switch); + break; + + case CLI_BCKGND_ARRIVE: + case MPI_SEND_ARRIVED_CB: + fprintf(out, " |arrive.saved_prev_max_time = %g\n", msg->rc.arrive.saved_prev_max_time); + fprintf(out, " | arrive.saved_send_time = %g\n", msg->rc.arrive.saved_send_time); + fprintf(out, " |arrive.saved_send_time_sample = %g\n", msg->rc.arrive.saved_send_time_sample); + break; + + case CLI_BCKGND_CHANGE: + fprintf(out, " | 
change.saved_send_time = %g\n", msg->rc.change.saved_send_time); + fprintf(out, " | change.saved_marker_time = %g\n", msg->rc.change.saved_marker_time); + break; + + case MPI_OP_GET_NEXT: + fprintf(out, " .mpi_next\n"); + fprintf(out, " |.saved_elapsed_time = %g\n", msg->rc.mpi_next.saved_elapsed_time); + fprintf(out, " |.all_reduce.saved_send_time = %g\n", msg->rc.mpi_next.all_reduce.saved_send_time); + fprintf(out, " |.all_reduce.saved_delay = %g\n", msg->rc.mpi_next.all_reduce.saved_delay); + + fprintf(out, " |.recv.saved_recv_time = %g\n", msg->rc.mpi_next.recv.saved_recv_time); + fprintf(out, " |.recv.saved_recv_time_sample = %g\n", msg->rc.mpi_next.recv.saved_recv_time_sample); + + fprintf(out, " |.delay.saved_delay = %g\n", msg->rc.mpi_next.delay.saved_delay); + fprintf(out, " |.delay.saved_delay_sample = %g\n", msg->rc.mpi_next.delay.saved_delay_sample); + + fprintf(out, " |.mark.saved_marker_time = %g\n", msg->rc.mpi_next.mark.saved_marker_time); + break; + + case MPI_SEND_ARRIVED: + case MPI_REND_ARRIVED: + case MPI_SEND_POSTED: + fprintf(out, " |.mpi_send\n"); + fprintf(out, " | .saved_wait_time = %g\n", msg->rc.mpi_send.saved_wait_time); + fprintf(out, " |.saved_wait_time_sample = %g\n", msg->rc.mpi_send.saved_wait_time_sample); + fprintf(out, " | .saved_recv_time = %g\n", msg->rc.mpi_send.saved_recv_time); + fprintf(out, " |.saved_recv_time_sample = %g\n", msg->rc.mpi_send.saved_recv_time_sample); + fprintf(out, " | .saved_num_bytes = %lu\n", msg->rc.mpi_send.saved_num_bytes); + break; + + case MPI_REND_ACK_ARRIVED: + fprintf(out, " | mpi_ack.saved_num_bytes = %ld\n", msg->rc.mpi_ack.saved_num_bytes); + break; + + case SURR_SKIP_ITERATION: + fprintf(out, " | surr.config_used = %p\n", msg->rc.surr.config_used); + break; + + default: break; } } @@ -3469,9 +3966,23 @@ const tw_lptype* nw_get_lp_type() return(&nw_lp); } +// ROSS function pointer table to check reverse event handler +crv_checkpointer nw_lp_chkptr = { + &nw_lp, + 0, + 
(save_checkpoint_state_f) save_nw_lp_state, + (clean_checkpoint_state_f) clean_nw_lp_state, + (check_states_f) check_nw_lp_state, + (print_lpstate_f) print_nw_lp_state, + (print_checkpoint_state_f) print_nw_lp_state, + (print_event_f) print_nw_message, +}; + static void nw_add_lp_type() { lp_type_register("nw-lp", nw_get_lp_type()); + // registering custom print for nw_lp LPs + crv_add_custom_state_checkpoint(&nw_lp_chkptr); } /* setup for the ROSS event tracing From 6e97889fa403b719bbe7e6db6cf7d0ba0fc09164 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 24 Feb 2025 14:31:16 -0500 Subject: [PATCH 022/110] Fixing minor reversibility bugs in LP type nw_state --- codes/quicklist.h | 19 +++++++ src/network-workloads/model-net-mpi-replay.c | 53 ++++++++------------ 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/codes/quicklist.h b/codes/quicklist.h index 5ca78730..bacc2c44 100644 --- a/codes/quicklist.h +++ b/codes/quicklist.h @@ -276,6 +276,25 @@ static inline int qlist_count(struct qlist_head const *list) return count; } +static inline void qlist_add_at_index(struct qlist_head *newi, struct qlist_head *list, int index) +{ + if (index < 0) + { + while(index++) + { + list = list->prev; + } + } + else + { + while(index--) + { + list = list->next; + } + } + __qlist_add(newi, list, list->next); +} + static inline struct qlist_head * qlist_find( struct qlist_head *list, int (*compare)(struct qlist_head *, void *), diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 8f944a3c..902e82d9 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -864,6 +864,7 @@ void finish_bckgnd_traffic_rc( (void)lp; ns->is_finished = 0; + ns->elapsed_time = msg->rc.mpi_next.saved_elapsed_time; return; } void finish_bckgnd_traffic( @@ -875,6 +876,7 @@ void finish_bckgnd_traffic( (void)b; (void)msg; ns->is_finished = 1; + msg->rc.mpi_next.saved_elapsed_time = 
ns->elapsed_time; ns->elapsed_time = tw_now(lp) - ns->start_time; printf("\n LP %llu App %d completed sending data %llu completed at time %lf ", LLU(lp->gid),ns->app_id, ns->gen_data, tw_now(lp)); @@ -1331,39 +1333,24 @@ static int clear_completed_reqs(nw_state * s, (void)s; (void)lp; - int i, matched = 0; + int matched = 0; - for( i = 0; i < count; i++) - { - struct qlist_head * ent = NULL; - struct completed_requests * current = NULL; - struct completed_requests * prev = NULL; + struct qlist_head * ent, * _; + struct completed_requests * current = NULL; - int index = 0; - qlist_for_each(ent, &s->completed_reqs) - { - if(prev) - { - rc_stack_push(lp, prev, free, s->matched_reqs); - prev = NULL; - } - - current = qlist_entry(ent, completed_requests, ql); - current->index = index; - if(current->req_id == reqs[i]) - { + int index = 0; + qlist_for_each_safe(ent, _, &s->completed_reqs) { + current = qlist_entry(ent, completed_requests, ql); + for(int i = 0; i < count; i++) { + if(current->req_id == reqs[i]) { + current->index = index; ++matched; - qlist_del(¤t->ql); - prev = current; + qlist_del(ent); + rc_stack_push(lp, current, free, s->matched_reqs); + break; } - ++index; - } - - if(prev) - { - rc_stack_push(lp, prev, free, s->matched_reqs); - prev = NULL; - } + } + index++; } return matched; } @@ -1376,7 +1363,7 @@ static void add_completed_reqs(nw_state * s, { struct completed_requests * req = (struct completed_requests*)rc_stack_pop(s->matched_reqs); // turn on only if wait-all unmatched error arises in optimistic mode. 
- qlist_add(&req->ql, &s->completed_reqs); + qlist_add_at_index(&req->ql, &s->completed_reqs, req->index - count + i + 1); }//end for } @@ -1677,6 +1664,7 @@ static int rm_matching_rcv(nw_state * ns, && ((qi->source_rank == qitem->source_rank) || qi->source_rank == -1)) { matched = 1; + m->rc.mpi_send.saved_num_bytes = qi->num_bytes; qi->num_bytes = qitem->num_bytes; break; } @@ -2090,7 +2078,7 @@ static void codes_exec_mpi_send(nw_state* s, if(lp->gid == TRACK_LP) printf("\n Sender rank %llu global dest rank %d dest-rank %d bytes %"PRIu64" Tag %d", LLU(s->nw_id), global_dest_rank, mpi_op->u.send.dest_rank, mpi_op->u.send.num_bytes, mpi_op->u.send.tag); - m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes; + m->rc.mpi_ack.saved_num_bytes = mpi_op->u.send.num_bytes; /* model-net event */ tw_lpid dest_rank = codes_mapping_get_lpid_from_relative(global_dest_rank, NULL, "nw-lp", NULL, 0); @@ -2380,6 +2368,7 @@ static void update_arrival_queue_rc(nw_state* s, if(m->fwd.found_match >= 0) { mpi_msgs_queue * qi = (mpi_msgs_queue*)rc_stack_pop(s->processed_ops); + qi->num_bytes = m->rc.mpi_send.saved_num_bytes; // int queue_count = qlist_count(&s->pending_recvs_queue); if(m->fwd.found_match == 0) @@ -2926,6 +2915,7 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t if(m->op_type == CODES_WK_END) { s->is_finished = 0; + s->elapsed_time = m->rc.mpi_next.saved_elapsed_time; if(bf->c9) return; @@ -3040,6 +3030,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l if(mpi_op->op_type == CODES_WK_END) { + m->rc.mpi_next.saved_elapsed_time = s->elapsed_time; s->elapsed_time = tw_now(lp) - s->start_time; s->is_finished = 1; From a3e638e73fbe513a1e55bc46d7082c51879c53a9 Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 2 Mar 2025 15:48:39 -0500 Subject: [PATCH 023/110] Adding checkpointer functionality to model-net sub-models --- codes/model-net-method.h | 1 + src/networks/model-net/dragonfly-custom.C | 2 + 
src/networks/model-net/dragonfly-dally.C | 183 +++++++++++++++++++++- src/networks/model-net/dragonfly-plus.C | 2 + src/networks/model-net/slimfly.c | 2 + 5 files changed, 188 insertions(+), 2 deletions(-) diff --git a/codes/model-net-method.h b/codes/model-net-method.h index b6bb01ab..eab2dc7c 100644 --- a/codes/model-net-method.h +++ b/codes/model-net-method.h @@ -71,6 +71,7 @@ struct model_net_method event_f cc_congestion_event_fn; revent_f cc_congestion_event_rc_fn; commit_f cc_congestion_event_commit_fn; + crv_checkpointer * checkpointer; }; extern struct model_net_method * method_array[]; diff --git a/src/networks/model-net/dragonfly-custom.C b/src/networks/model-net/dragonfly-custom.C index 934827a2..cf7bf21b 100644 --- a/src/networks/model-net/dragonfly-custom.C +++ b/src/networks/model-net/dragonfly-custom.C @@ -4022,6 +4022,7 @@ struct model_net_method dragonfly_custom_method = NULL,//(final_f)dragonfly_custom_sample_fin custom_dragonfly_register_model_types, custom_dragonfly_get_model_types, + NULL, }; struct model_net_method dragonfly_custom_router_method = @@ -4044,6 +4045,7 @@ struct model_net_method dragonfly_custom_router_method = NULL,//(final_f)dragonfly_custom_rsample_fin custom_router_register_model_types, custom_dfly_router_get_model_types, + NULL, }; #ifdef ENABLE_CORTEX diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 25bccfe6..76496e9f 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -6924,8 +6924,163 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, msg->num_rngs = 0; } +//*** ---------- START OF reverse handler checking functions ---------- *** +// Print fuction originally constructed with help from Claude.ai +static void print_terminal_state(FILE * out, terminal_state * state) { + fprintf(out, "terminal_state (dragonfly-dally) ->\n"); + fprintf(out, " | packet_counter = %ld\n", state->packet_counter); + 
fprintf(out, " | packet_gen = %d\n", state->packet_gen); + fprintf(out, " | packet_fin = %d\n", state->packet_fin); + fprintf(out, " | total_gen_size = %d\n", state->total_gen_size); + fprintf(out, " | * router_lp = %p\n", state->router_lp); + fprintf(out, " | * router_id = %p\n", state->router_id); + fprintf(out, " | terminal_id = %u\n", state->terminal_id); + fprintf(out, " | connMan = \n"); + fprintf(out, " | *local_congestion_controller = %p\n", state->local_congestion_controller); + fprintf(out, " | workload_lpid_to_app_id = \n"); + fprintf(out, " | app_ids = \n"); + fprintf(out, " | workloads_finished_flag = %d\n", state->workloads_finished_flag); + fprintf(out, " | ** vc_occupancy = %p\n", state->vc_occupancy); + fprintf(out, " | *terminal_available_time = %p\n", state->terminal_available_time); + fprintf(out, " | *** terminal_msgs = %p\n", state->terminal_msgs); + fprintf(out, " | *** terminal_msgs_tail = %p\n", state->terminal_msgs_tail); + fprintf(out, " | * in_send_loop = %p\n", state->in_send_loop); + fprintf(out, " | dragonfly_stats_array = \n"); + fprintf(out, " | ** qos_status = %p\n", state->qos_status); + fprintf(out, " | ** qos_data = %p\n", state->qos_data); + fprintf(out, " | * last_qos_lvl = %p\n", state->last_qos_lvl); + fprintf(out, " | is_monitoring_bw = %d\n", state->is_monitoring_bw); + fprintf(out, " | * st = %p\n", state->st); + fprintf(out, " | * cc_st = %p\n", state->cc_st); + fprintf(out, " | * issueIdle = %p\n", state->issueIdle); + fprintf(out, " | ** terminal_length = %p\n", state->terminal_length); + fprintf(out, " | * anno = %s\n", state->anno ? 
state->anno : "(nil)"); + fprintf(out, " | * params = %p\n", state->params); + fprintf(out, " | * rank_tbl = %p\n", state->rank_tbl); + fprintf(out, " | rank_tbl_pop = %lu\n", state->rank_tbl_pop); + fprintf(out, " | total_time = %f\n", state->total_time); + fprintf(out, " | total_msg_size = %lu\n", state->total_msg_size); + fprintf(out, " | total_hops = %f\n", state->total_hops); + fprintf(out, " | finished_msgs = %ld\n", state->finished_msgs); + fprintf(out, " | finished_chunks = %ld\n", state->finished_chunks); + fprintf(out, " | finished_packets = %ld\n", state->finished_packets); + fprintf(out, " | * last_buf_full = %p\n", state->last_buf_full); + fprintf(out, " | * busy_time = %p\n", state->busy_time); + fprintf(out, " | * link_traffic = %p\n", state->link_traffic); + fprintf(out, " | * total_chunks = %p\n", state->total_chunks); + fprintf(out, " | * stalled_chunks = %p\n", state->stalled_chunks); + fprintf(out, " | injected_chunks = %lu\n", state->injected_chunks); + fprintf(out, " | ejected_chunks = %lu\n", state->ejected_chunks); + fprintf(out, " | max_latency = %f\n", state->max_latency); + fprintf(out, " | min_latency = %f\n", state->min_latency); + fprintf(out, " | output_buf = '%.4096s'\n", state->output_buf); + fprintf(out, " | output_buf2 = '%.4096s'\n", state->output_buf2); + fprintf(out, " | fin_chunks_sample = %ld\n", state->fin_chunks_sample); + fprintf(out, " | data_size_sample = %ld\n", state->data_size_sample); + fprintf(out, " | fin_hops_sample = %f\n", state->fin_hops_sample); + fprintf(out, " | fin_chunks_time = %f\n", state->fin_chunks_time); + fprintf(out, " | * busy_time_sample = %p\n", state->busy_time_sample); + fprintf(out, " | sample_buf = '%.4096s'\n", state->sample_buf); + fprintf(out, " | * sample_stat = %p\n", state->sample_stat); + fprintf(out, " | op_arr_size = %d\n", state->op_arr_size); + fprintf(out, " | max_arr_size = %d\n", state->max_arr_size); + fprintf(out, " | fwd_events = %ld\n", state->fwd_events); + fprintf(out, " | 
rev_events = %ld\n", state->rev_events); + fprintf(out, " | fin_chunks_ross_sample = %ld\n", state->fin_chunks_ross_sample); + fprintf(out, " | data_size_ross_sample = %ld\n", state->data_size_ross_sample); + fprintf(out, " | fin_hops_ross_sample = %ld\n", state->fin_hops_ross_sample); + fprintf(out, " | fin_chunks_time_ross_sample = %f\n", state->fin_chunks_time_ross_sample); + fprintf(out, " | * busy_time_ross_sample = %p\n", state->busy_time_ross_sample); + fprintf(out, " | ross_sample = \n"); + fprintf(out, " | sent_packets = \n"); + fprintf(out, " | last_packet_sent_id = %lu\n", state->last_packet_sent_id); + fprintf(out, " | arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); + fprintf(out, " | remaining_sz_packets = \n"); + fprintf(out, " | last_in_queue_time = %f\n", state->last_in_queue_time); + fprintf(out, " | * predictor_data = %p\n", state->predictor_data); + fprintf(out, " | zombies = \n"); + fprintf(out, " | * frozen_state = %p\n", state->frozen_state); +} + +// Print fuction originally constructed with help from Claude.ai +static void print_terminal_dally_message(FILE * out, struct terminal_dally_message * msg) { + fprintf(out, "terminal_dally_message ->\n"); + fprintf(out, " | magic = %d\n", msg->magic); + fprintf(out, " | travel_start_time = %f\n", msg->travel_start_time); + fprintf(out, " | travel_end_time = %f\n", msg->travel_end_time); + fprintf(out, " | packet_ID = %llu\n", msg->packet_ID); + fprintf(out, " | type = %d\n", msg->type); + fprintf(out, " | notify_type = %d\n", msg->notify_type); + fprintf(out, " | category = %s\n", msg->category); + fprintf(out, " | final_dest_gid = %lu\n", msg->final_dest_gid); + fprintf(out, " | sender_lp = %lu\n", msg->sender_lp); + fprintf(out, " | sender_mn_lp = %lu\n", msg->sender_mn_lp); + fprintf(out, " | dest_terminal_lpid = %lu\n", msg->dest_terminal_lpid); + fprintf(out, " | dfdally_src_terminal_id = 
%u\n", msg->dfdally_src_terminal_id); + fprintf(out, " | dfdally_dest_terminal_id = %u\n", msg->dfdally_dest_terminal_id); + fprintf(out, " | src_terminal_id = %u\n", msg->src_terminal_id); + fprintf(out, " | origin_router_id = %u\n", msg->origin_router_id); + fprintf(out, " | app_id = %d\n", msg->app_id); + fprintf(out, " | my_N_hop = %d\n", msg->my_N_hop); + fprintf(out, " | my_l_hop = %d\n", msg->my_l_hop); + fprintf(out, " | my_g_hop = %d\n", msg->my_g_hop); + fprintf(out, " | my_hops_cur_group = %d\n", msg->my_hops_cur_group); + fprintf(out, " | saved_channel = %d\n", msg->saved_channel); + fprintf(out, " | saved_vc = %d\n", msg->saved_vc); + fprintf(out, " | next_stop = %d\n", msg->next_stop); + fprintf(out, " | this_router_arrival = %f\n", msg->this_router_arrival); + fprintf(out, " | this_router_ptp_latency = %f\n", msg->this_router_ptp_latency); + fprintf(out, " | intm_lp_id = %u\n", msg->intm_lp_id); + fprintf(out, " | last_hop = %d\n", msg->last_hop); + fprintf(out, " | is_intm_visited = %d\n", msg->is_intm_visited); + fprintf(out, " | intm_rtr_id = %d\n", msg->intm_rtr_id); + fprintf(out, " | intm_grp_id = %d\n", msg->intm_grp_id); + fprintf(out, " | saved_src_dest = %d\n", msg->saved_src_dest); + fprintf(out, " | saved_src_chan = %d\n", msg->saved_src_chan); + fprintf(out, " | chunk_id = %u\n", msg->chunk_id); + fprintf(out, " | packet_size = %u\n", msg->packet_size); + fprintf(out, " | message_id = %u\n", msg->message_id); + fprintf(out, " | total_size = %u\n", msg->total_size); + fprintf(out, " | remote_event_size_bytes = %d\n", msg->remote_event_size_bytes); + fprintf(out, " | local_event_size_bytes = %d\n", msg->local_event_size_bytes); + fprintf(out, " | vc_index = %d\n", msg->vc_index); + fprintf(out, " | rail_id = %d\n", msg->rail_id); + fprintf(out, " | output_chan = %d\n", msg->output_chan); + fprintf(out, " | event_rc = \n"); + fprintf(out, " | is_pull = %d\n", msg->is_pull); + fprintf(out, " | pull_size = %u\n", msg->pull_size); + 
fprintf(out, " | path_type = %d\n", msg->path_type); + fprintf(out, " | saved_app_id = %d\n", msg->saved_app_id); + fprintf(out, " | is_there_another_pckt_in_queue = %s\n", msg->is_there_another_pckt_in_queue ? "true" : "false"); + fprintf(out, " | num_rngs = %d\n", msg->num_rngs); + fprintf(out, " | num_cll = %d\n", msg->num_cll); + fprintf(out, " | last_saved_qos = %d\n", msg->last_saved_qos); + fprintf(out, " | qos_reset1 = %d\n", msg->qos_reset1); + fprintf(out, " | qos_reset2 = %d\n", msg->qos_reset2); + fprintf(out, " | rc_is_qos_set = %d\n", msg->rc_is_qos_set); + fprintf(out, " | * rc_qos_data = %p\n", msg->rc_qos_data); + fprintf(out, " | * rc_qos_status = %p\n", msg->rc_qos_status); + fprintf(out, " | saved_send_loop = %d\n", msg->saved_send_loop); + fprintf(out, " | saved_available_time = %f\n", msg->saved_available_time); + fprintf(out, " | saved_min_lat = %f\n", msg->saved_min_lat); + fprintf(out, " | saved_avg_time = %f\n", msg->saved_avg_time); + fprintf(out, " | saved_rcv_time = %f\n", msg->saved_rcv_time); + fprintf(out, " | saved_busy_time = %f\n", msg->saved_busy_time); + fprintf(out, " | saved_total_time = %f\n", msg->saved_total_time); + fprintf(out, " | saved_sample_time = %f\n", msg->saved_sample_time); + fprintf(out, " | msg_start_time = %f\n", msg->msg_start_time); + fprintf(out, " | saved_busy_time_ross = %f\n", msg->saved_busy_time_ross); + fprintf(out, " | saved_fin_chunks_ross = %f\n", msg->saved_fin_chunks_ross); + fprintf(out, " | saved_last_in_queue_time = %f\n", msg->saved_last_in_queue_time); + fprintf(out, " | saved_next_packet_delay = %f\n", msg->saved_next_packet_delay); + fprintf(out, " | msg_new_mn_event = %f\n", msg->msg_new_mn_event); + fprintf(out, " | last_received_time = %f\n", msg->last_received_time); + fprintf(out, " | last_sent_time = %f\n", msg->last_sent_time); + fprintf(out, " | last_bufupdate_time = %f\n", msg->last_bufupdate_time); +} +//*** ---------- END OF reverse handler checking functions ---------- *** + /* 
dragonfly compute node and router LP types */ -extern "C" { tw_lptype dragonfly_dally_lps[] = { // Terminal handling functions @@ -6951,7 +7106,29 @@ tw_lptype dragonfly_dally_lps[] = }, {NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0}, }; -} + +crv_checkpointer dragonfly_dally_checkpointers[] = { + { + &dragonfly_dally_lps[0], + 0, + (save_checkpoint_state_f) NULL, + (clean_checkpoint_state_f) NULL, + (check_states_f) NULL, + (print_lpstate_f) print_terminal_state, + (print_checkpoint_state_f) NULL, + (print_event_f) print_terminal_dally_message, + }, + { + &dragonfly_dally_lps[1], + 0, + (save_checkpoint_state_f) NULL, + (clean_checkpoint_state_f) NULL, + (check_states_f) NULL, + (print_lpstate_f) NULL, + (print_checkpoint_state_f) NULL, + (print_event_f) NULL, + }, +}; /* returns the dragonfly lp type for lp registration */ static const tw_lptype* dragonfly_dally_get_cn_lp_type(void) @@ -8221,6 +8398,7 @@ struct model_net_method dragonfly_dally_method = (event_f)dragonfly_dally_terminal_congestion_event, (revent_f)dragonfly_dally_terminal_congestion_event_rc, (commit_f)dragonfly_dally_terminal_congestion_event_commit, + &dragonfly_dally_checkpointers[0], }; struct model_net_method dragonfly_dally_router_method = @@ -8248,6 +8426,7 @@ struct model_net_method dragonfly_dally_router_method = (event_f)dragonfly_dally_router_congestion_event, (revent_f)dragonfly_dally_router_congestion_event_rc, (commit_f)dragonfly_dally_router_congestion_event_commit, + &dragonfly_dally_checkpointers[1], }; // #ifdef ENABLE_CORTEX diff --git a/src/networks/model-net/dragonfly-plus.C b/src/networks/model-net/dragonfly-plus.C index 96334f0a..141b7ce8 100644 --- a/src/networks/model-net/dragonfly-plus.C +++ b/src/networks/model-net/dragonfly-plus.C @@ -6619,6 +6619,7 @@ struct model_net_method dragonfly_plus_method = { NULL, //(final_f)dragonfly_plus_sample_fin, dfly_plus_register_model_types, dfly_plus_get_model_types, + NULL, }; struct model_net_method dragonfly_plus_router_method = { 
@@ -6640,6 +6641,7 @@ struct model_net_method dragonfly_plus_router_method = { NULL, //(final_f)dragonfly_plus_rsample_fin, dfly_plus_router_register_model_types, dfly_plus_router_get_model_types, + NULL, }; // #ifdef ENABLE_CORTEX diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c index 94188942..eee9cd74 100644 --- a/src/networks/model-net/slimfly.c +++ b/src/networks/model-net/slimfly.c @@ -4045,6 +4045,7 @@ struct model_net_method slimfly_method = NULL, slimfly_register_model_types, slimfly_get_cn_model_types, + NULL, }; struct model_net_method slimfly_router_method = @@ -4067,6 +4068,7 @@ struct model_net_method slimfly_router_method = NULL, slimfly_router_register_model_types, slimfly_get_router_model_types, + NULL, }; From e430feade5935287734ae913c1c146887ed70b04 Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 2 Mar 2025 15:51:08 -0500 Subject: [PATCH 024/110] Moving implementation of linked list equality to quicklist.h --- codes/quicklist.h | 33 ++++++++++++++++++++ src/network-workloads/model-net-mpi-replay.c | 29 ----------------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/codes/quicklist.h b/codes/quicklist.h index bacc2c44..0a73b761 100644 --- a/codes/quicklist.h +++ b/codes/quicklist.h @@ -30,6 +30,8 @@ extern "C" { #endif #include +#include +#include struct qlist_head { struct qlist_head *next, *prev; @@ -311,6 +313,37 @@ static inline struct qlist_head * qlist_find( return NULL; } +/** + * are_qlist_equal - determine if two qlists have the same elements + */ +static inline bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) { + int const num_elems = qlist_count(left); + if (num_elems != qlist_count(right)) { + return false; + } + + // Checking element by element + int i = 0; + struct qlist_head * elem_left = left->next; + struct qlist_head * elem_right = right->next; + while (elem_left != left) { + char * entry_left 
= (char *)(elem_left) - offset_ql; + char * entry_right = (char *)(elem_right) - offset_ql; + + if (!cmp(entry_left, entry_right)) { + return false; + } + + elem_left = elem_left->next; + elem_right = elem_right->next; + i++; + } + assert(i == num_elems); + assert(elem_right == right); + + return true; +} + /* * Local variables: * c-indent-level: 4 diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 902e82d9..41597968 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -3459,35 +3459,6 @@ static void free_qlist_cpy(struct qlist_head * into, unsigned int offset_ql) { } } -// Assumes that ql is at the end of entry!! -static bool are_qlist_equal(struct qlist_head const * left, struct qlist_head const * right, unsigned int offset_ql, bool (cmp) (void *, void *)) { - int const num_elems = qlist_count(left); - if (num_elems != qlist_count(right)) { - return false; - } - - // Checking element by element - int i = 0; - struct qlist_head * elem_left = left->next; - struct qlist_head * elem_right = right->next; - while (elem_left != left) { - char * entry_left = (char *)(elem_left) - offset_ql; - char * entry_right = (char *)(elem_right) - offset_ql; - - if (!cmp(entry_left, entry_right)) { - return false; - } - - elem_left = elem_left->next; - elem_right = elem_right->next; - i++; - } - assert(i == num_elems); - assert(elem_right == right); - - return true; -} - bool compare_pending_waits(struct pending_waits const * before, struct pending_waits const * after) { // if one is null and the other isn't, then they're not equal if ((before == NULL) != (after == NULL)) { From 8b95a700a58941580dca3f3508b19a9dd4aa939e Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 2 Mar 2025 15:54:15 -0500 Subject: [PATCH 025/110] Fixing some potential memory errors (from Valgrind) --- src/networks/model-net/dragonfly-dally.C | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 76496e9f..6675ca4b 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -1858,7 +1858,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) if (p->num_rails % p->num_planes != 0) tw_error(TW_LOC, "Number of rails not evenly divisible by number of planes!\n"); - char rail_select_str[MAX_NAME_LENGTH]; + char rail_select_str[MAX_NAME_LENGTH] = {'\0'}; rc = configuration_get_value(&config, "PARAMS", "rail_select", anno, rail_select_str, MAX_NAME_LENGTH); if(strcmp(rail_select_str, "dedicated") == 0) @@ -1883,7 +1883,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) fprintf(stderr, "global_k_picks for global adaptive routing not specified, setting to %d\n",p->global_k_picks); } - char scoring_str[MAX_NAME_LENGTH]; + char scoring_str[MAX_NAME_LENGTH] = {'\0'}; configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH); if (strcmp(scoring_str, "alpha") == 0) { scoring = ALPHA; @@ -1978,7 +1978,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) // read intra group connections, store from a router's perspective // all links to the same router form a vector - char intraFile[MAX_NAME_LENGTH]; + char intraFile[MAX_NAME_LENGTH] = {'\0'}; configuration_get_value(&config, "PARAMS", "intra-group-connections", anno, intraFile, MAX_NAME_LENGTH); if (strlen(intraFile) <= 0) { @@ -2035,7 +2035,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) // read inter group connections, store from a router's perspective // also create a group level table that tells all the connecting routers - char interFile[MAX_NAME_LENGTH]; + char interFile[MAX_NAME_LENGTH] = {'\0'}; configuration_get_value(&config, "PARAMS", "inter-group-connections", anno, interFile, 
MAX_NAME_LENGTH); if(strlen(interFile) <= 0) { @@ -2100,7 +2100,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) //read link failure file - char failureFileName[MAX_NAME_LENGTH]; + char failureFileName[MAX_NAME_LENGTH] = {'\0'}; failureFileName[0] = '\0'; if (strlen(g_nm_link_failure_filepath) == 0) //was this defined already via a command line argument? From c9729d81a7213e452ad4755330fe355331aa3661 Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 2 Mar 2025 16:06:12 -0500 Subject: [PATCH 026/110] Extending implementation of model-net checkpointer --- codes/model-net-sched.h | 7 + src/networks/model-net/core/model-net-lp.c | 164 +++++++++++++++--- src/networks/model-net/core/model-net-sched.c | 60 +++++++ 3 files changed, 211 insertions(+), 20 deletions(-) diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h index 9f685b85..da28ddc2 100644 --- a/codes/model-net-sched.h +++ b/codes/model-net-sched.h @@ -197,6 +197,13 @@ void model_net_sched_add_rc( // set default parameters for messages that don't specify any void model_net_sched_set_default_params(mn_sched_params *sched_params); +// Reverse handler functionality +void save_model_net_sched(model_net_sched *before, model_net_sched const *after); +void clean_model_net_sched(model_net_sched *before); +bool check_model_net_sched(model_net_sched *before, model_net_sched *after); +void print_model_net_sched(FILE * out, model_net_sched *sched); +void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched); + extern char * sched_names[]; #ifdef __cplusplus diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 8a52c7da..966137fe 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -131,19 +131,23 @@ tw_lptype model_net_base_lp = { sizeof(model_net_base_state), }; -// Functionality to check for correct implementation of reverse event handler +// Functionality to 
check for correct implementation of reverse event handler +static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from); +static void clean_state_net_state(model_net_base_state * state); +static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after); static void print_model_net_state(FILE * out, model_net_base_state * state); +static void print_model_net_checkpoint(FILE * out, model_net_base_state * state); static void print_event_state(FILE * out, model_net_wrap_msg * state); // ROSS function pointer table to check reverse event handler crv_checkpointer model_net_chkptr = { &model_net_base_lp, 0, - (save_checkpoint_state_f) NULL, - (clean_checkpoint_state_f) NULL, - (check_states_f) NULL, + (save_checkpoint_state_f) save_state_net_state, + (clean_checkpoint_state_f) clean_state_net_state, + (check_states_f) check_model_net_state, (print_lpstate_f) print_model_net_state, - (print_checkpoint_state_f) print_model_net_state, + (print_checkpoint_state_f) print_model_net_checkpoint, (print_event_f) print_event_state, }; @@ -1140,22 +1144,140 @@ tw_event* model_net_method_congestion_event(tw_lpid dest_gid, } /* START Checking reverse handler functionality */ +static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from) { + memcpy(into, from, sizeof(model_net_base_state)); + + into->in_sched_send_loop = malloc(from->params->num_queues * sizeof(int)); + for (int i=0; i < from->params->num_queues; i++) { + into->in_sched_send_loop[i] = from->in_sched_send_loop[i]; + } + + into->sched_send = malloc(from->params->num_queues * sizeof(model_net_sched*)); + if (from->params->num_queues > 0) { + model_net_sched * sched_send_array = malloc(from->params->num_queues * sizeof(model_net_sched)); + for(int i = 0; i < from->params->num_queues; i++) { + into->sched_send[i] = &sched_send_array[i]; + save_model_net_sched(into->sched_send[i], from->sched_send[i]); + } + } + + 
into->sched_recv = malloc(sizeof(model_net_sched)); + save_model_net_sched(into->sched_recv, from->sched_recv); + + into->sub_state = NULL; + crv_checkpointer * chptr = method_array[from->net_id]->checkpointer; + if (chptr && chptr->check_lps && chptr->save_lp) { + into->sub_state = calloc(1, from->sub_type->state_sz); + chptr->save_lp(into->sub_state, from->sub_state); + } + + into->node_copy_next_available_time = malloc(from->params->node_copy_queues * sizeof(tw_stime)); + for (int i=0; i < from->params->node_copy_queues; i++) { + into->node_copy_next_available_time[i] = from->node_copy_next_available_time[i]; + } +} + +static void clean_state_net_state(model_net_base_state * state) { + free(state->in_sched_send_loop); + + if (state->params->num_queues > 0) { + for(int i = 0; i < state->params->num_queues; i++) { + clean_model_net_sched(state->sched_send[i]); + } + free(state->sched_send[0]); // backing array is only allocated when num_queues > 0 + } + free(state->sched_send); + clean_model_net_sched(state->sched_recv); + free(state->sched_recv); + + if (state->sub_state != NULL) { + free(state->sub_state); + } + free(state->node_copy_next_available_time); +} + +static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after) { + bool is_same = true; + is_same &= before->net_id == after->net_id; + is_same &= before->nics_per_router == after->nics_per_router; + for (int i=0; i < before->params->num_queues; i++) { + is_same &= before->in_sched_send_loop[i] == after->in_sched_send_loop[i]; + } + is_same &= before->in_sched_recv_loop == after->in_sched_recv_loop; + is_same &= before->msg_id == after->msg_id; + for(int i = 0; i < before->params->num_queues; i++) { + is_same &= check_model_net_sched(before->sched_send[i], after->sched_send[i]); + } + is_same &= check_model_net_sched(before->sched_recv, after->sched_recv); + crv_checkpointer * chptr = method_array[before->net_id]->checkpointer; + if (chptr && before->sub_state != NULL && chptr->check_lps) { + is_same &= chptr->check_lps(before->sub_state, 
after->sub_state); + } else { + tw_error(TW_LOC, "Network of type \"%s\" has not been configured to be checkpointed", model_net_method_names[before->net_id]); + } + is_same &= before->next_available_time == after->next_available_time; + for (int i=0; i < before->params->node_copy_queues; i++) { + is_same &= before->node_copy_next_available_time[i] == after->node_copy_next_available_time[i]; + } + + return is_same; +} + +static void __print_model_net(FILE * out, model_net_base_state * state, bool is_lp_state) { + fprintf(out, "model_net_state ->\n"); + fprintf(out, " | net_id = %d\n", state->net_id); + fprintf(out, " | nics_per_router = %d\n", state->nics_per_router); + fprintf(out, " | *in_sched_send_loop[%d] = [", state->params->num_queues); // (done) deep-all + for (int i=0; i < state->params->num_queues; i++) { + fprintf(out, "%d%s", state->in_sched_send_loop[i], i==state->params->num_queues-1 ? "" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, " | in_sched_recv_loop = %d\n", state->in_sched_recv_loop); + fprintf(out, " | msg_id = %lu\n", state->msg_id); + fprintf(out, " | ** sched_send = %p\n", state->sched_send); // (done) deep-all + fprintf(out, " | * sched_recv = %p\n", state->sched_recv); // (done) deep-all + fprintf(out, " | * params = %p\n", state->params); + fprintf(out, " | * sub_type = %p\n", state->sub_type); + fprintf(out, " | * sub_model_type = %p\n", state->sub_model_type); + fprintf(out, " | * sub_state = %p\n", state->sub_state); // deep-all + fprintf(out, " | next_available_time = %f\n", state->next_available_time); + fprintf(out, " | *node_copy_next_available_time[%d] = [", state->params->node_copy_queues); // (done) deep-all + for (int i=0; i < state->params->node_copy_queues; i++) { + fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? 
"" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, " | *sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate); // no need to check + fprintf(out, " | sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate); // no need to check + + void (*print_modelnet) (FILE * out, model_net_sched *sched) = is_lp_state ? print_model_net_sched : print_model_net_sched_checkpoint; + + fprintf(out, "\n"); + for(int i = 0; i < state->params->num_queues; i++) { + fprintf(out, "==== CONTENT for sched_send[%d]:\n", i); + print_modelnet(out, state->sched_send[i]); + } + + fprintf(out, "\n==== CONTENT for sched_recv:\n"); + print_modelnet(out, state->sched_recv); + + crv_checkpointer * chptr = method_array[state->net_id]->checkpointer; + if (chptr && state->sub_state != NULL) { + if (is_lp_state && chptr->print_lp) { + fprintf(out, "\n==== CONTENT for sub_state:\n"); + chptr->print_lp(out, state->sub_state); + } + if (!is_lp_state && chptr->print_checkpoint) { + fprintf(out, "\n==== CONTENT for sub_state:\n"); + chptr->print_checkpoint(out, state->sub_state); + } + } +} + static void print_model_net_state(FILE * out, model_net_base_state * state) { - fprintf(out, " net_id = %d\n", state->net_id); - fprintf(out, " nics_per_router = %d\n", state->nics_per_router); - fprintf(out, "*in_sched_send_loop = %p\n", state->in_sched_send_loop); - fprintf(out, " in_sched_recv_loop = %d\n", state->in_sched_recv_loop); - fprintf(out, " msg_id = %lu\n", state->msg_id); - fprintf(out, "** sched_send = %p\n", state->sched_send); - fprintf(out, "* sched_recv = %p\n", state->sched_recv); - fprintf(out, "* params = %p\n", state->params); - fprintf(out, "* sub_type = %p\n", state->sub_type); - fprintf(out, "* sub_model_type = %p\n", state->sub_model_type); - fprintf(out, "* sub_state = %p\n", state->sub_state); - fprintf(out, "next_available_time = %f\n", state->next_available_time); - fprintf(out, "*node_copy_next_available_time = %p\n", 
state->node_copy_next_available_time); - fprintf(out, "*sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate); - fprintf(out, "sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate); + __print_model_net(out, state, true); +} +static void print_model_net_checkpoint(FILE * out, model_net_base_state * state) { + __print_model_net(out, state, false); } static void print_type(FILE * out, enum model_net_base_event_type type) { @@ -1229,6 +1351,8 @@ static void print_event_state(FILE * out, model_net_wrap_msg * msg) { default: fprintf(out, "The content of this message cannot be deciphered yet with the information given\n"); } + // TODO: print internal state of message + // void * sub_msg = ((char*)msg) + msg_offsets[state->net_id]; } /* END checking reverse handler functionality */ diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c index ca31659a..9fefa30d 100644 --- a/src/networks/model-net/core/model-net-sched.c +++ b/src/networks/model-net/core/model-net-sched.c @@ -80,6 +80,66 @@ void model_net_sched_set_default_params(mn_sched_params *sched_params){ sched_params->prio = -1; } +/* START Checking reverse handler functionality */ +void save_model_net_sched(model_net_sched *into, model_net_sched const *from) { + into->type = from->type; + + into->dat = NULL; + crv_checkpointer const * chptr = sched_checkpointers[from->type]; + if (chptr && chptr->save_lp) { + into->dat = malloc(chptr->sz_storage); + chptr->save_lp(into->dat, from->dat); + } +} + +void clean_model_net_sched(model_net_sched *state) { + if (state->dat) { + crv_checkpointer const * chptr = sched_checkpointers[state->type]; + assert (chptr && chptr->clean_lp); + chptr->clean_lp(state->dat); + free(state->dat); + } +} + +bool check_model_net_sched( + model_net_sched *before, + model_net_sched *after +) { + crv_checkpointer const * chptr = sched_checkpointers[before->type]; + if (before->dat != NULL && chptr && 
chptr->check_lps) { + return chptr->check_lps(before->dat, after->dat); + } + tw_error(TW_LOC, "Scheduler of type \"%s\" has not been configured to be checkpointed", sched_names[before->type]); + return false; +} + +static void __print_model_net_sched( + FILE * out, + model_net_sched *sched, + bool is_lp_state +) { + crv_checkpointer const * chptr = sched_checkpointers[sched->type]; + fprintf(out, "model_net_sched.sched_type = %d\n", sched->type); + fprintf(out, "model_net_sched.\n"); + if (chptr) { + if (is_lp_state && chptr->print_lp) { + chptr->print_lp(out, sched->dat); + } + if (!is_lp_state && chptr->print_checkpoint) { + chptr->print_checkpoint(out, sched->dat); + } + } +} + +void print_model_net_sched(FILE * out, model_net_sched *sched) { + __print_model_net_sched(out, sched, true); +} + +void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched) { + __print_model_net_sched(out, sched, false); +} +/* STOP Checking reverse handler functionality */ + /* * Local variables: * c-indent-level: 4 From 7bc29c21df10be26586528bddcdbdd386d934a6e Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 2 Mar 2025 16:08:30 -0500 Subject: [PATCH 027/110] Implementing FCFS checkpointer --- codes/model-net-sched.h | 15 +- codes/model-net.h | 3 + src/networks/model-net/core/model-net-lp.c | 26 +++- .../model-net/core/model-net-sched-impl.c | 128 +++++++++++++++++- src/networks/model-net/core/model-net-sched.c | 2 +- 5 files changed, 164 insertions(+), 10 deletions(-) diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h index da28ddc2..ad7ccf6e 100644 --- a/codes/model-net-sched.h +++ b/codes/model-net-sched.h @@ -23,16 +23,16 @@ typedef struct mn_sched_params_s mn_sched_params; #include "model-net-method.h" /// types of schedulers -/// format: enum type, config string, function pointer names +/// format: enum type, config string, function pointer names, crv_checkpointer instance /// fcfs-full eschews packetization #define SCHEDULER_TYPES \ - X(MN_SCHED_FCFS, 
"fcfs", &fcfs_tab) \ - X(MN_SCHED_FCFS_FULL, "fcfs-full", &fcfs_tab) \ - X(MN_SCHED_RR, "round-robin", &rr_tab) \ - X(MN_SCHED_PRIO, "priority", &prio_tab) \ - X(MAX_SCHEDS, NULL, NULL) + X(MN_SCHED_FCFS, "fcfs", &fcfs_tab, &fcfs_chptr) \ + X(MN_SCHED_FCFS_FULL, "fcfs-full", &fcfs_tab, &fcfs_chptr) \ + X(MN_SCHED_RR, "round-robin", &rr_tab, NULL) \ + X(MN_SCHED_PRIO, "priority", &prio_tab, NULL) \ + X(MAX_SCHEDS, NULL, NULL, NULL) -#define X(a,b,c) a, +#define X(a,b,c,d) a, enum sched_type { SCHEDULER_TYPES }; @@ -205,6 +205,7 @@ void print_model_net_sched(FILE * out, model_net_sched *sched); void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched); extern char * sched_names[]; +extern const crv_checkpointer * sched_checkpointers[]; #ifdef __cplusplus } diff --git a/codes/model-net.h b/codes/model-net.h index a529627c..f003cc10 100644 --- a/codes/model-net.h +++ b/codes/model-net.h @@ -163,6 +163,9 @@ struct mn_stats long max_event_size; }; +bool check_model_net_request(model_net_request const * before, model_net_request const * after); +void print_model_net_request(FILE * out, char const * before, model_net_request * item); + /* Registers all model-net LPs in ROSS. 
Should be called after * configuration_load, but before codes_mapping_setup */ void model_net_register(); diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 966137fe..cbf49b54 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -1303,7 +1303,31 @@ static void print_type(FILE * out, enum model_net_base_event_type type) { } } -static void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) { +// Used Claude for an initial draft of this function +bool check_model_net_request(model_net_request const * before, model_net_request const * after) { + bool is_same = true; + + is_same &= (before->final_dest_lp == after->final_dest_lp); + is_same &= (before->dest_mn_lp == after->dest_mn_lp); + is_same &= (before->src_lp == after->src_lp); + is_same &= (before->msg_start_time == after->msg_start_time); + is_same &= (before->msg_new_mn_event == after->msg_new_mn_event); + is_same &= (before->msg_size == after->msg_size); + is_same &= (before->pull_size == after->pull_size); + is_same &= (before->packet_size == after->packet_size); + is_same &= (before->msg_id == after->msg_id); + is_same &= (before->net_id == after->net_id); + is_same &= (before->is_pull == after->is_pull); + is_same &= (before->queue_offset == after->queue_offset); + is_same &= (before->remote_event_size == after->remote_event_size); + is_same &= (before->self_event_size == after->self_event_size); + is_same &= (before->app_id == after->app_id); + is_same &= (strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0); + + return is_same; +} + +void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) { fprintf(out, "%sfinal_dest_lp = %ld\n", starts_with, req->final_dest_lp); fprintf(out, "%sdest_mn_lp = %ld\n", starts_with, req->dest_mn_lp); fprintf(out, "%ssrc_lp = %ld\n", starts_with, req->src_lp); diff --git 
a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c index 3c3d25a9..e5f2d9e2 100644 --- a/src/networks/model-net/core/model-net-sched-impl.c +++ b/src/networks/model-net/core/model-net-sched-impl.c @@ -83,6 +83,10 @@ static void fcfs_next_rc( const void * rc_event_save, const model_net_sched_rc * rc, tw_lp * lp); +static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from); +static void clean_state_fcfs_state(mn_sched_queue * into); +static bool check_fcfs_state(mn_sched_queue *before, mn_sched_queue *after); +static void print_fcfs_state(FILE * out, mn_sched_queue *sched); // ROUND-ROBIN static void rr_init ( @@ -150,12 +154,29 @@ static const model_net_sched_interface rr_tab = static const model_net_sched_interface prio_tab = { &prio_init, &prio_destroy, &prio_add, &prio_add_rc, &prio_next, &prio_next_rc}; -#define X(a,b,c) c, +static const crv_checkpointer fcfs_chptr = { + NULL, + sizeof(mn_sched_queue), + (save_checkpoint_state_f) save_state_fcfs_state, + (clean_checkpoint_state_f) clean_state_fcfs_state, + (check_states_f) check_fcfs_state, + (print_lpstate_f) print_fcfs_state, + (print_checkpoint_state_f) print_fcfs_state, + NULL, +}; + +#define X(a,b,c,d) c, const model_net_sched_interface * sched_interfaces[] = { SCHEDULER_TYPES }; #undef X +#define X(a,b,c,d) d, +const crv_checkpointer * sched_checkpointers[] = { + SCHEDULER_TYPES +}; +#undef X + /// FCFS implementation void fcfs_init( @@ -192,11 +213,13 @@ void fcfs_add ( q->req = *req; q->sched_params = *sched_params; q->rem = req->msg_size; + assert(req->remote_event_size == remote_event_size); if (remote_event_size > 0){ q->remote_event = malloc(remote_event_size); memcpy(q->remote_event, remote_event, remote_event_size); } else { q->remote_event = NULL; } + assert(req->self_event_size == local_event_size); if (local_event_size > 0){ q->local_event = malloc(local_event_size); memcpy(q->local_event, local_event, 
local_event_size); @@ -364,6 +387,109 @@ void fcfs_next_rc( } } +static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * from) { + into->req = from->req; + into->sched_params = from->sched_params; + into->rem = from->rem; + into->entry_time = from->entry_time; + if (from->remote_event != NULL) { + assert(from->req.remote_event_size > 0); + into->remote_event = malloc(from->req.remote_event_size); + memcpy(into->remote_event, from->remote_event, from->req.remote_event_size); + } + if (from->local_event != NULL) { + assert(from->req.self_event_size > 0); + into->local_event = malloc(from->req.self_event_size); + memcpy(into->local_event, from->local_event, from->req.self_event_size); + } +} + +static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from) { + into->method = from->method; + into->is_recv_queue = from->is_recv_queue; + into->queue_len = from->queue_len; + INIT_QLIST_HEAD(&into->reqs); + + mn_sched_qitem * sched_qitem = NULL; + qlist_for_each_entry(sched_qitem, &from->reqs, ql) { + mn_sched_qitem * new_sched_qitem = calloc(1, sizeof(mn_sched_qitem)); // zeroed so event pointers skipped by save_mn_sched_qitem stay NULL + save_mn_sched_qitem(new_sched_qitem, sched_qitem); + qlist_add_tail(&new_sched_qitem->ql, &into->reqs); + } +} + +static void clean_mn_sched_qitem(mn_sched_qitem * into) { + if (into->remote_event != NULL) { + free(into->remote_event); + } + if (into->local_event != NULL) { + free(into->local_event); + } +} + +static void clean_state_fcfs_state(mn_sched_queue * into) { + mn_sched_qitem * sched_qitem = NULL; + mn_sched_qitem * _ = NULL; + qlist_for_each_entry_safe(sched_qitem, _, &into->reqs, ql) { + clean_mn_sched_qitem(sched_qitem); + qlist_del(&sched_qitem->ql); + free(sched_qitem); + } +} + +static bool check_mn_sched_qitem(mn_sched_qitem * before, mn_sched_qitem * after) { + bool is_same = true; + + is_same &= check_model_net_request(&before->req, &after->req); + is_same &= before->sched_params.prio == after->sched_params.prio; + is_same &= before->rem == 
after->rem; + is_same &= before->entry_time == after->entry_time; + is_same &= !memcmp(before->remote_event, after->remote_event, before->req.remote_event_size); + is_same &= !memcmp(before->local_event, after->local_event, before->req.self_event_size); + return is_same; +} + +static bool check_fcfs_state(mn_sched_queue * before, mn_sched_queue * after) { + bool is_same = true; + + is_same &= before->is_recv_queue == after->is_recv_queue; + is_same &= before->queue_len == after->queue_len; + + if (qlist_count(&before->reqs) != qlist_count(&after->reqs)) { + return false; + } + + is_same &= are_qlist_equal(&before->reqs, &after->reqs, QLIST_OFFSET(mn_sched_qitem, ql), (bool (*) (void *, void *)) check_mn_sched_qitem); + + return is_same; +} + +static void print_mn_sched_qitem(FILE * out, mn_sched_qitem * item) { + fprintf(out, " mn_sched_qitem\n"); + fprintf(out, " | .req\n"); + print_model_net_request(out, " | |.", &item->req); + fprintf(out, " | sched_params.prio = %d\n", item->sched_params.prio); + fprintf(out, " | rem = %lu\n", item->rem); + fprintf(out, " | entry_time = %g\n", item->entry_time); + fprintf(out, " | remote_event = %p (contents below)\n", item->remote_event); + tw_fprint_binary_array(out, item->remote_event, item->req.remote_event_size); + fprintf(out, " | local_event = %p (contents below)\n", item->local_event); + tw_fprint_binary_array(out, item->local_event, item->req.self_event_size); +} + +static void print_fcfs_state(FILE * out, mn_sched_queue *sched) { + fprintf(out, "FCFS:\n"); + fprintf(out, " | .method = %p\n", sched->method); + fprintf(out, " | .is_recv_queue = %d\n", sched->is_recv_queue); + fprintf(out, " | .queue_len = %d\n", sched->queue_len); + fprintf(out, " | .reqs[%d] = {\n", qlist_count(&sched->reqs)); + mn_sched_qitem * sched_qitem = NULL; + qlist_for_each_entry(sched_qitem, &sched->reqs, ql) { + print_mn_sched_qitem(out, sched_qitem); + } + fprintf(out, "}\n"); +} + void rr_init ( const struct model_net_method * method, 
const model_net_sched_cfg_params * params, diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c index 9fefa30d..ed280e19 100644 --- a/src/networks/model-net/core/model-net-sched.c +++ b/src/networks/model-net/core/model-net-sched.c @@ -14,7 +14,7 @@ #include "codes/model-net-sched-impl.h" #include "codes/quicklist.h" -#define X(a,b,c) b, +#define X(a,b,c,d) b, char * sched_names [] = { SCHEDULER_TYPES }; From d48898a4944f13e6de6c95e5794ea92c7af107cb Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 4 Mar 2025 12:03:52 -0500 Subject: [PATCH 028/110] Removing never used struct param `entry_time` --- src/network-workloads/model-net-mpi-replay.c | 2 +- src/networks/model-net/core/model-net-lp.c | 13 +++++++++++++ src/networks/model-net/core/model-net-sched-impl.c | 5 ----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 41597968..2501c5ac 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -41,7 +41,7 @@ #define MAX_PERIODS_PER_APP 512 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine #define OUTPUT_MARKS 0 -#define LP_DEBUG 1 +#define LP_DEBUG 0 static int msg_size_hash_compare( void *key, struct qhash_head *link); diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index cbf49b54..2cc1d516 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -16,6 +16,7 @@ #define MN_NAME "model_net_base" #define DEBUG 0 +#define MODELNET_LP_DEBUG 1 /**** BEGIN SIMULATION DATA STRUCTURES ****/ int model_net_base_magic; @@ -48,6 +49,9 @@ static int servers_per_node_queue = -1; extern tw_stime codes_cn_delay; typedef struct model_net_base_state { +#if MODELNET_LP_DEBUG + 
size_t num_events_processed; +#endif /* if MODELNET_LP_DEBUG */ int net_id, nics_per_router; // whether scheduler loop is running int *in_sched_send_loop, in_sched_recv_loop; @@ -592,6 +596,9 @@ void model_net_base_event( model_net_wrap_msg * m, tw_lp * lp){ memset(b, 0, sizeof(tw_bf)); +#if MODELNET_LP_DEBUG + ns->num_events_processed++; +#endif /* if MODELNET_LP_DEBUG */ if(m->h.magic != model_net_base_magic) printf("\n LP ID mismatched %llu\n", LLU(lp->gid)); @@ -644,6 +651,9 @@ void model_net_base_event_rc( model_net_wrap_msg * m, tw_lp * lp){ assert(m->h.magic == model_net_base_magic); +#if MODELNET_LP_DEBUG + ns->num_events_processed--; +#endif /* if MODELNET_LP_DEBUG */ if(!is_freezing_on && m->h.event_type == MN_BASE_SCHED_NEXT && m->msg.m_base.created_in_surrogate) { return; @@ -1225,6 +1235,9 @@ static bool check_model_net_state(model_net_base_state * before, model_net_base_ static void __print_model_net(FILE * out, model_net_base_state * state, bool is_lp_state) { fprintf(out, "model_net_state ->\n"); +#if MODELNET_LP_DEBUG + fprintf(out, " |num_events_processed = %zu\n", state->num_events_processed); +#endif /* if MODELNET_LP_DEBUG */ fprintf(out, " | net_id = %d\n", state->net_id); fprintf(out, " | nics_per_router = %d\n", state->nics_per_router); fprintf(out, " | *in_sched_send_loop[%d] = [", state->params->num_queues); // (done) deep-all diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c index e5f2d9e2..c23bd935 100644 --- a/src/networks/model-net/core/model-net-sched-impl.c +++ b/src/networks/model-net/core/model-net-sched-impl.c @@ -27,7 +27,6 @@ typedef struct mn_sched_qitem { mn_sched_params sched_params; // remaining bytes to send uint64_t rem; - tw_stime entry_time; // pointers to event structures // sizes are given in the request struct void * remote_event; @@ -209,7 +208,6 @@ void fcfs_add ( tw_lp * lp){ (void)rc; // unneeded for fcfs mn_sched_qitem *q = 
malloc(sizeof(mn_sched_qitem)); - q->entry_time = tw_now(lp); q->req = *req; q->sched_params = *sched_params; q->rem = req->msg_size; @@ -391,7 +389,6 @@ static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * fr into->req = from->req; into->sched_params = from->sched_params; into->rem = from->rem; - into->entry_time = from->entry_time; if (from->remote_event != NULL) { assert(from->req.remote_event_size > 0); into->remote_event = malloc(from->req.remote_event_size); @@ -443,7 +440,6 @@ static bool check_mn_sched_qitem(mn_sched_qitem * before, mn_sched_qitem * after is_same &= check_model_net_request(&before->req, &after->req); is_same &= before->sched_params.prio == after->sched_params.prio; is_same &= before->rem == after->rem; - is_same &= before->entry_time == after->entry_time; is_same &= !memcmp(before->remote_event, after->remote_event, before->req.remote_event_size); is_same &= !memcmp(before->local_event, after->local_event, before->req.self_event_size); return is_same; @@ -470,7 +466,6 @@ static void print_mn_sched_qitem(FILE * out, mn_sched_qitem * item) { print_model_net_request(out, " | |.", &item->req); fprintf(out, " | sched_params.prio = %d\n", item->sched_params.prio); fprintf(out, " | rem = %lu\n", item->rem); - fprintf(out, " | entry_time = %g\n", item->entry_time); fprintf(out, " | remote_event = %p (contents below)\n", item->remote_event); tw_fprint_binary_array(out, item->remote_event, item->req.remote_event_size); fprintf(out, " | local_event = %p (contents below)\n", item->local_event); From fab09e8143cad00bf2424ecb0628659befad8b71 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 4 Mar 2025 20:51:01 -0500 Subject: [PATCH 029/110] Printing lp states and events with a prefix (prettier printing) --- codes/codes-workload.h | 2 +- codes/model-net-sched.h | 4 +- src/network-workloads/model-net-mpi-replay.c | 289 +++++++++-------- src/networks/model-net/core/model-net-lp.c | 207 ++++++------ 
.../model-net/core/model-net-sched-impl.c | 42 +-- src/networks/model-net/core/model-net-sched.c | 22 +- src/networks/model-net/dragonfly-dally.C | 296 +++++++++--------- src/workload/codes-workload.c | 76 ++--- 8 files changed, 479 insertions(+), 459 deletions(-) diff --git a/codes/codes-workload.h b/codes/codes-workload.h index 5ac6b333..4722b5a4 100644 --- a/codes/codes-workload.h +++ b/codes/codes-workload.h @@ -382,7 +382,7 @@ void codes_workload_add_method(struct codes_workload_method const * method); */ /* Printing event :) */ -void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin); +void fprint_codes_workload_op(FILE * out, char const * prefix, struct codes_workload_op * op); char const * const op_type_string(enum codes_workload_op_type op_type); #ifdef __cplusplus diff --git a/codes/model-net-sched.h b/codes/model-net-sched.h index ad7ccf6e..576c57eb 100644 --- a/codes/model-net-sched.h +++ b/codes/model-net-sched.h @@ -201,8 +201,8 @@ void model_net_sched_set_default_params(mn_sched_params *sched_params); void save_model_net_sched(model_net_sched *before, model_net_sched const *after); void clean_model_net_sched(model_net_sched *before); bool check_model_net_sched(model_net_sched *before, model_net_sched *after); -void print_model_net_sched(FILE * out, model_net_sched *sched); -void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched); +void print_model_net_sched(FILE * out, char const * prefix, model_net_sched *sched); +void print_model_net_sched_checkpoint(FILE * out, char const * prefix, model_net_sched *sched); extern char * sched_names[]; extern const crv_checkpointer * sched_checkpointers[]; diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 2501c5ac..653f6f31 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -3534,10 +3534,10 @@ static void save_nw_lp_state(nw_state * 
into, nw_state const * from) { // Don't forget to make deep copies of any new complex data types that nw_state points to } -static void print_mpi_msgs_queue(FILE * out, struct qlist_head * head, char const * before) { +static void print_mpi_msgs_queue(FILE * out, char const * prefix, struct qlist_head * head) { mpi_msgs_queue * current = NULL; qlist_for_each_entry(current, head, ql) { - fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", before, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id); + fprintf(out, "%sMsg: OpType: %d Tag %d Source %d Dest %d bytes %"PRId64" req_init_time %g req_id %u\n", prefix, current->op_type, current->tag, current->source_rank, current->dest_rank, current->num_bytes, current->req_init_time, current->req_id); } } @@ -3647,124 +3647,130 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) { } // Originally implemneted with a prompt on Claude.ai (tedious code, easy to check and produce) -static void print_nw_lp_state(FILE * out, nw_state * state) { +static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state) { int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + fprintf(out, "%snw-lp state ->\n", prefix); #if LP_DEBUG - fprintf(out, " num_events_processed = %zu\n", state->num_events_processed); -#endif /* if LP_DEBUG */ - fprintf(out, " num_events_per_lp = %ld\n", state->num_events_per_lp); - fprintf(out, " nw_id = %lu\n", state->nw_id); - fprintf(out, " wrkld_end = %d\n", state->wrkld_end); - fprintf(out, " app_id = %d\n", state->app_id); - fprintf(out, " local_rank = %d\n", state->local_rank); - fprintf(out, " qos_level = %d\n", state->qos_level); - fprintf(out, " synthetic_pattern = %d\n", state->synthetic_pattern); - fprintf(out, " is_finished = %d\n", state->is_finished); - fprintf(out, "num_own_job_ranks_completed = %d\n", 
state->num_own_job_ranks_completed); - fprintf(out, " known_completed_jobs[%d] = [", num_jobs); + fprintf(out, "%s | num_events_processed = %zu\n", prefix, state->num_events_processed); +#endif /* if LP_DE%sBUG */ + fprintf(out, "%s | num_events_per_lp = %ld\n", prefix, state->num_events_per_lp); + fprintf(out, "%s | nw_id = %lu\n", prefix, state->nw_id); + fprintf(out, "%s | wrkld_end = %d\n", prefix, state->wrkld_end); + fprintf(out, "%s | app_id = %d\n", prefix, state->app_id); + fprintf(out, "%s | local_rank = %d\n", prefix, state->local_rank); + fprintf(out, "%s | qos_level = %d\n", prefix, state->qos_level); + fprintf(out, "%s | synthetic_pattern = %d\n", prefix, state->synthetic_pattern); + fprintf(out, "%s | is_finished = %d\n", prefix, state->is_finished); + fprintf(out, "%s |num_own_job_ranks_completed = %d\n", prefix, state->num_own_job_ranks_completed); + fprintf(out, "%s | known_completed_jobs[%d] = [", prefix, num_jobs); for(int i=0; iknown_completed_jobs[i], i+1==num_jobs ? "" : ", "); + fprintf(out, "%s%d%s", prefix, state->known_completed_jobs[i], i+1==num_jobs ? 
"" : ", "); } fprintf(out, "]\n"); - fprintf(out, " *processed_ops = %p\n", state->processed_ops); - fprintf(out, " *processed_wait_op = %p\n", state->processed_wait_op); - fprintf(out, " *matched_reqs = %p\n", state->matched_reqs); + fprintf(out, "%s | *processed_ops = %p\n", prefix, state->processed_ops); + fprintf(out, "%s | *processed_wait_op = %p\n", prefix, state->processed_wait_op); + fprintf(out, "%s | *matched_reqs = %p\n", prefix, state->matched_reqs); // Operation counts - fprintf(out, " num_sends = %lu\n", state->num_sends); - fprintf(out, " num_recvs = %lu\n", state->num_recvs); - fprintf(out, " num_cols = %lu\n", state->num_cols); - fprintf(out, " num_delays = %lu\n", state->num_delays); - fprintf(out, " num_wait = %lu\n", state->num_wait); - fprintf(out, " num_waitall = %lu\n", state->num_waitall); - fprintf(out, " num_waitsome = %lu\n", state->num_waitsome); + fprintf(out, "%s | num_sends = %lu\n", prefix, state->num_sends); + fprintf(out, "%s | num_recvs = %lu\n", prefix, state->num_recvs); + fprintf(out, "%s | num_cols = %lu\n", prefix, state->num_cols); + fprintf(out, "%s | num_delays = %lu\n", prefix, state->num_delays); + fprintf(out, "%s | num_wait = %lu\n", prefix, state->num_wait); + fprintf(out, "%s | num_waitall = %lu\n", prefix, state->num_waitall); + fprintf(out, "%s | num_waitsome = %lu\n", prefix, state->num_waitsome); // Timing information - fprintf(out, " start_time = %g\n", state->start_time); - fprintf(out, " col_time = %g\n", state->col_time); - fprintf(out, " reduce_time = %g\n", state->reduce_time); - fprintf(out, " num_reduce = %d\n", state->num_reduce); - fprintf(out, " all_reduce_time = %g\n", state->all_reduce_time); - fprintf(out, " num_all_reduce = %d\n", state->num_all_reduce); - fprintf(out, " elapsed_time = %g\n", state->elapsed_time); - fprintf(out, " compute_time = %g\n", state->compute_time); - fprintf(out, " send_time = %g\n", state->send_time); - fprintf(out, " max_time = %g\n", state->max_time); - fprintf(out, " 
recv_time = %g\n", state->recv_time); - fprintf(out, " wait_time = %g\n", state->wait_time); + fprintf(out, "%s | start_time = %g\n", prefix, state->start_time); + fprintf(out, "%s | col_time = %g\n", prefix, state->col_time); + fprintf(out, "%s | reduce_time = %g\n", prefix, state->reduce_time); + fprintf(out, "%s | num_reduce = %d\n", prefix, state->num_reduce); + fprintf(out, "%s | all_reduce_time = %g\n", prefix, state->all_reduce_time); + fprintf(out, "%s | num_all_reduce = %d\n", prefix, state->num_all_reduce); + fprintf(out, "%s | elapsed_time = %g\n", prefix, state->elapsed_time); + fprintf(out, "%s | compute_time = %g\n", prefix, state->compute_time); + fprintf(out, "%s | send_time = %g\n", prefix, state->send_time); + fprintf(out, "%s | max_time = %g\n", prefix, state->max_time); + fprintf(out, "%s | recv_time = %g\n", prefix, state->recv_time); + fprintf(out, "%s | wait_time = %g\n", prefix, state->wait_time); // Queue heads - fprintf(out, " arrival_queue[%d] = [\n", qlist_count(&state->arrival_queue)); - print_mpi_msgs_queue(out, &state->arrival_queue, " "); - fprintf(out, "]\n"); - fprintf(out, " pending_recvs_queue[%d] = [\n", qlist_count(&state->pending_recvs_queue)); - print_mpi_msgs_queue(out, &state->pending_recvs_queue, " "); - fprintf(out, "]\n"); - - fprintf(out, " completed_reqs[%d] = [\n", qlist_count(&state->completed_reqs)); + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + + fprintf(out, "%s | arrival_queue[%d] = [\n", prefix, qlist_count(&state->arrival_queue)); + print_mpi_msgs_queue(out, subprefix, &state->arrival_queue); + fprintf(out, "%s | ]\n", prefix); + fprintf(out, "%s | pending_recvs_queue[%d] = [\n", prefix, qlist_count(&state->pending_recvs_queue)); + print_mpi_msgs_queue(out, subprefix, &state->pending_recvs_queue); + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | 
completed_reqs[%d] = [\n", prefix, qlist_count(&state->completed_reqs)); completed_requests * current = NULL; qlist_for_each_entry(current, &state->completed_reqs, ql) { - fprintf(out, " Req: req_id: %u\n", current->req_id); + fprintf(out, "%s | | Req: req_id: %u\n", prefix, current->req_id); } - fprintf(out, "]\n"); + fprintf(out, "%s | ]\n", prefix); - fprintf(out, " cur_interval_end = %g\n", state->cur_interval_end); - fprintf(out, " *wait_op = %p\n", state->wait_op); + fprintf(out, "%s | cur_interval_end = %g\n", prefix, state->cur_interval_end); + fprintf(out, "%s | *wait_op = %p\n", prefix, state->wait_op); if (state->wait_op != NULL) { - fprintf(out, " |.op_type = %d\n", state->wait_op->op_type); - fprintf(out, " |.req_ids = ["); + fprintf(out, "%s | |.op_type = %d\n", prefix, state->wait_op->op_type); + fprintf(out, "%s | |.req_ids = [", prefix); for(int i = 0; i < state->wait_op->count; i++) { fprintf(out, "%d%s", state->wait_op->req_ids[i], i+1==state->wait_op->count ? "" : ", "); } fprintf(out, "]\n"); - fprintf(out, " |.num_completed = %d\n", state->wait_op->num_completed); - fprintf(out, " |.count = %d\n", state->wait_op->count); - fprintf(out, " |.start_time = %g\n", state->wait_op->start_time); + fprintf(out, "%s | |.num_completed = %d\n", prefix, state->wait_op->num_completed); + fprintf(out, "%s | |.count = %d\n", prefix, state->wait_op->count); + fprintf(out, "%s | |.start_time = %g\n", prefix, state->wait_op->start_time); } - fprintf(out, " msg_sz_list[%d] = [\n", qlist_count(&state->completed_reqs)); + fprintf(out, "%s | msg_sz_list[%d] = [\n", prefix, qlist_count(&state->completed_reqs)); struct msg_size_info * ms_info = NULL; qlist_for_each_entry(ms_info, &state->msg_sz_list, ql) { - fprintf(out, " MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p hash_link.prev: %p\n", ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev); + 
fprintf(out, "%s | | MsSizeInfo: msg_size: %lu num_msgs: %d agg_latency: %g avg_latency: %g hash_link.next: %p hash_link.prev: %p\n", prefix, ms_info->msg_size, ms_info->num_msgs, ms_info->agg_latency, ms_info->avg_latency, ms_info->hash_link.next, ms_info->hash_link.prev); } - fprintf(out, "]\n"); + fprintf(out, "%s | ]\n", prefix); // Data statistics - fprintf(out, " num_bytes_sent = %llu\n", state->num_bytes_sent); - fprintf(out, " num_bytes_recvd = %llu\n", state->num_bytes_recvd); - fprintf(out, " syn_data = %llu\n", state->syn_data); - fprintf(out, " gen_data = %llu\n", state->gen_data); + fprintf(out, "%s | num_bytes_sent = %llu\n", prefix, state->num_bytes_sent); + fprintf(out, "%s | num_bytes_recvd = %llu\n", prefix, state->num_bytes_recvd); + fprintf(out, "%s | syn_data = %llu\n", prefix, state->syn_data); + fprintf(out, "%s | gen_data = %llu\n", prefix, state->gen_data); - fprintf(out, " prev_switch = %lu\n", state->prev_switch); - fprintf(out, " saved_perm_dest = %d\n", state->saved_perm_dest); - fprintf(out, " rc_perm = %lu\n", state->rc_perm); + fprintf(out, "%s | prev_switch = %lu\n", prefix, state->prev_switch); + fprintf(out, "%s | saved_perm_dest = %d\n", prefix, state->saved_perm_dest); + fprintf(out, "%s | rc_perm = %lu\n", prefix, state->rc_perm); // Sampling information - fprintf(out, " sampling_indx = %d\n", state->sampling_indx); - fprintf(out, " max_arr_size = %d\n", state->max_arr_size); - fprintf(out, "* mpi_wkld_samples = %p\n", state->mpi_wkld_samples); - fprintf(out, " output_buf = %.512s...\n", state->output_buf); - fprintf(out, " col_stats = %.64s...\n", state->col_stats); - - fprintf(out, "ross_sample.\n"); - fprintf(out, " | .nw_id = %lu\n", state->ross_sample.nw_id); - fprintf(out, " | .app_id = %d\n", state->ross_sample.app_id); - fprintf(out, " | .local_rank = %d\n", state->ross_sample.local_rank); - fprintf(out, " | .num_sends = %lu\n", state->ross_sample.num_sends); - fprintf(out, " | .num_recvs = %lu\n", 
state->ross_sample.num_recvs); - fprintf(out, " | .num_bytes_sent = %llu\n", state->ross_sample.num_bytes_sent); - fprintf(out, " |.num_bytes_recvd = %llu\n", state->ross_sample.num_bytes_recvd); - fprintf(out, " | .send_time = %g\n", state->ross_sample.send_time); - fprintf(out, " | .recv_time = %g\n", state->ross_sample.recv_time); - fprintf(out, " | .wait_time = %g\n", state->ross_sample.wait_time); - fprintf(out, " | .compute_time = %g\n", state->ross_sample.compute_time); - fprintf(out, " | .comm_time = %g\n", state->ross_sample.comm_time); - fprintf(out, " | .max_time = %g\n", state->ross_sample.max_time); - fprintf(out, " | .avg_msg_time = %g\n", state->ross_sample.avg_msg_time); + fprintf(out, "%s | sampling_indx = %d\n", prefix, state->sampling_indx); + fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); + fprintf(out, "%s |* mpi_wkld_samples = %p\n", prefix, state->mpi_wkld_samples); + fprintf(out, "%s | output_buf = %.512s...\n", prefix, state->output_buf); + fprintf(out, "%s | col_stats = %.64s...\n", prefix, state->col_stats); + + fprintf(out, "%s |ross_sample.\n", prefix); + fprintf(out, "%s | | nw_id = %lu\n", prefix, state->ross_sample.nw_id); + fprintf(out, "%s | | app_id = %d\n", prefix, state->ross_sample.app_id); + fprintf(out, "%s | | local_rank = %d\n", prefix, state->ross_sample.local_rank); + fprintf(out, "%s | | num_sends = %lu\n", prefix, state->ross_sample.num_sends); + fprintf(out, "%s | | num_recvs = %lu\n", prefix, state->ross_sample.num_recvs); + fprintf(out, "%s | | num_bytes_sent = %llu\n", prefix, state->ross_sample.num_bytes_sent); + fprintf(out, "%s | | num_bytes_recvd = %llu\n", prefix, state->ross_sample.num_bytes_recvd); + fprintf(out, "%s | | send_time = %g\n", prefix, state->ross_sample.send_time); + fprintf(out, "%s | | recv_time = %g\n", prefix, state->ross_sample.recv_time); + fprintf(out, "%s | | wait_time = %g\n", prefix, state->ross_sample.wait_time); + fprintf(out, "%s | | compute_time = %g\n", 
prefix, state->ross_sample.compute_time); + fprintf(out, "%s | | comm_time = %g\n", prefix, state->ross_sample.comm_time); + fprintf(out, "%s | | max_time = %g\n", prefix, state->ross_sample.max_time); + fprintf(out, "%s | | avg_msg_time = %g\n", prefix, state->ross_sample.avg_msg_time); // Configuration - fprintf(out, "* switch_config = %p\n", state->switch_config); - fprintf(out, " switch_config_size = %zu\n", state->switch_config_size); + fprintf(out, "%s |* switch_config = %p\n", prefix, state->switch_config); + fprintf(out, "%s | switch_config_size = %zu\n", prefix, state->switch_config_size); } static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) { @@ -3789,83 +3795,88 @@ static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) } // Original printing function from Claude.ai -static void print_nw_message(FILE * out, struct nw_message * msg) { - // Print main fields - fprintf(out, "msg_type = %s\n", MPI_NW_EVENTS_to_string(msg->msg_type)); - fprintf(out, " op_type = %s\n", op_type_string(msg->op_type)); - fprintf(out, "num_rngs = %d\n", msg->num_rngs); - fprintf(out, "event_rc = %d\n", msg->event_rc); - fprintf(out, " mpi_op = %p\n", msg->mpi_op); - fprint_codes_workload_op(out, msg->mpi_op, " |"); - - fprintf(out, "fwd\n"); - fprintf(out, " | .src_rank = %lu\n", msg->fwd.src_rank); - fprintf(out, " | .dest_rank = %d\n", msg->fwd.dest_rank); - fprintf(out, " | .num_bytes = %ld\n", msg->fwd.num_bytes); - fprintf(out, " | .num_matched = %d\n", msg->fwd.num_matched); - fprintf(out, " |.sim_start_time = %g\n", msg->fwd.sim_start_time); - fprintf(out, " | .msg_send_time = %g\n", msg->fwd.msg_send_time); - fprintf(out, " | .req_id = %u\n", msg->fwd.req_id); - fprintf(out, " | .matched_req = %d\n", msg->fwd.matched_req); - fprintf(out, " | .tag = %d\n", msg->fwd.tag); - fprintf(out, " | .app_id = %d\n", msg->fwd.app_id); - fprintf(out, " | .found_match = %d\n", msg->fwd.found_match); - fprintf(out, " |.wait_completed 
= %d\n", msg->fwd.wait_completed); - fprintf(out, " | .rend_send = %d\n", msg->fwd.rend_send); - - fprintf(out, "rc\n"); +static void print_nw_message(FILE * out, char const * prefix, struct nw_message * msg) { + fprintf(out, "%snw_message ->\n", prefix); + fprintf(out, "%s | msg_type = %s\n", prefix, MPI_NW_EVENTS_to_string(msg->msg_type)); + fprintf(out, "%s | op_type = %s\n", prefix, op_type_string(msg->op_type)); + fprintf(out, "%s | num_rngs = %d\n", prefix, msg->num_rngs); + fprintf(out, "%s | event_rc = %d\n", prefix, msg->event_rc); + fprintf(out, "%s | mpi_op = %p\n", prefix, msg->mpi_op); + + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + fprint_codes_workload_op(out, subprefix, msg->mpi_op); + + fprintf(out, "%s | fwd\n", prefix); + fprintf(out, "%s | | src_rank = %lu\n", prefix, msg->fwd.src_rank); + fprintf(out, "%s | | dest_rank = %d\n", prefix, msg->fwd.dest_rank); + fprintf(out, "%s | | num_bytes = %ld\n", prefix, msg->fwd.num_bytes); + fprintf(out, "%s | | num_matched = %d\n", prefix, msg->fwd.num_matched); + fprintf(out, "%s | | sim_start_time = %g\n", prefix, msg->fwd.sim_start_time); + fprintf(out, "%s | | msg_send_time = %g\n", prefix, msg->fwd.msg_send_time); + fprintf(out, "%s | | req_id = %u\n", prefix, msg->fwd.req_id); + fprintf(out, "%s | | matched_req = %d\n", prefix, msg->fwd.matched_req); + fprintf(out, "%s | | tag = %d\n", prefix, msg->fwd.tag); + fprintf(out, "%s | | app_id = %d\n", prefix, msg->fwd.app_id); + fprintf(out, "%s | | found_match = %d\n", prefix, msg->fwd.found_match); + fprintf(out, "%s | | wait_completed = %d\n", prefix, msg->fwd.wait_completed); + fprintf(out, "%s | | rend_send = %d\n", prefix, msg->fwd.rend_send); + + fprintf(out, "%s | rc\n", prefix); switch(msg->msg_type) { case CLI_BCKGND_GEN: - fprintf(out, " |.gen\n"); - fprintf(out, " | .saved_syn_length = 
%d\n", msg->rc.gen.saved_syn_length); - fprintf(out, " | .saved_perm = %d\n", msg->rc.gen.saved_perm); - fprintf(out, " |.saved_prev_switch = %lu\n", msg->rc.gen.saved_prev_switch); + fprintf(out, "%s | | gen\n", prefix); + fprintf(out, "%s | | saved_syn_length = %d\n", prefix, msg->rc.gen.saved_syn_length); + fprintf(out, "%s | | saved_perm = %d\n", prefix, msg->rc.gen.saved_perm); + fprintf(out, "%s | | saved_prev_switch = %lu\n", prefix, msg->rc.gen.saved_prev_switch); break; case CLI_BCKGND_ARRIVE: case MPI_SEND_ARRIVED_CB: - fprintf(out, " |arrive.saved_prev_max_time = %g\n", msg->rc.arrive.saved_prev_max_time); - fprintf(out, " | arrive.saved_send_time = %g\n", msg->rc.arrive.saved_send_time); - fprintf(out, " |arrive.saved_send_time_sample = %g\n", msg->rc.arrive.saved_send_time_sample); + fprintf(out, "%s | |arrive.saved_prev_max_time = %g\n", prefix, msg->rc.arrive.saved_prev_max_time); + fprintf(out, "%s | | arrive.saved_send_time = %g\n", prefix, msg->rc.arrive.saved_send_time); + fprintf(out, "%s | |arrive.saved_send_time_sample = %g\n", prefix, msg->rc.arrive.saved_send_time_sample); break; case CLI_BCKGND_CHANGE: - fprintf(out, " | change.saved_send_time = %g\n", msg->rc.change.saved_send_time); - fprintf(out, " | change.saved_marker_time = %g\n", msg->rc.change.saved_marker_time); + fprintf(out, "%s | | change.saved_send_time = %g\n", prefix, msg->rc.change.saved_send_time); + fprintf(out, "%s | | change.saved_marker_time = %g\n", prefix, msg->rc.change.saved_marker_time); break; case MPI_OP_GET_NEXT: - fprintf(out, " .mpi_next\n"); - fprintf(out, " |.saved_elapsed_time = %g\n", msg->rc.mpi_next.saved_elapsed_time); - fprintf(out, " |.all_reduce.saved_send_time = %g\n", msg->rc.mpi_next.all_reduce.saved_send_time); - fprintf(out, " |.all_reduce.saved_delay = %g\n", msg->rc.mpi_next.all_reduce.saved_delay); + fprintf(out, "%s | mpi_next\n", prefix); + fprintf(out, "%s | | saved_elapsed_time = %g\n", prefix, msg->rc.mpi_next.saved_elapsed_time); + 
fprintf(out, "%s | | all_reduce.saved_send_time = %g\n", prefix, msg->rc.mpi_next.all_reduce.saved_send_time); + fprintf(out, "%s | | all_reduce.saved_delay = %g\n", prefix, msg->rc.mpi_next.all_reduce.saved_delay); - fprintf(out, " |.recv.saved_recv_time = %g\n", msg->rc.mpi_next.recv.saved_recv_time); - fprintf(out, " |.recv.saved_recv_time_sample = %g\n", msg->rc.mpi_next.recv.saved_recv_time_sample); + fprintf(out, "%s | | recv.saved_recv_time = %g\n", prefix, msg->rc.mpi_next.recv.saved_recv_time); + fprintf(out, "%s | | recv.saved_recv_time_sample = %g\n", prefix, msg->rc.mpi_next.recv.saved_recv_time_sample); - fprintf(out, " |.delay.saved_delay = %g\n", msg->rc.mpi_next.delay.saved_delay); - fprintf(out, " |.delay.saved_delay_sample = %g\n", msg->rc.mpi_next.delay.saved_delay_sample); + fprintf(out, "%s | | delay.saved_delay = %g\n", prefix, msg->rc.mpi_next.delay.saved_delay); + fprintf(out, "%s | | delay.saved_delay_sample = %g\n", prefix, msg->rc.mpi_next.delay.saved_delay_sample); - fprintf(out, " |.mark.saved_marker_time = %g\n", msg->rc.mpi_next.mark.saved_marker_time); + fprintf(out, "%s | | mark.saved_marker_time = %g\n", prefix, msg->rc.mpi_next.mark.saved_marker_time); break; case MPI_SEND_ARRIVED: case MPI_REND_ARRIVED: case MPI_SEND_POSTED: - fprintf(out, " |.mpi_send\n"); - fprintf(out, " | .saved_wait_time = %g\n", msg->rc.mpi_send.saved_wait_time); - fprintf(out, " |.saved_wait_time_sample = %g\n", msg->rc.mpi_send.saved_wait_time_sample); - fprintf(out, " | .saved_recv_time = %g\n", msg->rc.mpi_send.saved_recv_time); - fprintf(out, " |.saved_recv_time_sample = %g\n", msg->rc.mpi_send.saved_recv_time_sample); - fprintf(out, " | .saved_num_bytes = %lu\n", msg->rc.mpi_send.saved_num_bytes); + fprintf(out, "%s | | mpi_send\n", prefix); + fprintf(out, "%s | | saved_wait_time = %g\n", prefix, msg->rc.mpi_send.saved_wait_time); + fprintf(out, "%s | | saved_wait_time_sample = %g\n", prefix, msg->rc.mpi_send.saved_wait_time_sample); + fprintf(out, 
"%s | | saved_recv_time = %g\n", prefix, msg->rc.mpi_send.saved_recv_time); + fprintf(out, "%s | | saved_recv_time_sample = %g\n", prefix, msg->rc.mpi_send.saved_recv_time_sample); + fprintf(out, "%s | | saved_num_bytes = %lu\n", prefix, msg->rc.mpi_send.saved_num_bytes); break; case MPI_REND_ACK_ARRIVED: - fprintf(out, " | mpi_ack.saved_num_bytes = %ld\n", msg->rc.mpi_ack.saved_num_bytes); + fprintf(out, "%s | | mpi_ack.saved_num_bytes = %ld\n", prefix, msg->rc.mpi_ack.saved_num_bytes); break; case SURR_SKIP_ITERATION: - fprintf(out, " | surr.config_used = %p\n", msg->rc.surr.config_used); + fprintf(out, "%s | | surr.config_used = %p\n", prefix, msg->rc.surr.config_used); break; default: diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 2cc1d516..1a065c8f 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -139,9 +139,9 @@ tw_lptype model_net_base_lp = { static void save_state_net_state(model_net_base_state * into, model_net_base_state const * from); static void clean_state_net_state(model_net_base_state * state); static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after); -static void print_model_net_state(FILE * out, model_net_base_state * state); -static void print_model_net_checkpoint(FILE * out, model_net_base_state * state); -static void print_event_state(FILE * out, model_net_wrap_msg * state); +static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state); +static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state); +static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * state); // ROSS function pointer table to check reverse event handler crv_checkpointer model_net_chkptr = { @@ -1233,87 +1233,77 @@ static bool check_model_net_state(model_net_base_state * before, model_net_base_ return is_same; } -static 
void __print_model_net(FILE * out, model_net_base_state * state, bool is_lp_state) { - fprintf(out, "model_net_state ->\n"); +static void __print_model_net(FILE * out, char const * prefix, model_net_base_state * state, bool is_lp_state) { + fprintf(out, "%smodel_net_state ->\n", prefix); #if MODELNET_LP_DEBUG - fprintf(out, " |num_events_processed = %zu\n", state->num_events_processed); -#endif /* if MODELNET_LP_DEBUG */ - fprintf(out, " | net_id = %d\n", state->net_id); - fprintf(out, " | nics_per_router = %d\n", state->nics_per_router); - fprintf(out, " | *in_sched_send_loop[%d] = [", state->params->num_queues); // (done) deep-all + fprintf(out, "%s |num_events_processed = %zu\n", prefix, state->num_events_processed); +#endif /* if MODELNET_LP_DEBUG */ + + void (*print_modelnet) (FILE *, char const *, model_net_sched *) = is_lp_state ? print_model_net_sched : print_model_net_sched_checkpoint; + + fprintf(out, "%s | net_id = %d\n", prefix, state->net_id); + fprintf(out, "%s | nics_per_router = %d\n", prefix, state->nics_per_router); + fprintf(out, "%s | *in_sched_send_loop[%d] = [", prefix, state->params->num_queues); // deep-all for (int i=0; i < state->params->num_queues; i++) { fprintf(out, "%d%s", state->in_sched_send_loop[i], i==state->params->num_queues-1 ? 
"" : ", "); } fprintf(out, "]\n"); - fprintf(out, " | in_sched_recv_loop = %d\n", state->in_sched_recv_loop); - fprintf(out, " | msg_id = %lu\n", state->msg_id); - fprintf(out, " | ** sched_send = %p\n", state->sched_send); // (done) deep-all - fprintf(out, " | * sched_recv = %p\n", state->sched_recv); // (done) deep-all - fprintf(out, " | * params = %p\n", state->params); - fprintf(out, " | * sub_type = %p\n", state->sub_type); - fprintf(out, " | * sub_model_type = %p\n", state->sub_model_type); - fprintf(out, " | * sub_state = %p\n", state->sub_state); // deep-all - fprintf(out, " | next_available_time = %f\n", state->next_available_time); - fprintf(out, " | *node_copy_next_available_time[%d] = [", state->params->num_queues); // (done) deep-all - for (int i=0; i < state->params->node_copy_queues; i++) { - fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? "" : ", "); - } - fprintf(out, "]\n"); - fprintf(out, " | *sched_loop_pre_surrogate = %p\n", state->sched_loop_pre_surrogate); // no need to check - fprintf(out, " | sched_recv_loop_pre_surrogate = %d\n", state->sched_recv_loop_pre_surrogate); // no need to check - - void (*print_modelnet) (FILE * out, model_net_sched *sched) = is_lp_state ? 
print_model_net_sched : print_model_net_sched_checkpoint; - - fprintf(out, "\n"); + fprintf(out, "%s | in_sched_recv_loop = %d\n", prefix, state->in_sched_recv_loop); + fprintf(out, "%s | msg_id = %lu\n", prefix, state->msg_id); + fprintf(out, "%s | ** sched_send = %p\n", prefix, state->sched_send); // deep-all + // + int len_subprefix = snprintf(NULL, 0, "%s | | ", prefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s | | ", prefix); for(int i = 0; i < state->params->num_queues; i++) { - fprintf(out, "==== CONTENT for sched_send[%d]:\n", i); - print_modelnet(stderr, state->sched_send[i]); - } - - fprintf(out, "\n==== CONTENT for sched_recv:\n"); - print_modelnet(stderr, state->sched_recv); - + fprintf(out, "%ssched_send[%d]:\n", subprefix, i); + print_modelnet(out, subprefix, state->sched_send[i]); + } + // + fprintf(out, "%s | * sched_recv = %p\n", prefix, state->sched_recv); // deep-all + print_modelnet(out, subprefix, state->sched_recv); + fprintf(out, "%s | * params = %p\n", prefix, state->params); + fprintf(out, "%s | * sub_type = %p\n", prefix, state->sub_type); + fprintf(out, "%s | * sub_model_type = %p\n", prefix, state->sub_model_type); + fprintf(out, "%s | * sub_state = %p\n", prefix, state->sub_state); // deep-all + // crv_checkpointer * chptr = method_array[state->net_id]->checkpointer; if (chptr && state->sub_state != NULL) { if (is_lp_state && chptr->print_lp) { - fprintf(out, "\n==== CONTENT for sub_state:\n"); - chptr->print_lp(out, state->sub_state); + chptr->print_lp(out, subprefix, state->sub_state); } if (!is_lp_state && chptr->print_checkpoint) { - fprintf(out, "\n==== CONTENT for sub_state:\n"); - chptr->print_checkpoint(out, state->sub_state); + chptr->print_checkpoint(out, subprefix, state->sub_state); } } + // + fprintf(out, "%s | next_available_time = %f\n", prefix, state->next_available_time); + fprintf(out, "%s | *node_copy_next_available_time[%d] = [", prefix, state->params->num_queues); // (done) 
deep-all + for (int i=0; i < state->params->node_copy_queues; i++) { + fprintf(out, "%g%s", state->node_copy_next_available_time[i], i==state->params->node_copy_queues-1 ? "" : ", "); + } + fprintf(out, "]\n"); + fprintf(out, "%s | *sched_loop_pre_surrogate = %p\n", prefix, state->sched_loop_pre_surrogate); // no need to check + fprintf(out, "%s | sched_recv_loop_pre_surrogate = %d\n", prefix, state->sched_recv_loop_pre_surrogate); // no need to check } -static void print_model_net_state(FILE * out, model_net_base_state * state) { - __print_model_net(out, state, true); +static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state) { + __print_model_net(out, prefix, state, true); } -static void print_model_net_checkpoint(FILE * out, model_net_base_state * state) { - __print_model_net(out, state, false); +static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state) { + __print_model_net(out, prefix, state, false); } -static void print_type(FILE * out, enum model_net_base_event_type type) { +static char const * const event_type_string(enum model_net_base_event_type type) { switch (type) { - case MN_BASE_NEW_MSG: - fprintf(out, "MN_BASE_NEW_MSG"); - break; - case MN_BASE_SCHED_NEXT: - fprintf(out, "MN_BASE_SCHED_NEXT"); - break; - case MN_BASE_SAMPLE: - fprintf(out, "MN_BASE_SAMPLE"); - break; - case MN_BASE_PASS: - fprintf(out, "MN_BASE_PASS"); - break; - case MN_BASE_END_NOTIF: - fprintf(out, "MN_BASE_END_NOTIF"); - break; - case MN_CONGESTION_EVENT: - fprintf(out, "MN_CONGESTION_EVENT"); - break; - } + case MN_BASE_NEW_MSG: return "MN_BASE_NEW_MSG"; + case MN_BASE_SCHED_NEXT: return "MN_BASE_SCHED_NEXT"; + case MN_BASE_SAMPLE: return "MN_BASE_SAMPLE"; + case MN_BASE_PASS: return "MN_BASE_PASS"; + case MN_BASE_END_NOTIF: return "MN_BASE_END_NOTIF"; + case MN_CONGESTION_EVENT: return "MN_CONGESTION_EVENT"; + } + return "UNKNOWN TYPE!!"; } // Used Claude for an initial draft of this function @@ 
-1340,53 +1330,62 @@ bool check_model_net_request(model_net_request const * before, model_net_request return is_same; } -void print_model_net_request(FILE * out, char const * starts_with, model_net_request * req) { - fprintf(out, "%sfinal_dest_lp = %ld\n", starts_with, req->final_dest_lp); - fprintf(out, "%sdest_mn_lp = %ld\n", starts_with, req->dest_mn_lp); - fprintf(out, "%ssrc_lp = %ld\n", starts_with, req->src_lp); - fprintf(out, "%smsg_start_time = %f\n", starts_with, req->msg_start_time); - fprintf(out, "%smsg_new_mn_event = %f\n", starts_with, req->msg_new_mn_event); - fprintf(out, "%smsg_size = %ld\n", starts_with, req->msg_size); - fprintf(out, "%spull_size = %ld\n", starts_with, req->pull_size); - fprintf(out, "%spacket_size = %ld\n", starts_with, req->packet_size); - fprintf(out, "%smsg_id = %ld\n", starts_with, req->msg_id); - fprintf(out, "%snet_id = %d\n", starts_with, req->net_id); - fprintf(out, "%sis_pull = %d\n", starts_with, req->is_pull); - fprintf(out, "%squeue_offset = %d\n", starts_with, req->queue_offset); - fprintf(out, "%sremote_event_size = %d\n", starts_with, req->remote_event_size); - fprintf(out, "%sself_event_size = %d\n", starts_with, req->self_event_size); - fprintf(out, "%scategory = '%s'\n", starts_with, req->category); - fprintf(out, "%sapp_id = %d\n", starts_with, req->app_id); +void print_model_net_request(FILE * out, char const * prefix, model_net_request * req) { + fprintf(out, "%sfinal_dest_lp = %ld\n", prefix, req->final_dest_lp); + fprintf(out, "%sdest_mn_lp = %ld\n", prefix, req->dest_mn_lp); + fprintf(out, "%ssrc_lp = %ld\n", prefix, req->src_lp); + fprintf(out, "%smsg_start_time = %f\n", prefix, req->msg_start_time); + fprintf(out, "%smsg_new_mn_event = %f\n", prefix, req->msg_new_mn_event); + fprintf(out, "%smsg_size = %ld\n", prefix, req->msg_size); + fprintf(out, "%spull_size = %ld\n", prefix, req->pull_size); + fprintf(out, "%spacket_size = %ld\n", prefix, req->packet_size); + fprintf(out, "%smsg_id = %ld\n", 
prefix, req->msg_id); + fprintf(out, "%snet_id = %d\n", prefix, req->net_id); + fprintf(out, "%sis_pull = %d\n", prefix, req->is_pull); + fprintf(out, "%squeue_offset = %d\n", prefix, req->queue_offset); + fprintf(out, "%sremote_event_size = %d\n", prefix, req->remote_event_size); + fprintf(out, "%sself_event_size = %d\n", prefix, req->self_event_size); + fprintf(out, "%scategory = '%s'\n", prefix, req->category); + fprintf(out, "%sapp_id = %d\n", prefix, req->app_id); } -static void print_event_state(FILE * out, model_net_wrap_msg * msg) { - fprintf(out, "h\n"); - fprintf(out, "|.src = %lu\n", msg->h.src); - fprintf(out, "|.event_type = %d (", msg->h.event_type); - print_type(out, msg->h.event_type); - fprintf(out, ")\n"); - fprintf(out, "|.magic = %d\n", msg->h.magic); +static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * msg) { + fprintf(out, "%sh\n", prefix); + fprintf(out, "%s| src = %lu\n", prefix, msg->h.src); + fprintf(out, "%s| event_type = %d (%s)\n", prefix, msg->h.event_type, event_type_string(msg->h.event_type)); + fprintf(out, "%s| magic = %d\n", prefix, msg->h.magic); + + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char subprefix_2[len_subprefix]; + snprintf(subprefix_2, len_subprefix, "%s%s", prefix, addprefix_2); + switch (msg->h.event_type) { case MN_BASE_NEW_MSG: case MN_BASE_SCHED_NEXT: // We can check m_base values - fprintf(out, "m_base\n"); - fprintf(out, " |.req\n"); - print_model_net_request(out, " | |.", &msg->msg.m_base.req); - fprintf(out, " |.is_from_remote = %d\n", msg->msg.m_base.is_from_remote); - fprintf(out, " |.isQueueReq = %d\n", msg->msg.m_base.isQueueReq); - fprintf(out, " |.save_ts = %f\n", msg->msg.m_base.save_ts); - fprintf(out, " 
|.sched_params.prio = %d\n", msg->msg.m_base.sched_params.prio); - fprintf(out, " |.rc\n"); - fprintf(out, " | |.req\n"); - print_model_net_request(out, " | | |.", &msg->msg.m_base.rc.req); - fprintf(out, " | |.sched_params.prio = %d\n", msg->msg.m_base.rc.sched_params.prio); - fprintf(out, " | |.rtn = %d\n", msg->msg.m_base.rc.rtn); - fprintf(out, " | |.prio = %d\n", msg->msg.m_base.rc.prio); - fprintf(out, " |.created_in_surrogate = %d\n", msg->msg.m_base.created_in_surrogate); + fprintf(out, "%sm_base\n", prefix); + fprintf(out, "%s | req\n", prefix); + print_model_net_request(out, subprefix, &msg->msg.m_base.req); + fprintf(out, "%s | is_from_remote = %d\n", prefix, msg->msg.m_base.is_from_remote); + fprintf(out, "%s | isQueueReq = %d\n", prefix, msg->msg.m_base.isQueueReq); + fprintf(out, "%s | save_ts = %f\n", prefix, msg->msg.m_base.save_ts); + fprintf(out, "%s | sched_params.prio = %d\n", prefix, msg->msg.m_base.sched_params.prio); + fprintf(out, "%s | rc\n", prefix); + fprintf(out, "%s | | req\n", prefix); + print_model_net_request(out, subprefix_2, &msg->msg.m_base.rc.req); + fprintf(out, "%s | | sched_params.prio = %d\n", prefix, msg->msg.m_base.rc.sched_params.prio); + fprintf(out, "%s | | rtn = %d\n", prefix, msg->msg.m_base.rc.rtn); + fprintf(out, "%s | | prio = %d\n", prefix, msg->msg.m_base.rc.prio); + fprintf(out, "%s | created_in_surrogate = %d\n", prefix, msg->msg.m_base.created_in_surrogate); break; default: - fprintf(out, "The content of this message cannot be deciphered yet with the information given\n"); + fprintf(out, "%sThe content of this message cannot be deciphered yet with the information given\n", prefix); } // TODO: print internal state of message // void * sub_msg = ((char*)msg) + msg_offsets[state->net_id]; diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c index c23bd935..a3ff4fde 100644 --- a/src/networks/model-net/core/model-net-sched-impl.c +++ 
b/src/networks/model-net/core/model-net-sched-impl.c @@ -85,7 +85,7 @@ static void fcfs_next_rc( static void save_state_fcfs_state(mn_sched_queue * into, mn_sched_queue const * from); static void clean_state_fcfs_state(mn_sched_queue * into); static bool check_fcfs_state(mn_sched_queue *before, mn_sched_queue *after); -static void print_fcfs_state(FILE * out, mn_sched_queue *sched); +static void print_fcfs_state(FILE * out, char const * prefix, mn_sched_queue *sched); // ROUND-ROBIN static void rr_init ( @@ -460,29 +460,33 @@ static bool check_fcfs_state(mn_sched_queue * before, mn_sched_queue * after) { return is_same; } -static void print_mn_sched_qitem(FILE * out, mn_sched_qitem * item) { - fprintf(out, " mn_sched_qitem\n"); - fprintf(out, " | .req\n"); - print_model_net_request(out, " | |.", &item->req); - fprintf(out, " | sched_params.prio = %d\n", item->sched_params.prio); - fprintf(out, " | rem = %lu\n", item->rem); - fprintf(out, " | remote_event = %p (contents below)\n", item->remote_event); - tw_fprint_binary_array(out, item->remote_event, item->req.remote_event_size); - fprintf(out, " | local_event = %p (contents below)\n", item->local_event); - tw_fprint_binary_array(out, item->local_event, item->req.self_event_size); +static void print_mn_sched_qitem(FILE * out, char const * prefix, mn_sched_qitem * item) { + int len_subprefix = snprintf(NULL, 0, "%s | | ", prefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s | | ", prefix); + + fprintf(out, "%s mn_sched_qitem\n", prefix); + fprintf(out, "%s | .req\n", prefix); + print_model_net_request(out, subprefix, &item->req); + fprintf(out, "%s | sched_params.prio = %d\n", prefix, item->sched_params.prio); + fprintf(out, "%s | rem = %lu\n", prefix, item->rem); + fprintf(out, "%s | remote_event = %p (contents below)\n", prefix, item->remote_event); + tw_fprint_binary_array(out, subprefix, item->remote_event, item->req.remote_event_size); + fprintf(out, "%s | local_event = %p 
(contents below)\n", prefix, item->local_event); + tw_fprint_binary_array(out, subprefix, item->local_event, item->req.self_event_size); } -static void print_fcfs_state(FILE * out, mn_sched_queue *sched) { - fprintf(out, "FCFS:\n"); - fprintf(out, " | .method = %p\n", sched->method); - fprintf(out, " | .is_recv_queue = %d\n", sched->is_recv_queue); - fprintf(out, " | .queue_len = %d\n", sched->queue_len); - fprintf(out, " | .reqs[%d] = {\n", qlist_count(&sched->reqs)); +static void print_fcfs_state(FILE * out, char const * prefix, mn_sched_queue *sched) { + fprintf(out, "%sFCFS:\n", prefix); + fprintf(out, "%s | .method = %p\n", prefix, sched->method); + fprintf(out, "%s | .is_recv_queue = %d\n", prefix, sched->is_recv_queue); + fprintf(out, "%s | .queue_len = %d\n", prefix, sched->queue_len); + fprintf(out, "%s | .reqs[%d] = {\n", prefix, qlist_count(&sched->reqs)); mn_sched_qitem * sched_qitem = NULL; qlist_for_each_entry(sched_qitem, &sched->reqs, ql) { - print_mn_sched_qitem(out, sched_qitem); + print_mn_sched_qitem(out, prefix, sched_qitem); } - fprintf(out, "}\n"); + fprintf(out, "%s | }\n", prefix); } void rr_init ( diff --git a/src/networks/model-net/core/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c index ed280e19..4868fcbf 100644 --- a/src/networks/model-net/core/model-net-sched.c +++ b/src/networks/model-net/core/model-net-sched.c @@ -115,28 +115,34 @@ bool check_model_net_sched( static void __print_model_net_sched( FILE * out, + char const * prefix, model_net_sched *sched, bool is_lp_state ) { crv_checkpointer const * chptr = sched_checkpointers[sched->type]; - fprintf(out, "model_net_sched.sched_type = %d\n", sched->type); - fprintf(out, "model_net_sched.\n"); + fprintf(out, "%smodel_net_sched.sched_type = %d\n", prefix, sched->type); + fprintf(out, "%smodel_net_sched.dat = %p\n", prefix, sched->dat); + + int len_subprefix = snprintf(NULL, 0, "%s | ", prefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, 
"%s | ", prefix); + if (chptr) { if (is_lp_state && chptr->print_lp) { - chptr->print_lp(out, sched->dat); + chptr->print_lp(out, subprefix, sched->dat); } if (!is_lp_state && chptr->print_checkpoint) { - chptr->print_checkpoint(out, sched->dat); + chptr->print_checkpoint(out, subprefix, sched->dat); } } } -void print_model_net_sched(FILE * out, model_net_sched *sched) { - __print_model_net_sched(out, sched, true); +void print_model_net_sched(FILE * out, char const * prefix, model_net_sched *sched) { + __print_model_net_sched(out, prefix, sched, true); } -void print_model_net_sched_checkpoint(FILE * out, model_net_sched *sched) { - __print_model_net_sched(out, sched, false); +void print_model_net_sched_checkpoint(FILE * out, char const * prefix, model_net_sched *sched) { + __print_model_net_sched(out, prefix, sched, false); } /* STOP Checking reverse handler functionality */ diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 6675ca4b..bb067dcd 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -6926,157 +6926,157 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, //*** ---------- START OF reverse handler checking functions ---------- *** // Print fuction originally constructed with help from Claude.ai -static void print_terminal_state(FILE * out, terminal_state * state) { - fprintf(out, "terminal_state (dragonfly-dally) ->\n"); - fprintf(out, " | packet_counter = %ld\n", state->packet_counter); - fprintf(out, " | packet_gen = %d\n", state->packet_gen); - fprintf(out, " | packet_fin = %d\n", state->packet_fin); - fprintf(out, " | total_gen_size = %d\n", state->total_gen_size); - fprintf(out, " | * router_lp = %p\n", state->router_lp); - fprintf(out, " | * router_id = %p\n", state->router_id); - fprintf(out, " | terminal_id = %u\n", state->terminal_id); - fprintf(out, " | connMan = \n"); - fprintf(out, " | *local_congestion_controller = 
%p\n", state->local_congestion_controller); - fprintf(out, " | workload_lpid_to_app_id = \n"); - fprintf(out, " | app_ids = \n"); - fprintf(out, " | workloads_finished_flag = %d\n", state->workloads_finished_flag); - fprintf(out, " | ** vc_occupancy = %p\n", state->vc_occupancy); - fprintf(out, " | *terminal_available_time = %p\n", state->terminal_available_time); - fprintf(out, " | *** terminal_msgs = %p\n", state->terminal_msgs); - fprintf(out, " | *** terminal_msgs_tail = %p\n", state->terminal_msgs_tail); - fprintf(out, " | * in_send_loop = %p\n", state->in_send_loop); - fprintf(out, " | dragonfly_stats_array = \n"); - fprintf(out, " | ** qos_status = %p\n", state->qos_status); - fprintf(out, " | ** qos_data = %p\n", state->qos_data); - fprintf(out, " | * last_qos_lvl = %p\n", state->last_qos_lvl); - fprintf(out, " | is_monitoring_bw = %d\n", state->is_monitoring_bw); - fprintf(out, " | * st = %p\n", state->st); - fprintf(out, " | * cc_st = %p\n", state->cc_st); - fprintf(out, " | * issueIdle = %p\n", state->issueIdle); - fprintf(out, " | ** terminal_length = %p\n", state->terminal_length); - fprintf(out, " | * anno = %s\n", state->anno ? 
state->anno : "(nil)"); - fprintf(out, " | * params = %p\n", state->params); - fprintf(out, " | * rank_tbl = %p\n", state->rank_tbl); - fprintf(out, " | rank_tbl_pop = %lu\n", state->rank_tbl_pop); - fprintf(out, " | total_time = %f\n", state->total_time); - fprintf(out, " | total_msg_size = %lu\n", state->total_msg_size); - fprintf(out, " | total_hops = %f\n", state->total_hops); - fprintf(out, " | finished_msgs = %ld\n", state->finished_msgs); - fprintf(out, " | finished_chunks = %ld\n", state->finished_chunks); - fprintf(out, " | finished_packets = %ld\n", state->finished_packets); - fprintf(out, " | * last_buf_full = %p\n", state->last_buf_full); - fprintf(out, " | * busy_time = %p\n", state->busy_time); - fprintf(out, " | * link_traffic = %p\n", state->link_traffic); - fprintf(out, " | * total_chunks = %p\n", state->total_chunks); - fprintf(out, " | * stalled_chunks = %p\n", state->stalled_chunks); - fprintf(out, " | injected_chunks = %lu\n", state->injected_chunks); - fprintf(out, " | ejected_chunks = %lu\n", state->ejected_chunks); - fprintf(out, " | max_latency = %f\n", state->max_latency); - fprintf(out, " | min_latency = %f\n", state->min_latency); - fprintf(out, " | output_buf = '%.4096s'\n", state->output_buf); - fprintf(out, " | output_buf2 = '%.4096s'\n", state->output_buf2); - fprintf(out, " | fin_chunks_sample = %ld\n", state->fin_chunks_sample); - fprintf(out, " | data_size_sample = %ld\n", state->data_size_sample); - fprintf(out, " | fin_hops_sample = %f\n", state->fin_hops_sample); - fprintf(out, " | fin_chunks_time = %f\n", state->fin_chunks_time); - fprintf(out, " | * busy_time_sample = %p\n", state->busy_time_sample); - fprintf(out, " | sample_buf = '%.4096s'\n", state->sample_buf); - fprintf(out, " | * sample_stat = %p\n", state->sample_stat); - fprintf(out, " | op_arr_size = %d\n", state->op_arr_size); - fprintf(out, " | max_arr_size = %d\n", state->max_arr_size); - fprintf(out, " | fwd_events = %ld\n", state->fwd_events); - fprintf(out, " | 
rev_events = %ld\n", state->rev_events); - fprintf(out, " | fin_chunks_ross_sample = %ld\n", state->fin_chunks_ross_sample); - fprintf(out, " | data_size_ross_sample = %ld\n", state->data_size_ross_sample); - fprintf(out, " | fin_hops_ross_sample = %ld\n", state->fin_hops_ross_sample); - fprintf(out, " | fin_chunks_time_ross_sample = %f\n", state->fin_chunks_time_ross_sample); - fprintf(out, " | * busy_time_ross_sample = %p\n", state->busy_time_ross_sample); - fprintf(out, " | ross_sample = \n"); - fprintf(out, " | sent_packets = \n"); - fprintf(out, " | last_packet_sent_id = %lu\n", state->last_packet_sent_id); - fprintf(out, " | arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); - fprintf(out, " | remaining_sz_packets = \n"); - fprintf(out, " | last_in_queue_time = %f\n", state->last_in_queue_time); - fprintf(out, " | * predictor_data = %p\n", state->predictor_data); - fprintf(out, " | zombies = \n"); - fprintf(out, " | * frozen_state = %p\n", state->frozen_state); +static void print_terminal_state(FILE * out, char const * prefix, terminal_state * state) { + fprintf(out, "%sterminal_state (dragonfly-dally) ->\n", prefix); + fprintf(out, "%s | packet_counter = %ld\n", prefix, state->packet_counter); + fprintf(out, "%s | packet_gen = %d\n", prefix, state->packet_gen); + fprintf(out, "%s | packet_fin = %d\n", prefix, state->packet_fin); + fprintf(out, "%s | total_gen_size = %d\n", prefix, state->total_gen_size); + fprintf(out, "%s | * router_lp = %p\n", prefix, state->router_lp); + fprintf(out, "%s | * router_id = %p\n", prefix, state->router_id); + fprintf(out, "%s | terminal_id = %u\n", prefix, state->terminal_id); + fprintf(out, "%s | connMan = \n", prefix); + fprintf(out, "%s | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller); + fprintf(out, "%s | workload_lpid_to_app_id = \n", prefix); + fprintf(out, "%s | app_ids = \n", 
prefix); + fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); + fprintf(out, "%s | ** vc_occupancy = %p\n", prefix, state->vc_occupancy); + fprintf(out, "%s | *terminal_available_time = %p\n", prefix, state->terminal_available_time); + fprintf(out, "%s | *** terminal_msgs = %p\n", prefix, state->terminal_msgs); + fprintf(out, "%s | *** terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail); + fprintf(out, "%s | * in_send_loop = %p\n", prefix, state->in_send_loop); + fprintf(out, "%s | dragonfly_stats_array = \n", prefix); + fprintf(out, "%s | ** qos_status = %p\n", prefix, state->qos_status); + fprintf(out, "%s | ** qos_data = %p\n", prefix, state->qos_data); + fprintf(out, "%s | * last_qos_lvl = %p\n", prefix, state->last_qos_lvl); + fprintf(out, "%s | is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw); + fprintf(out, "%s | * st = %p\n", prefix, state->st); + fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); + fprintf(out, "%s | * issueIdle = %p\n", prefix, state->issueIdle); + fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); + fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? 
state->anno : "(nil)"); + fprintf(out, "%s | * params = %p\n", prefix, state->params); + fprintf(out, "%s | * rank_tbl = %p\n", prefix, state->rank_tbl); + fprintf(out, "%s | rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop); + fprintf(out, "%s | total_time = %f\n", prefix, state->total_time); + fprintf(out, "%s | total_msg_size = %lu\n", prefix, state->total_msg_size); + fprintf(out, "%s | total_hops = %f\n", prefix, state->total_hops); + fprintf(out, "%s | finished_msgs = %ld\n", prefix, state->finished_msgs); + fprintf(out, "%s | finished_chunks = %ld\n", prefix, state->finished_chunks); + fprintf(out, "%s | finished_packets = %ld\n", prefix, state->finished_packets); + fprintf(out, "%s | * last_buf_full = %p\n", prefix, state->last_buf_full); + fprintf(out, "%s | * busy_time = %p\n", prefix, state->busy_time); + fprintf(out, "%s | * link_traffic = %p\n", prefix, state->link_traffic); + fprintf(out, "%s | * total_chunks = %p\n", prefix, state->total_chunks); + fprintf(out, "%s | * stalled_chunks = %p\n", prefix, state->stalled_chunks); + fprintf(out, "%s | injected_chunks = %lu\n", prefix, state->injected_chunks); + fprintf(out, "%s | ejected_chunks = %lu\n", prefix, state->ejected_chunks); + fprintf(out, "%s | max_latency = %f\n", prefix, state->max_latency); + fprintf(out, "%s | min_latency = %f\n", prefix, state->min_latency); + fprintf(out, "%s | output_buf = '%.4096s'\n", prefix, state->output_buf); + fprintf(out, "%s | output_buf2 = '%.4096s'\n", prefix, state->output_buf2); + fprintf(out, "%s | fin_chunks_sample = %ld\n", prefix, state->fin_chunks_sample); + fprintf(out, "%s | data_size_sample = %ld\n", prefix, state->data_size_sample); + fprintf(out, "%s | fin_hops_sample = %f\n", prefix, state->fin_hops_sample); + fprintf(out, "%s | fin_chunks_time = %f\n", prefix, state->fin_chunks_time); + fprintf(out, "%s | * busy_time_sample = %p\n", prefix, state->busy_time_sample); + fprintf(out, "%s | sample_buf = '%.4096s'\n", prefix, state->sample_buf); + 
fprintf(out, "%s | * sample_stat = %p\n", prefix, state->sample_stat); + fprintf(out, "%s | op_arr_size = %d\n", prefix, state->op_arr_size); + fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); + fprintf(out, "%s | fwd_events = %ld\n", prefix, state->fwd_events); + fprintf(out, "%s | rev_events = %ld\n", prefix, state->rev_events); + fprintf(out, "%s | fin_chunks_ross_sample = %ld\n", prefix, state->fin_chunks_ross_sample); + fprintf(out, "%s | data_size_ross_sample = %ld\n", prefix, state->data_size_ross_sample); + fprintf(out, "%s | fin_hops_ross_sample = %ld\n", prefix, state->fin_hops_ross_sample); + fprintf(out, "%s | fin_chunks_time_ross_sample = %f\n", prefix, state->fin_chunks_time_ross_sample); + fprintf(out, "%s | * busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample); + fprintf(out, "%s | ross_sample = \n", prefix); + fprintf(out, "%s | sent_packets = \n", prefix); + fprintf(out, "%s | last_packet_sent_id = %lu\n", prefix, state->last_packet_sent_id); + fprintf(out, "%s | arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); + fprintf(out, "%s | remaining_sz_packets = \n", prefix); + fprintf(out, "%s | last_in_queue_time = %f\n", prefix, state->last_in_queue_time); + fprintf(out, "%s | * predictor_data = %p\n", prefix, state->predictor_data); + fprintf(out, "%s | zombies = \n", prefix); + fprintf(out, "%s | * frozen_state = %p\n", prefix, state->frozen_state); } // Print fuction originally constructed with help from Claude.ai -static void print_terminal_dally_message(FILE * out, struct terminal_dally_message * msg) { - fprintf(out, "terminal_dally_message ->\n"); - fprintf(out, " | magic = %d\n", msg->magic); - fprintf(out, " | travel_start_time = %f\n", msg->travel_start_time); - fprintf(out, " | travel_end_time = %f\n", msg->travel_end_time); - fprintf(out, " | packet_ID = %llu\n", msg->packet_ID); - 
fprintf(out, " | type = %d\n", msg->type); - fprintf(out, " | notify_type = %d\n", msg->notify_type); - fprintf(out, " | category = %s\n", msg->category); - fprintf(out, " | final_dest_gid = %lu\n", msg->final_dest_gid); - fprintf(out, " | sender_lp = %lu\n", msg->sender_lp); - fprintf(out, " | sender_mn_lp = %lu\n", msg->sender_mn_lp); - fprintf(out, " | dest_terminal_lpid = %lu\n", msg->dest_terminal_lpid); - fprintf(out, " | dfdally_src_terminal_id = %u\n", msg->dfdally_src_terminal_id); - fprintf(out, " | dfdally_dest_terminal_id = %u\n", msg->dfdally_dest_terminal_id); - fprintf(out, " | src_terminal_id = %u\n", msg->src_terminal_id); - fprintf(out, " | origin_router_id = %u\n", msg->origin_router_id); - fprintf(out, " | app_id = %d\n", msg->app_id); - fprintf(out, " | my_N_hop = %d\n", msg->my_N_hop); - fprintf(out, " | my_l_hop = %d\n", msg->my_l_hop); - fprintf(out, " | my_g_hop = %d\n", msg->my_g_hop); - fprintf(out, " | my_hops_cur_group = %d\n", msg->my_hops_cur_group); - fprintf(out, " | saved_channel = %d\n", msg->saved_channel); - fprintf(out, " | saved_vc = %d\n", msg->saved_vc); - fprintf(out, " | next_stop = %d\n", msg->next_stop); - fprintf(out, " | this_router_arrival = %f\n", msg->this_router_arrival); - fprintf(out, " | this_router_ptp_latency = %f\n", msg->this_router_ptp_latency); - fprintf(out, " | intm_lp_id = %u\n", msg->intm_lp_id); - fprintf(out, " | last_hop = %d\n", msg->last_hop); - fprintf(out, " | is_intm_visited = %d\n", msg->is_intm_visited); - fprintf(out, " | intm_rtr_id = %d\n", msg->intm_rtr_id); - fprintf(out, " | intm_grp_id = %d\n", msg->intm_grp_id); - fprintf(out, " | saved_src_dest = %d\n", msg->saved_src_dest); - fprintf(out, " | saved_src_chan = %d\n", msg->saved_src_chan); - fprintf(out, " | chunk_id = %u\n", msg->chunk_id); - fprintf(out, " | packet_size = %u\n", msg->packet_size); - fprintf(out, " | message_id = %u\n", msg->message_id); - fprintf(out, " | total_size = %u\n", msg->total_size); - fprintf(out, " | 
remote_event_size_bytes = %d\n", msg->remote_event_size_bytes); - fprintf(out, " | local_event_size_bytes = %d\n", msg->local_event_size_bytes); - fprintf(out, " | vc_index = %d\n", msg->vc_index); - fprintf(out, " | rail_id = %d\n", msg->rail_id); - fprintf(out, " | output_chan = %d\n", msg->output_chan); - fprintf(out, " | event_rc = \n"); - fprintf(out, " | is_pull = %d\n", msg->is_pull); - fprintf(out, " | pull_size = %u\n", msg->pull_size); - fprintf(out, " | path_type = %d\n", msg->path_type); - fprintf(out, " | saved_app_id = %d\n", msg->saved_app_id); - fprintf(out, " | is_there_another_pckt_in_queue = %s\n", msg->is_there_another_pckt_in_queue ? "true" : "false"); - fprintf(out, " | num_rngs = %d\n", msg->num_rngs); - fprintf(out, " | num_cll = %d\n", msg->num_cll); - fprintf(out, " | last_saved_qos = %d\n", msg->last_saved_qos); - fprintf(out, " | qos_reset1 = %d\n", msg->qos_reset1); - fprintf(out, " | qos_reset2 = %d\n", msg->qos_reset2); - fprintf(out, " | rc_is_qos_set = %d\n", msg->rc_is_qos_set); - fprintf(out, " | * rc_qos_data = %p\n", msg->rc_qos_data); - fprintf(out, " | * rc_qos_status = %p\n", msg->rc_qos_status); - fprintf(out, " | saved_send_loop = %d\n", msg->saved_send_loop); - fprintf(out, " | saved_available_time = %f\n", msg->saved_available_time); - fprintf(out, " | saved_min_lat = %f\n", msg->saved_min_lat); - fprintf(out, " | saved_avg_time = %f\n", msg->saved_avg_time); - fprintf(out, " | saved_rcv_time = %f\n", msg->saved_rcv_time); - fprintf(out, " | saved_busy_time = %f\n", msg->saved_busy_time); - fprintf(out, " | saved_total_time = %f\n", msg->saved_total_time); - fprintf(out, " | saved_sample_time = %f\n", msg->saved_sample_time); - fprintf(out, " | msg_start_time = %f\n", msg->msg_start_time); - fprintf(out, " | saved_busy_time_ross = %f\n", msg->saved_busy_time_ross); - fprintf(out, " | saved_fin_chunks_ross = %f\n", msg->saved_fin_chunks_ross); - fprintf(out, " | saved_last_in_queue_time = %f\n", 
msg->saved_last_in_queue_time); - fprintf(out, " | saved_next_packet_delay = %f\n", msg->saved_next_packet_delay); - fprintf(out, " | msg_new_mn_event = %f\n", msg->msg_new_mn_event); - fprintf(out, " | last_received_time = %f\n", msg->last_received_time); - fprintf(out, " | last_sent_time = %f\n", msg->last_sent_time); - fprintf(out, " | last_bufupdate_time = %f\n", msg->last_bufupdate_time); +static void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) { + fprintf(out, "%sterminal_dally_message ->\n", prefix); + fprintf(out, "%s | magic = %d\n", prefix, msg->magic); + fprintf(out, "%s | travel_start_time = %f\n", prefix, msg->travel_start_time); + fprintf(out, "%s | travel_end_time = %f\n", prefix, msg->travel_end_time); + fprintf(out, "%s | packet_ID = %llu\n", prefix, msg->packet_ID); + fprintf(out, "%s | type = %d\n", prefix, msg->type); + fprintf(out, "%s | notify_type = %d\n", prefix, msg->notify_type); + fprintf(out, "%s | category = %s\n", prefix, msg->category); + fprintf(out, "%s | final_dest_gid = %lu\n", prefix, msg->final_dest_gid); + fprintf(out, "%s | sender_lp = %lu\n", prefix, msg->sender_lp); + fprintf(out, "%s | sender_mn_lp = %lu\n", prefix, msg->sender_mn_lp); + fprintf(out, "%s | dest_terminal_lpid = %lu\n", prefix, msg->dest_terminal_lpid); + fprintf(out, "%s | dfdally_src_terminal_id = %u\n", prefix, msg->dfdally_src_terminal_id); + fprintf(out, "%s | dfdally_dest_terminal_id = %u\n", prefix, msg->dfdally_dest_terminal_id); + fprintf(out, "%s | src_terminal_id = %u\n", prefix, msg->src_terminal_id); + fprintf(out, "%s | origin_router_id = %u\n", prefix, msg->origin_router_id); + fprintf(out, "%s | app_id = %d\n", prefix, msg->app_id); + fprintf(out, "%s | my_N_hop = %d\n", prefix, msg->my_N_hop); + fprintf(out, "%s | my_l_hop = %d\n", prefix, msg->my_l_hop); + fprintf(out, "%s | my_g_hop = %d\n", prefix, msg->my_g_hop); + fprintf(out, "%s | my_hops_cur_group = %d\n", prefix, 
msg->my_hops_cur_group); + fprintf(out, "%s | saved_channel = %d\n", prefix, msg->saved_channel); + fprintf(out, "%s | saved_vc = %d\n", prefix, msg->saved_vc); + fprintf(out, "%s | next_stop = %d\n", prefix, msg->next_stop); + fprintf(out, "%s | this_router_arrival = %f\n", prefix, msg->this_router_arrival); + fprintf(out, "%s | this_router_ptp_latency = %f\n", prefix, msg->this_router_ptp_latency); + fprintf(out, "%s | intm_lp_id = %u\n", prefix, msg->intm_lp_id); + fprintf(out, "%s | last_hop = %d\n", prefix, msg->last_hop); + fprintf(out, "%s | is_intm_visited = %d\n", prefix, msg->is_intm_visited); + fprintf(out, "%s | intm_rtr_id = %d\n", prefix, msg->intm_rtr_id); + fprintf(out, "%s | intm_grp_id = %d\n", prefix, msg->intm_grp_id); + fprintf(out, "%s | saved_src_dest = %d\n", prefix, msg->saved_src_dest); + fprintf(out, "%s | saved_src_chan = %d\n", prefix, msg->saved_src_chan); + fprintf(out, "%s | chunk_id = %u\n", prefix, msg->chunk_id); + fprintf(out, "%s | packet_size = %u\n", prefix, msg->packet_size); + fprintf(out, "%s | message_id = %u\n", prefix, msg->message_id); + fprintf(out, "%s | total_size = %u\n", prefix, msg->total_size); + fprintf(out, "%s | remote_event_size_bytes = %d\n", prefix, msg->remote_event_size_bytes); + fprintf(out, "%s | local_event_size_bytes = %d\n", prefix, msg->local_event_size_bytes); + fprintf(out, "%s | vc_index = %d\n", prefix, msg->vc_index); + fprintf(out, "%s | rail_id = %d\n", prefix, msg->rail_id); + fprintf(out, "%s | output_chan = %d\n", prefix, msg->output_chan); + fprintf(out, "%s | event_rc = \n", prefix); + fprintf(out, "%s | is_pull = %d\n", prefix, msg->is_pull); + fprintf(out, "%s | pull_size = %u\n", prefix, msg->pull_size); + fprintf(out, "%s | path_type = %d\n", prefix, msg->path_type); + fprintf(out, "%s | saved_app_id = %d\n", prefix, msg->saved_app_id); + fprintf(out, "%s | is_there_another_pckt_in_queue = %s\n", prefix, msg->is_there_another_pckt_in_queue ? 
"true" : "false"); + fprintf(out, "%s | num_rngs = %d\n", prefix, msg->num_rngs); + fprintf(out, "%s | num_cll = %d\n", prefix, msg->num_cll); + fprintf(out, "%s | last_saved_qos = %d\n", prefix, msg->last_saved_qos); + fprintf(out, "%s | qos_reset1 = %d\n", prefix, msg->qos_reset1); + fprintf(out, "%s | qos_reset2 = %d\n", prefix, msg->qos_reset2); + fprintf(out, "%s | rc_is_qos_set = %d\n", prefix, msg->rc_is_qos_set); + fprintf(out, "%s | * rc_qos_data = %p\n", prefix, msg->rc_qos_data); + fprintf(out, "%s | * rc_qos_status = %p\n", prefix, msg->rc_qos_status); + fprintf(out, "%s | saved_send_loop = %d\n", prefix, msg->saved_send_loop); + fprintf(out, "%s | saved_available_time = %f\n", prefix, msg->saved_available_time); + fprintf(out, "%s | saved_min_lat = %f\n", prefix, msg->saved_min_lat); + fprintf(out, "%s | saved_avg_time = %f\n", prefix, msg->saved_avg_time); + fprintf(out, "%s | saved_rcv_time = %f\n", prefix, msg->saved_rcv_time); + fprintf(out, "%s | saved_busy_time = %f\n", prefix, msg->saved_busy_time); + fprintf(out, "%s | saved_total_time = %f\n", prefix, msg->saved_total_time); + fprintf(out, "%s | saved_sample_time = %f\n", prefix, msg->saved_sample_time); + fprintf(out, "%s | msg_start_time = %f\n", prefix, msg->msg_start_time); + fprintf(out, "%s | saved_busy_time_ross = %f\n", prefix, msg->saved_busy_time_ross); + fprintf(out, "%s | saved_fin_chunks_ross = %f\n", prefix, msg->saved_fin_chunks_ross); + fprintf(out, "%s | saved_last_in_queue_time = %f\n", prefix, msg->saved_last_in_queue_time); + fprintf(out, "%s | saved_next_packet_delay = %f\n", prefix, msg->saved_next_packet_delay); + fprintf(out, "%s | msg_new_mn_event = %f\n", prefix, msg->msg_new_mn_event); + fprintf(out, "%s | last_received_time = %f\n", prefix, msg->last_received_time); + fprintf(out, "%s | last_sent_time = %f\n", prefix, msg->last_sent_time); + fprintf(out, "%s | last_bufupdate_time = %f\n", prefix, msg->last_bufupdate_time); } //*** ---------- END OF reverse handler 
checking functions ---------- *** diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c index a66e517a..45efc8c0 100644 --- a/src/workload/codes-workload.c +++ b/src/workload/codes-workload.c @@ -591,79 +591,79 @@ char const * const op_type_string(enum codes_workload_op_type op_type) { } // Initial implementation by Claude.ai -void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char const * const begin) { +void fprint_codes_workload_op(FILE * out, char const * prefix, struct codes_workload_op * op) { if (op == NULL) { return; } // Print common fields first - fprintf(out, "%sop_type = %s\n", begin, op_type_string(op->op_type)); + fprintf(out, "%sop_type = %s\n", prefix, op_type_string(op->op_type)); - fprintf(out, "%s start_time = %f\n", begin, op->start_time); - fprintf(out, "%s end_time = %f\n", begin, op->end_time); - fprintf(out, "%s sim_start_time = %f\n", begin, op->sim_start_time); - fprintf(out, "%s sequence_id = %ld\n", begin, op->sequence_id); + fprintf(out, "%s start_time = %f\n", prefix, op->start_time); + fprintf(out, "%s end_time = %f\n", prefix, op->end_time); + fprintf(out, "%s sim_start_time = %f\n", prefix, op->sim_start_time); + fprintf(out, "%s sequence_id = %ld\n", prefix, op->sequence_id); // Print union fields based on op_type switch(op->op_type) { case CODES_WK_DELAY: - fprintf(out, "%s delay.seconds = %f\n", begin, op->u.delay.seconds); - fprintf(out, "%s delay.nsecs = %f\n", begin, op->u.delay.nsecs); + fprintf(out, "%s delay.seconds = %f\n", prefix, op->u.delay.seconds); + fprintf(out, "%s delay.nsecs = %f\n", prefix, op->u.delay.nsecs); break; case CODES_WK_BARRIER: - fprintf(out, "%s barrier.count = %d\n", begin, op->u.barrier.count); - fprintf(out, "%s barrier.root = %d\n", begin, op->u.barrier.root); + fprintf(out, "%s barrier.count = %d\n", prefix, op->u.barrier.count); + fprintf(out, "%s barrier.root = %d\n", prefix, op->u.barrier.root); break; case CODES_WK_OPEN: case CODES_WK_MPI_OPEN: case 
CODES_WK_MPI_COLL_OPEN: - fprintf(out, "%s open.file_id = %lu\n", begin, op->u.open.file_id); - fprintf(out, "%s open.create_flag = %d\n", begin, op->u.open.create_flag); + fprintf(out, "%s open.file_id = %lu\n", prefix, op->u.open.file_id); + fprintf(out, "%s open.create_flag = %d\n", prefix, op->u.open.create_flag); break; case CODES_WK_WRITE: case CODES_WK_MPI_WRITE: case CODES_WK_MPI_COLL_WRITE: - fprintf(out, "%s write.file_id = %lu\n", begin, op->u.write.file_id); - fprintf(out, "%s write.offset = %ld\n", begin, op->u.write.offset); - fprintf(out, "%s write.size = %zu\n", begin, op->u.write.size); + fprintf(out, "%s write.file_id = %lu\n", prefix, op->u.write.file_id); + fprintf(out, "%s write.offset = %ld\n", prefix, op->u.write.offset); + fprintf(out, "%s write.size = %zu\n", prefix, op->u.write.size); break; case CODES_WK_READ: case CODES_WK_MPI_READ: case CODES_WK_MPI_COLL_READ: - fprintf(out, "%s read.file_id = %lu\n", begin, op->u.read.file_id); - fprintf(out, "%s read.offset = %ld\n", begin, op->u.read.offset); - fprintf(out, "%s read.size = %zu\n", begin, op->u.read.size); + fprintf(out, "%s read.file_id = %lu\n", prefix, op->u.read.file_id); + fprintf(out, "%s read.offset = %ld\n", prefix, op->u.read.offset); + fprintf(out, "%s read.size = %zu\n", prefix, op->u.read.size); break; case CODES_WK_CLOSE: case CODES_WK_MPI_CLOSE: - fprintf(out, "%s close.file_id = %lu\n", begin, op->u.close.file_id); + fprintf(out, "%s close.file_id = %lu\n", prefix, op->u.close.file_id); break; case CODES_WK_SEND: case CODES_WK_ISEND: - fprintf(out, "%s send.source_rank = %d\n", begin, op->u.send.source_rank); - fprintf(out, "%s send.dest_rank = %d\n", begin, op->u.send.dest_rank); - fprintf(out, "%s send.num_bytes = %ld\n", begin, op->u.send.num_bytes); - fprintf(out, "%s send.data_type = %d\n", begin, op->u.send.data_type); - fprintf(out, "%s send.count = %d\n", begin, op->u.send.count); - fprintf(out, "%s send.tag = %d\n", begin, op->u.send.tag); - fprintf(out, "%s 
send.req_id = %u\n", begin, op->u.send.req_id); + fprintf(out, "%s send.source_rank = %d\n", prefix, op->u.send.source_rank); + fprintf(out, "%s send.dest_rank = %d\n", prefix, op->u.send.dest_rank); + fprintf(out, "%s send.num_bytes = %ld\n", prefix, op->u.send.num_bytes); + fprintf(out, "%s send.data_type = %d\n", prefix, op->u.send.data_type); + fprintf(out, "%s send.count = %d\n", prefix, op->u.send.count); + fprintf(out, "%s send.tag = %d\n", prefix, op->u.send.tag); + fprintf(out, "%s send.req_id = %u\n", prefix, op->u.send.req_id); break; case CODES_WK_RECV: case CODES_WK_IRECV: - fprintf(out, "%s recv.source_rank = %d\n", begin, op->u.recv.source_rank); - fprintf(out, "%s recv.dest_rank = %d\n", begin, op->u.recv.dest_rank); - fprintf(out, "%s recv.num_bytes = %ld\n", begin, op->u.recv.num_bytes); - fprintf(out, "%s recv.data_type = %d\n", begin, op->u.recv.data_type); - fprintf(out, "%s recv.count = %d\n", begin, op->u.recv.count); - fprintf(out, "%s recv.tag = %d\n", begin, op->u.recv.tag); - fprintf(out, "%s recv.req_id = %u\n", begin, op->u.recv.req_id); + fprintf(out, "%s recv.source_rank = %d\n", prefix, op->u.recv.source_rank); + fprintf(out, "%s recv.dest_rank = %d\n", prefix, op->u.recv.dest_rank); + fprintf(out, "%s recv.num_bytes = %ld\n", prefix, op->u.recv.num_bytes); + fprintf(out, "%s recv.data_type = %d\n", prefix, op->u.recv.data_type); + fprintf(out, "%s recv.count = %d\n", prefix, op->u.recv.count); + fprintf(out, "%s recv.tag = %d\n", prefix, op->u.recv.tag); + fprintf(out, "%s recv.req_id = %u\n", prefix, op->u.recv.req_id); break; case CODES_WK_COL: @@ -674,23 +674,23 @@ void fprint_codes_workload_op(FILE * out, struct codes_workload_op * op, char co case CODES_WK_ALLTOALLV: case CODES_WK_REDUCE: case CODES_WK_ALLREDUCE: - fprintf(out, "%scollective.num_bytes = %d\n", begin, op->u.collective.num_bytes); + fprintf(out, "%scollective.num_bytes = %d\n", prefix, op->u.collective.num_bytes); break; case CODES_WK_WAITALL: case 
CODES_WK_WAITSOME: case CODES_WK_WAITANY: case CODES_WK_TESTALL: - fprintf(out, "%s waits.count = %d\n", begin, op->u.waits.count); - fprintf(out, "%s waits.req_ids = %p\n", begin, op->u.waits.req_ids); + fprintf(out, "%s waits.count = %d\n", prefix, op->u.waits.count); + fprintf(out, "%s waits.req_ids = %p\n", prefix, op->u.waits.req_ids); break; case CODES_WK_WAIT: - fprintf(out, "%s wait.req_id = %u\n", begin, op->u.wait.req_id); + fprintf(out, "%s wait.req_id = %u\n", prefix, op->u.wait.req_id); break; case CODES_WK_REQ_FREE: - fprintf(out, "%s free.req_id = %u\n", begin, op->u.free.req_id); + fprintf(out, "%s free.req_id = %u\n", prefix, op->u.free.req_id); break; case CODES_WK_END: From ca89cf14591bcae883f3b535c6ff64763b04af6b Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 7 Mar 2025 17:14:43 -0500 Subject: [PATCH 030/110] Small implementation fixes (typo and exporting function name) --- codes/net/dragonfly-dally.h | 2 ++ src/networks/model-net/core/model-net-lp.c | 6 +++++- src/networks/model-net/dragonfly-dally.C | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h index b5d93b88..dc73d145 100644 --- a/codes/net/dragonfly-dally.h +++ b/codes/net/dragonfly-dally.h @@ -136,6 +136,8 @@ struct terminal_dally_message tw_stime last_bufupdate_time; }; +void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg); + #ifdef __cplusplus } #endif diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 1a065c8f..92d3dfe1 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -1176,7 +1176,7 @@ static void save_state_net_state(model_net_base_state * into, model_net_base_sta into->sub_state = NULL; crv_checkpointer * chptr = method_array[from->net_id]->checkpointer; - if (chptr && chptr->check_lps) { + if (chptr && chptr->save_lp) { into->sub_state = 
calloc(1, from->sub_type->state_sz); chptr->save_lp(into->sub_state, from->sub_state); } @@ -1201,6 +1201,10 @@ static void clean_state_net_state(model_net_base_state * state) { free(state->sched_recv); if (state->sub_state != NULL) { + crv_checkpointer * chptr = method_array[state->net_id]->checkpointer; + if (chptr && chptr->clean_lp) { + chptr->clean_lp(state->sub_state); + } free(state->sub_state); } free(state->node_copy_next_available_time); diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index bb067dcd..9cb3a6fe 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -7003,7 +7003,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state } // Print fuction originally constructed with help from Claude.ai -static void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) { +void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) { fprintf(out, "%sterminal_dally_message ->\n", prefix); fprintf(out, "%s | magic = %d\n", prefix, msg->magic); fprintf(out, "%s | travel_start_time = %f\n", prefix, msg->travel_start_time); From 2dd6db52f5dcc3ec6cf2d470191736b42e122438 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 7 Mar 2025 17:16:55 -0500 Subject: [PATCH 031/110] Implementing base deep-copy/clean/comparison/print for dragonfly lps --- src/networks/model-net/dragonfly-dally.C | 216 +++++++++++++++++------ 1 file changed, 158 insertions(+), 58 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 9cb3a6fe..b986ab32 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -517,9 +517,6 @@ struct terminal_state DragonflyConnectionManager connMan; tlc_state *local_congestion_controller; - map workload_lpid_to_app_id; - set app_ids; - int 
workloads_finished_flag; int** vc_occupancy; // vc_occupancies [rail_id][qos_level] @@ -596,10 +593,10 @@ struct terminal_state // Variables to recover latency of packets sent to other terminals // Sent packets (to be populated at by commit handler of packet sender) map sent_packets; - uint64_t last_packet_sent_id; + int64_t last_packet_sent_id; // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency) struct { - uint64_t packet_ID; + int64_t packet_ID; double travel_end_time; } arrival_of_last_packet; // received (and not completed, yet) packets. The value associated to a key is the remaining number of "bytes" to receive before the packet is consumed totally. If a packet size == chunk size, this map will never be used/filled @@ -3039,12 +3036,12 @@ static void dragonfly_dally_terminal_highdef_to_surrogate( s->finished_msgs = frozen_state->finished_msgs; s->rank_tbl_pop = frozen_state->rank_tbl_pop; s->last_packet_sent_id = frozen_state->last_packet_sent_id; + s->rank_tbl = frozen_state->rank_tbl; + s->st = frozen_state->st; memcpy(&s->arrival_of_last_packet, &frozen_state->arrival_of_last_packet, sizeof(s->arrival_of_last_packet)); memcpy(&s->zombies, &frozen_state->zombies, sizeof(s->zombies)); memcpy(&s->sent_packets, &frozen_state->sent_packets, sizeof(s->sent_packets)); memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets)); - memcpy(&s->rank_tbl, &frozen_state->rank_tbl, sizeof(s->rank_tbl)); - memcpy(&s->st, &frozen_state->st, sizeof(s->st)); s->frozen_state = frozen_state; }; @@ -3080,12 +3077,12 @@ static void dragonfly_dally_terminal_surrogate_to_highdef( frozen_state->finished_msgs = s->finished_msgs; frozen_state->rank_tbl_pop = s->rank_tbl_pop; frozen_state->last_packet_sent_id = s->last_packet_sent_id; + frozen_state->rank_tbl = s->rank_tbl; + frozen_state->st = s->st; 
memcpy(&frozen_state->arrival_of_last_packet, &s->arrival_of_last_packet, sizeof(s->arrival_of_last_packet)); memcpy(&frozen_state->zombies, &s->zombies, sizeof(s->zombies)); memcpy(&frozen_state->sent_packets, &s->sent_packets, sizeof(s->sent_packets)); memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets)); - memcpy(&frozen_state->rank_tbl, &s->rank_tbl, sizeof(s->rank_tbl)); - memcpy(&frozen_state->st, &s->st, sizeof(s->st)); memcpy(s, frozen_state, sizeof(terminal_state)); memset(frozen_state, 0, sizeof(terminal_state)); free(frozen_state); @@ -3472,9 +3469,6 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 1, s->router_id[i] / num_routers_per_mgrp, s->router_id[i] % num_routers_per_mgrp, &s->router_lp[i]); } - s->workload_lpid_to_app_id = map(); - s->app_ids = set(); - s->terminal_available_time = (tw_stime*)calloc(p->num_rails, sizeof(tw_stime)); s->packet_counter = 0; s->min_latency = INT_MAX; @@ -6925,6 +6919,103 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, } //*** ---------- START OF reverse handler checking functions ---------- *** +static void save_terminal_state(terminal_state *into, terminal_state const *from) { + memcpy(into, from, sizeof(terminal_state)); +} + +static void clean_terminal_state(terminal_state *state) { +} + +static bool check_terminal_state(terminal_state *before, terminal_state *after) { + bool is_same = true; + + // Compare scalar values + is_same &= (before->packet_counter == after->packet_counter); + is_same &= (before->packet_gen == after->packet_gen); + is_same &= (before->packet_fin == after->packet_fin); + is_same &= (before->total_gen_size == after->total_gen_size); + is_same &= (before->terminal_id == after->terminal_id); + is_same &= (before->workloads_finished_flag == after->workloads_finished_flag); + is_same &= (before->is_monitoring_bw == 
after->is_monitoring_bw); + is_same &= (before->rank_tbl_pop == after->rank_tbl_pop); + is_same &= (before->total_time == after->total_time); + is_same &= (before->total_msg_size == after->total_msg_size); + is_same &= (before->total_hops == after->total_hops); + is_same &= (before->finished_msgs == after->finished_msgs); + is_same &= (before->finished_chunks == after->finished_chunks); + is_same &= (before->finished_packets == after->finished_packets); + is_same &= (before->injected_chunks == after->injected_chunks); + is_same &= (before->ejected_chunks == after->ejected_chunks); + is_same &= (before->max_latency == after->max_latency); + is_same &= (before->min_latency == after->min_latency); + is_same &= (before->fin_chunks_sample == after->fin_chunks_sample); + is_same &= (before->data_size_sample == after->data_size_sample); + is_same &= (before->fin_hops_sample == after->fin_hops_sample); + is_same &= (before->fin_chunks_time == after->fin_chunks_time); + is_same &= (before->op_arr_size == after->op_arr_size); + is_same &= (before->max_arr_size == after->max_arr_size); + //is_same &= (before->fwd_events == after->fwd_events); // This is used for statistics, they are never changed when rollbacking + //is_same &= (before->rev_events == after->rev_events); // This is used for statistics, they are never changed when rollbacking + is_same &= (before->fin_chunks_ross_sample == after->fin_chunks_ross_sample); + is_same &= (before->data_size_ross_sample == after->data_size_ross_sample); + is_same &= (before->fin_hops_ross_sample == after->fin_hops_ross_sample); + is_same &= (before->fin_chunks_time_ross_sample == after->fin_chunks_time_ross_sample); + is_same &= (before->last_packet_sent_id == after->last_packet_sent_id); + is_same &= (before->last_in_queue_time == after->last_in_queue_time); + + // Compare arrival_of_last_packet struct + is_same &= (before->arrival_of_last_packet.packet_ID == after->arrival_of_last_packet.packet_ID); + is_same &= 
(before->arrival_of_last_packet.travel_end_time == after->arrival_of_last_packet.travel_end_time); + + // Compare arrays (assumes params is the same for both) + assert(before->params == after->params); + //if (before->params && after->params && before->params->num_rails == after->params->num_rails) { + // for (int i = 0; i < before->params->num_rails; i++) { + // is_same &= (before->router_lp[i] == after->router_lp[i]); + // is_same &= (before->router_id[i] == after->router_id[i]); + // } + //} else { + // is_same = false; + //} + + // Compare string buffers + is_same &= (strncmp(before->output_buf, after->output_buf, 4096) == 0); + is_same &= (strncmp(before->output_buf2, after->output_buf2, 4096) == 0); + is_same &= (strncmp(before->sample_buf, after->sample_buf, 4096) == 0); + + // Compare anno strings (handling NULL case) + if (before->anno && after->anno) { + is_same &= (strcmp(before->anno, after->anno) == 0); + } else { + is_same &= (before->anno == after->anno); + } + + // Compare pointers (just checking if they're both NULL or both non-NULL) + //is_same &= ((before->local_congestion_controller == NULL) == (after->local_congestion_controller == NULL)); + //is_same &= ((before->vc_occupancy == NULL) == (after->vc_occupancy == NULL)); + //is_same &= ((before->terminal_available_time == NULL) == (after->terminal_available_time == NULL)); + //is_same &= ((before->terminal_msgs == NULL) == (after->terminal_msgs == NULL)); + //is_same &= ((before->in_send_loop == NULL) == (after->in_send_loop == NULL)); + //is_same &= ((before->qos_status == NULL) == (after->qos_status == NULL)); + //is_same &= ((before->qos_data == NULL) == (after->qos_data == NULL)); + //is_same &= ((before->last_qos_lvl == NULL) == (after->last_qos_lvl == NULL)); + //is_same &= ((before->issueIdle == NULL) == (after->issueIdle == NULL)); + //is_same &= ((before->terminal_length == NULL) == (after->terminal_length == NULL)); + //is_same &= ((before->rank_tbl == NULL) == (after->rank_tbl == 
NULL)); + //is_same &= ((before->last_buf_full == NULL) == (after->last_buf_full == NULL)); + //is_same &= ((before->busy_time == NULL) == (after->busy_time == NULL)); + //is_same &= ((before->link_traffic == NULL) == (after->link_traffic == NULL)); + //is_same &= ((before->total_chunks == NULL) == (after->total_chunks == NULL)); + //is_same &= ((before->stalled_chunks == NULL) == (after->stalled_chunks == NULL)); + //is_same &= ((before->busy_time_sample == NULL) == (after->busy_time_sample == NULL)); + //is_same &= ((before->sample_stat == NULL) == (after->sample_stat == NULL)); + //is_same &= ((before->busy_time_ross_sample == NULL) == (after->busy_time_ross_sample == NULL)); + //is_same &= ((before->predictor_data == NULL) == (after->predictor_data == NULL)); + is_same &= ((before->frozen_state == NULL) && (after->frozen_state == NULL)); + + return is_same; +} + // Print fuction originally constructed with help from Claude.ai static void print_terminal_state(FILE * out, char const * prefix, terminal_state * state) { fprintf(out, "%sterminal_state (dragonfly-dally) ->\n", prefix); @@ -6932,13 +7023,22 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | packet_gen = %d\n", prefix, state->packet_gen); fprintf(out, "%s | packet_fin = %d\n", prefix, state->packet_fin); fprintf(out, "%s | total_gen_size = %d\n", prefix, state->total_gen_size); - fprintf(out, "%s | * router_lp = %p\n", prefix, state->router_lp); - fprintf(out, "%s | * router_id = %p\n", prefix, state->router_id); + + fprintf(out, "%s | * router_lp[%d] = [", prefix, state->params->num_rails); + for (int i=0; i<state->params->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->router_lp[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * router_id[%d] = [", prefix, state->params->num_rails); + for (int i=0; i<state->params->num_rails; i++) { + fprintf(out, "%s%u", i ?
", " : "", state->router_id[i]); + } + fprintf(out, "]\n"); + fprintf(out, "%s | terminal_id = %u\n", prefix, state->terminal_id); fprintf(out, "%s | connMan = \n", prefix); fprintf(out, "%s | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller); - fprintf(out, "%s | workload_lpid_to_app_id = \n", prefix); - fprintf(out, "%s | app_ids = \n", prefix); fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); fprintf(out, "%s | ** vc_occupancy = %p\n", prefix, state->vc_occupancy); fprintf(out, "%s | *terminal_available_time = %p\n", prefix, state->terminal_available_time); @@ -6958,9 +7058,9 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | * params = %p\n", prefix, state->params); fprintf(out, "%s | * rank_tbl = %p\n", prefix, state->rank_tbl); fprintf(out, "%s | rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop); - fprintf(out, "%s | total_time = %f\n", prefix, state->total_time); + fprintf(out, "%s | total_time = %g\n", prefix, state->total_time); fprintf(out, "%s | total_msg_size = %lu\n", prefix, state->total_msg_size); - fprintf(out, "%s | total_hops = %f\n", prefix, state->total_hops); + fprintf(out, "%s | total_hops = %g\n", prefix, state->total_hops); fprintf(out, "%s | finished_msgs = %ld\n", prefix, state->finished_msgs); fprintf(out, "%s | finished_chunks = %ld\n", prefix, state->finished_chunks); fprintf(out, "%s | finished_packets = %ld\n", prefix, state->finished_packets); @@ -6971,17 +7071,17 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | * stalled_chunks = %p\n", prefix, state->stalled_chunks); fprintf(out, "%s | injected_chunks = %lu\n", prefix, state->injected_chunks); fprintf(out, "%s | ejected_chunks = %lu\n", prefix, state->ejected_chunks); - fprintf(out, "%s | max_latency = %f\n", prefix, state->max_latency); - fprintf(out, "%s | min_latency = %f\n", prefix, 
state->min_latency); + fprintf(out, "%s | max_latency = %g\n", prefix, state->max_latency); + fprintf(out, "%s | min_latency = %g\n", prefix, state->min_latency); fprintf(out, "%s | output_buf = '%.4096s'\n", prefix, state->output_buf); fprintf(out, "%s | output_buf2 = '%.4096s'\n", prefix, state->output_buf2); fprintf(out, "%s | fin_chunks_sample = %ld\n", prefix, state->fin_chunks_sample); fprintf(out, "%s | data_size_sample = %ld\n", prefix, state->data_size_sample); - fprintf(out, "%s | fin_hops_sample = %f\n", prefix, state->fin_hops_sample); - fprintf(out, "%s | fin_chunks_time = %f\n", prefix, state->fin_chunks_time); + fprintf(out, "%s | fin_hops_sample = %g\n", prefix, state->fin_hops_sample); + fprintf(out, "%s | fin_chunks_time = %g\n", prefix, state->fin_chunks_time); fprintf(out, "%s | * busy_time_sample = %p\n", prefix, state->busy_time_sample); fprintf(out, "%s | sample_buf = '%.4096s'\n", prefix, state->sample_buf); - fprintf(out, "%s | * sample_stat = %p\n", prefix, state->sample_stat); + fprintf(out, "%s | * sample_stat = %p\n", prefix, state->sample_stat); // ingnoring as this part of the code is never used. 
Originally part of instrumentation fprintf(out, "%s | op_arr_size = %d\n", prefix, state->op_arr_size); fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); fprintf(out, "%s | fwd_events = %ld\n", prefix, state->fwd_events); @@ -6989,14 +7089,14 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | fin_chunks_ross_sample = %ld\n", prefix, state->fin_chunks_ross_sample); fprintf(out, "%s | data_size_ross_sample = %ld\n", prefix, state->data_size_ross_sample); fprintf(out, "%s | fin_hops_ross_sample = %ld\n", prefix, state->fin_hops_ross_sample); - fprintf(out, "%s | fin_chunks_time_ross_sample = %f\n", prefix, state->fin_chunks_time_ross_sample); - fprintf(out, "%s | * busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample); - fprintf(out, "%s | ross_sample = \n", prefix); + fprintf(out, "%s | fin_chunks_time_ross_sample = %g\n", prefix, state->fin_chunks_time_ross_sample); + fprintf(out, "%s | * busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample); // ingnoring as this part of the code is never used. Originally part of instrumentation + fprintf(out, "%s | ross_sample = \n", prefix); // ingnoring as this part of the code is never used. 
Originally part of instrumentation fprintf(out, "%s | sent_packets = \n", prefix); - fprintf(out, "%s | last_packet_sent_id = %lu\n", prefix, state->last_packet_sent_id); - fprintf(out, "%s | arrival_of_last_packet = {packet_ID: %lu, travel_end_time: %f}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); + fprintf(out, "%s | last_packet_sent_id = %ld\n", prefix, state->last_packet_sent_id); + fprintf(out, "%s | arrival_of_last_packet = {packet_ID: %ld, travel_end_time: %g}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); fprintf(out, "%s | remaining_sz_packets = \n", prefix); - fprintf(out, "%s | last_in_queue_time = %f\n", prefix, state->last_in_queue_time); + fprintf(out, "%s | last_in_queue_time = %g\n", prefix, state->last_in_queue_time); fprintf(out, "%s | * predictor_data = %p\n", prefix, state->predictor_data); fprintf(out, "%s | zombies = \n", prefix); fprintf(out, "%s | * frozen_state = %p\n", prefix, state->frozen_state); @@ -7006,12 +7106,12 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) { fprintf(out, "%sterminal_dally_message ->\n", prefix); fprintf(out, "%s | magic = %d\n", prefix, msg->magic); - fprintf(out, "%s | travel_start_time = %f\n", prefix, msg->travel_start_time); - fprintf(out, "%s | travel_end_time = %f\n", prefix, msg->travel_end_time); + fprintf(out, "%s | travel_start_time = %g\n", prefix, msg->travel_start_time); + fprintf(out, "%s | travel_end_time = %g\n", prefix, msg->travel_end_time); fprintf(out, "%s | packet_ID = %llu\n", prefix, msg->packet_ID); fprintf(out, "%s | type = %d\n", prefix, msg->type); fprintf(out, "%s | notify_type = %d\n", prefix, msg->notify_type); - fprintf(out, "%s | category = %s\n", prefix, msg->category); + fprintf(out, "%s | category = %.16s\n", prefix, msg->category); 
fprintf(out, "%s | final_dest_gid = %lu\n", prefix, msg->final_dest_gid); fprintf(out, "%s | sender_lp = %lu\n", prefix, msg->sender_lp); fprintf(out, "%s | sender_mn_lp = %lu\n", prefix, msg->sender_mn_lp); @@ -7028,8 +7128,8 @@ void print_terminal_dally_message(FILE * out, char const * prefix, struct termin fprintf(out, "%s | saved_channel = %d\n", prefix, msg->saved_channel); fprintf(out, "%s | saved_vc = %d\n", prefix, msg->saved_vc); fprintf(out, "%s | next_stop = %d\n", prefix, msg->next_stop); - fprintf(out, "%s | this_router_arrival = %f\n", prefix, msg->this_router_arrival); - fprintf(out, "%s | this_router_ptp_latency = %f\n", prefix, msg->this_router_ptp_latency); + fprintf(out, "%s | this_router_arrival = %g\n", prefix, msg->this_router_arrival); + fprintf(out, "%s | this_router_ptp_latency = %g\n", prefix, msg->this_router_ptp_latency); fprintf(out, "%s | intm_lp_id = %u\n", prefix, msg->intm_lp_id); fprintf(out, "%s | last_hop = %d\n", prefix, msg->last_hop); fprintf(out, "%s | is_intm_visited = %d\n", prefix, msg->is_intm_visited); @@ -7046,12 +7146,12 @@ void print_terminal_dally_message(FILE * out, char const * prefix, struct termin fprintf(out, "%s | vc_index = %d\n", prefix, msg->vc_index); fprintf(out, "%s | rail_id = %d\n", prefix, msg->rail_id); fprintf(out, "%s | output_chan = %d\n", prefix, msg->output_chan); - fprintf(out, "%s | event_rc = \n", prefix); + fprintf(out, "%s | event_rc = %d\n", prefix, msg->event_rc); fprintf(out, "%s | is_pull = %d\n", prefix, msg->is_pull); fprintf(out, "%s | pull_size = %u\n", prefix, msg->pull_size); fprintf(out, "%s | path_type = %d\n", prefix, msg->path_type); fprintf(out, "%s | saved_app_id = %d\n", prefix, msg->saved_app_id); - fprintf(out, "%s | is_there_another_pckt_in_queue = %s\n", prefix, msg->is_there_another_pckt_in_queue ? 
"true" : "false"); + fprintf(out, "%s | is_there_another_pckt_in_queue = %d\n", prefix, msg->is_there_another_pckt_in_queue); fprintf(out, "%s | num_rngs = %d\n", prefix, msg->num_rngs); fprintf(out, "%s | num_cll = %d\n", prefix, msg->num_cll); fprintf(out, "%s | last_saved_qos = %d\n", prefix, msg->last_saved_qos); @@ -7061,22 +7161,22 @@ void print_terminal_dally_message(FILE * out, char const * prefix, struct termin fprintf(out, "%s | * rc_qos_data = %p\n", prefix, msg->rc_qos_data); fprintf(out, "%s | * rc_qos_status = %p\n", prefix, msg->rc_qos_status); fprintf(out, "%s | saved_send_loop = %d\n", prefix, msg->saved_send_loop); - fprintf(out, "%s | saved_available_time = %f\n", prefix, msg->saved_available_time); - fprintf(out, "%s | saved_min_lat = %f\n", prefix, msg->saved_min_lat); - fprintf(out, "%s | saved_avg_time = %f\n", prefix, msg->saved_avg_time); - fprintf(out, "%s | saved_rcv_time = %f\n", prefix, msg->saved_rcv_time); - fprintf(out, "%s | saved_busy_time = %f\n", prefix, msg->saved_busy_time); - fprintf(out, "%s | saved_total_time = %f\n", prefix, msg->saved_total_time); - fprintf(out, "%s | saved_sample_time = %f\n", prefix, msg->saved_sample_time); - fprintf(out, "%s | msg_start_time = %f\n", prefix, msg->msg_start_time); - fprintf(out, "%s | saved_busy_time_ross = %f\n", prefix, msg->saved_busy_time_ross); - fprintf(out, "%s | saved_fin_chunks_ross = %f\n", prefix, msg->saved_fin_chunks_ross); - fprintf(out, "%s | saved_last_in_queue_time = %f\n", prefix, msg->saved_last_in_queue_time); - fprintf(out, "%s | saved_next_packet_delay = %f\n", prefix, msg->saved_next_packet_delay); - fprintf(out, "%s | msg_new_mn_event = %f\n", prefix, msg->msg_new_mn_event); - fprintf(out, "%s | last_received_time = %f\n", prefix, msg->last_received_time); - fprintf(out, "%s | last_sent_time = %f\n", prefix, msg->last_sent_time); - fprintf(out, "%s | last_bufupdate_time = %f\n", prefix, msg->last_bufupdate_time); + fprintf(out, "%s | saved_available_time = %g\n", 
prefix, msg->saved_available_time); + fprintf(out, "%s | saved_min_lat = %g\n", prefix, msg->saved_min_lat); + fprintf(out, "%s | saved_avg_time = %g\n", prefix, msg->saved_avg_time); + fprintf(out, "%s | saved_rcv_time = %g\n", prefix, msg->saved_rcv_time); + fprintf(out, "%s | saved_busy_time = %g\n", prefix, msg->saved_busy_time); + fprintf(out, "%s | saved_total_time = %g\n", prefix, msg->saved_total_time); + fprintf(out, "%s | saved_sample_time = %g\n", prefix, msg->saved_sample_time); + fprintf(out, "%s | msg_start_time = %g\n", prefix, msg->msg_start_time); + fprintf(out, "%s | saved_busy_time_ross = %g\n", prefix, msg->saved_busy_time_ross); + fprintf(out, "%s | saved_fin_chunks_ross = %g\n", prefix, msg->saved_fin_chunks_ross); + fprintf(out, "%s | saved_last_in_queue_time = %g\n", prefix, msg->saved_last_in_queue_time); + fprintf(out, "%s | saved_next_packet_delay = %g\n", prefix, msg->saved_next_packet_delay); + fprintf(out, "%s | msg_new_mn_event = %g\n", prefix, msg->msg_new_mn_event); + fprintf(out, "%s | last_received_time = %g\n", prefix, msg->last_received_time); + fprintf(out, "%s | last_sent_time = %g\n", prefix, msg->last_sent_time); + fprintf(out, "%s | last_bufupdate_time = %g\n", prefix, msg->last_bufupdate_time); } //*** ---------- END OF reverse handler checking functions ---------- *** @@ -7110,17 +7210,17 @@ tw_lptype dragonfly_dally_lps[] = crv_checkpointer dragonfly_dally_checkpointers[] = { { &dragonfly_dally_lps[0], - 0, - (save_checkpoint_state_f) NULL, - (clean_checkpoint_state_f) NULL, - (check_states_f) NULL, + sizeof(terminal_state), + (save_checkpoint_state_f) save_terminal_state, + (clean_checkpoint_state_f) clean_terminal_state, + (check_states_f) check_terminal_state, (print_lpstate_f) print_terminal_state, - (print_checkpoint_state_f) NULL, + (print_checkpoint_state_f) print_terminal_state, (print_event_f) print_terminal_dally_message, }, { &dragonfly_dally_lps[1], - 0, + sizeof(router_state), (save_checkpoint_state_f) NULL, 
(clean_checkpoint_state_f) NULL, (check_states_f) NULL, From 7aa4c1141677ecbc420759c43e1c677b750327ac Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 7 Mar 2025 18:02:02 -0500 Subject: [PATCH 032/110] Printing sub_message contents of model-net message --- codes/net/dragonfly-dally.h | 2 +- src/network-workloads/model-net-mpi-replay.c | 2 +- src/networks/model-net/core/model-net-lp.c | 32 ++++++++++++++++---- src/networks/model-net/dragonfly-dally.C | 26 ++++++++++++++-- 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h index dc73d145..2647c4df 100644 --- a/codes/net/dragonfly-dally.h +++ b/codes/net/dragonfly-dally.h @@ -136,7 +136,7 @@ struct terminal_dally_message tw_stime last_bufupdate_time; }; -void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg); +void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg); #ifdef __cplusplus } diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 653f6f31..55649581 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -3795,7 +3795,7 @@ static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) } // Original printing function from Claude.ai -static void print_nw_message(FILE * out, char const * prefix, struct nw_message * msg) { +static void print_nw_message(FILE * out, char const * prefix, nw_state* s, struct nw_message * msg) { fprintf(out, "%snw_message ->\n", prefix); fprintf(out, "%s | msg_type = %s\n", prefix, MPI_NW_EVENTS_to_string(msg->msg_type)); fprintf(out, "%s | op_type = %s\n", prefix, op_type_string(msg->op_type)); diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 92d3dfe1..6a2cf4f8 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ 
b/src/networks/model-net/core/model-net-lp.c @@ -141,7 +141,7 @@ static void clean_state_net_state(model_net_base_state * state); static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after); static void print_model_net_state(FILE * out, char const * prefix, model_net_base_state * state); static void print_model_net_checkpoint(FILE * out, char const * prefix, model_net_base_state * state); -static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * state); +static void print_event_state(FILE * out, char const * prefix, model_net_base_state * s, model_net_wrap_msg * msg); // ROSS function pointer table to check reverse event handler crv_checkpointer model_net_chkptr = { @@ -1353,7 +1353,7 @@ void print_model_net_request(FILE * out, char const * prefix, model_net_request fprintf(out, "%sapp_id = %d\n", prefix, req->app_id); } -static void print_event_state(FILE * out, char const * prefix, model_net_wrap_msg * msg) { +static void print_event_state(FILE * out, char const * prefix, model_net_base_state * state, model_net_wrap_msg * msg) { fprintf(out, "%sh\n", prefix); fprintf(out, "%s| src = %lu\n", prefix, msg->h.src); fprintf(out, "%s| event_type = %d (%s)\n", prefix, msg->h.event_type, event_type_string(msg->h.event_type)); @@ -1369,6 +1369,8 @@ static void print_event_state(FILE * out, char const * prefix, model_net_wrap_ms char subprefix_2[len_subprefix]; snprintf(subprefix_2, len_subprefix, "%s%s", prefix, addprefix_2); + crv_checkpointer * chptr; + void * sub_msg; switch (msg->h.event_type) { case MN_BASE_NEW_MSG: case MN_BASE_SCHED_NEXT: @@ -1388,11 +1390,29 @@ static void print_event_state(FILE * out, char const * prefix, model_net_wrap_ms fprintf(out, "%s | | prio = %d\n", prefix, msg->msg.m_base.rc.prio); fprintf(out, "%s | created_in_surrogate = %d\n", prefix, msg->msg.m_base.created_in_surrogate); break; - default: - fprintf(out, "%sThe content of this message cannot be deciphered yet with the 
information given\n", prefix); + + case MN_BASE_SAMPLE: + case MN_BASE_PASS: + case MN_BASE_END_NOTIF: + // printing sub_msg + fprintf(out, "%ssub_msg ->\n", prefix); + chptr = method_array[state->net_id]->checkpointer; + sub_msg = ((char*)msg)+msg_offsets[state->net_id]; + if (chptr && chptr->print_event) { + char addprefix[] = " | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char subprefix[len_subprefix]; + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + chptr->print_event(out, subprefix, state->sub_state, sub_msg); + } else { + fprintf(out, "%s | == cannot print the submessage (event print function not yet defined for network of type %s) ==\n", prefix, model_net_method_names[state->net_id]); + } + break; + + case MN_CONGESTION_EVENT: + // Nothing to print + break; } - // TODO: print internal state of message - // void * sub_msg = ((char*)msg) + msg_offsets[state->net_id]; } /* END checking reverse handler functionality */ diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index b986ab32..6674abb3 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -7102,14 +7102,36 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | * frozen_state = %p\n", prefix, state->frozen_state); } +char const * const string_event_t(enum event_t type) { + switch (type) { + case T_GENERATE: return "T_GENERATE"; + case T_ARRIVE: return "T_ARRIVE"; + case T_SEND: return "T_SEND"; + case T_BUFFER: return "T_BUFFER"; + case R_SEND: return "R_SEND"; + case R_ARRIVE: return "R_ARRIVE"; + case R_BUFFER: return "R_BUFFER"; + case R_BANDWIDTH: return "R_BANDWIDTH"; + case R_BW_HALT: return "R_BW_HALT"; + case T_BANDWIDTH: return "T_BANDWIDTH"; + case R_SNAPSHOT: return "R_SNAPSHOT"; + case T_NOTIFY: return "T_NOTIFY"; + case T_ARRIVE_PREDICTED: return "T_ARRIVE_PREDICTED"; + case T_VACUOUS_EVENT: return 
"T_VACUOUS_EVENT"; + default: return "UNKNOWN TYPE!!"; + } +} + // Print fuction originally constructed with help from Claude.ai -void print_terminal_dally_message(FILE * out, char const * prefix, struct terminal_dally_message * msg) { +void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg) { + //terminal_state * ns = (terminal_state *) s; + fprintf(out, "%sterminal_dally_message ->\n", prefix); fprintf(out, "%s | magic = %d\n", prefix, msg->magic); fprintf(out, "%s | travel_start_time = %g\n", prefix, msg->travel_start_time); fprintf(out, "%s | travel_end_time = %g\n", prefix, msg->travel_end_time); fprintf(out, "%s | packet_ID = %llu\n", prefix, msg->packet_ID); - fprintf(out, "%s | type = %d\n", prefix, msg->type); + fprintf(out, "%s | type = %d (%s)\n", prefix, msg->type, string_event_t((enum event_t) msg->type)); fprintf(out, "%s | notify_type = %d\n", prefix, msg->notify_type); fprintf(out, "%s | category = %.16s\n", prefix, msg->category); fprintf(out, "%s | final_dest_gid = %lu\n", prefix, msg->final_dest_gid); From f3818d0ce93261e73a487449a494731c530942d3 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 10 Mar 2025 11:35:02 -0400 Subject: [PATCH 033/110] Implementing (an almost complete) deep-copy of terminal_state --- codes/congestion-controller-model.h | 5 + codes/model-net.h | 5 +- src/networks/model-net/core/model-net-lp.c | 34 +- src/networks/model-net/dragonfly-dally.C | 357 +++++++++++++++++---- src/util/congestion-controller.C | 53 +++ 5 files changed, 394 insertions(+), 60 deletions(-) diff --git a/codes/congestion-controller-model.h b/codes/congestion-controller-model.h index e8b673b5..ff5f6f8f 100644 --- a/codes/congestion-controller-model.h +++ b/codes/congestion-controller-model.h @@ -156,6 +156,11 @@ typedef struct tlc_state double current_injection_bandwidth_coef; } tlc_state; +void save_tlc_state(tlc_state * into, tlc_state const * from); +void clean_tlc_state(tlc_state * into); +bool 
check_tlc_state(tlc_state * before, tlc_state * after); +void print_tlc_state(FILE * out, char const * prefix, tlc_state * state); + congestion_control_message* cc_msg_rc_storage_create(); void cc_msg_rc_storage_delete(void * ptr); diff --git a/codes/model-net.h b/codes/model-net.h index f003cc10..abb630ce 100644 --- a/codes/model-net.h +++ b/codes/model-net.h @@ -164,7 +164,10 @@ struct mn_stats }; bool check_model_net_request(model_net_request const * before, model_net_request const * after); -void print_model_net_request(FILE * out, char const * before, model_net_request * item); +void print_model_net_request(FILE * out, char const * prefix, model_net_request * item); + +bool check_mn_stats(struct mn_stats const * before, struct mn_stats const * after); +void print_mn_stats(FILE * out, char const * prefix, struct mn_stats * item); /* Registers all model-net LPs in ROSS. Should be called after * configuration_load, but before codes_mapping_setup */ diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 6a2cf4f8..536c44a1 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -1210,6 +1210,8 @@ static void clean_state_net_state(model_net_base_state * state) { free(state->node_copy_next_available_time); } +static bool warned_no_lp_checking_defined[MAX_NETS]; + static bool check_model_net_state(model_net_base_state * before, model_net_base_state * after) { bool is_same = true; is_same &= before->net_id == after->net_id; @@ -1226,8 +1228,10 @@ static bool check_model_net_state(model_net_base_state * before, model_net_base_ crv_checkpointer * chptr = method_array[before->net_id]->checkpointer; if (chptr && before->sub_state != NULL && chptr->check_lps) { is_same &= chptr->check_lps(before->sub_state, after->sub_state); - } else { - tw_error(TW_LOC, "Network of type \"%s\" has not been configured to be checkpointed", model_net_method_names[before->net_id]); + // 
Warning once that checking for LP subtype has not been fully implemented + } else if (!warned_no_lp_checking_defined[before->net_id]) { + fprintf(stderr, "Warning: Network of type \"%s\" has not been fully configured to be checkpointed (Running this model under SEQUENTIAL_ROLLBACK_CHECK won't capture any issues that arise from the reverse event handlers).\n", model_net_method_names[before->net_id]); + warned_no_lp_checking_defined[before->net_id] = true; } is_same &= before->next_available_time == after->next_available_time; for (int i=0; i < before->params->node_copy_queues; i++) { @@ -1353,6 +1357,32 @@ void print_model_net_request(FILE * out, char const * prefix, model_net_request fprintf(out, "%sapp_id = %d\n", prefix, req->app_id); } +bool check_mn_stats(struct mn_stats const * before, struct mn_stats const * after) { + bool is_same = true; + + is_same &= (strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0); + is_same &= (before->send_count == after->send_count); + is_same &= (before->send_bytes == after->send_bytes); + is_same &= (before->send_time == after->send_time); + is_same &= (before->recv_count == after->recv_count); + is_same &= (before->recv_bytes == after->recv_bytes); + is_same &= (before->recv_time == after->recv_time); + is_same &= (before->max_event_size == after->max_event_size); + + return is_same; +} + +void print_mn_stats(FILE * out, char const * prefix, struct mn_stats * req) { + fprintf(out, "%scategory = '%s'\n", prefix, req->category); + fprintf(out, "%ssend_count = %ld\n", prefix, req->send_count); + fprintf(out, "%ssend_bytes = %ld\n", prefix, req->send_bytes); + fprintf(out, "%ssend_time = %g\n", prefix, req->send_time); + fprintf(out, "%srecv_count = %ld\n", prefix, req->recv_count); + fprintf(out, "%srecv_bytes = %ld\n", prefix, req->recv_bytes); + fprintf(out, "%srecv_time = %g\n", prefix, req->recv_time); + fprintf(out, "%smax_event_size = %ld\n", prefix, req->max_event_size); +} + static void 
print_event_state(FILE * out, char const * prefix, model_net_base_state * state, model_net_wrap_msg * msg) { fprintf(out, "%sh\n", prefix); fprintf(out, "%s| src = %lu\n", prefix, msg->h.src); diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 6674abb3..409f063a 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -496,6 +496,9 @@ struct packet_id { bool operator<(struct packet_id const &lk, struct packet_id const &rk) { return lk.packet_ID == rk.packet_ID ? lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id : lk.packet_ID < rk.packet_ID; } +bool operator==(struct packet_id const &lk, struct packet_id const &rk) { + return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id == rk.dfdally_src_terminal_id; // equal only when both the packet ID and the source terminal match +} // Some more function declarations static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_message * msg, enum notify_t notification); @@ -3558,6 +3561,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) fprintf(dragonfly_term_bw_log, "\n term-id time-stamp port-id busy-time"); }*/ + s->local_congestion_controller = NULL; if (g_congestion_control_enabled) { s->local_congestion_controller = (tlc_state*)calloc(1,sizeof(tlc_state)); cc_terminal_local_controller_init(s->local_congestion_controller, lp, s->terminal_id, &s->workloads_finished_flag); @@ -6919,17 +6923,132 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, } //*** ---------- START OF reverse handler checking functions ---------- *** +bool warn_incomplete_definition_terminal_state_check = false; + static void save_terminal_state(terminal_state *into, terminal_state const *from) { + if (!warn_incomplete_definition_terminal_state_check) { + fprintf(stderr, "Warning: Deep-cloning and comparing has not been fully implemented for the (sub)LP type: `terminal_state` (Running this model under SEQUENTIAL_ROLLBACK_CHECK might not capture
issues that arise from its reverse event handler).\n"); + warn_incomplete_definition_terminal_state_check = true; + } + + // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at + // from->predictor_data + // from->sample_stat + // from->ross_sample + // from->busy_time_ross_sample + memcpy(into, from, sizeof(terminal_state)); -} + dragonfly_param const * p = into->params; + int const num_qos_levels = p->num_qos_levels; + int const num_rails = p->num_rails; + + into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*)); + into->terminal_length = (int**) malloc(num_rails * sizeof(int*)); + into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->in_send_loop = (int*) malloc(num_rails * sizeof(int)); + into->issueIdle = (int*) malloc(num_rails * sizeof(int)); + into->qos_status = (int**) malloc(num_rails * sizeof(int*)); + into->qos_data = (int**) malloc(num_rails * sizeof(int*)); + into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int)); + into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); + into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); + into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + //into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); + + for(int i = 0; i < num_rails; i++) { + into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); + for (int j = 0; j<num_qos_levels; j++) { + into->vc_occupancy[i][j] = from->vc_occupancy[i][j]; + into->terminal_length[i][j] = from->terminal_length[i][j]; + into->qos_data[i][j] = from->qos_data[i][j]; + into->qos_status[i][j] =
from->qos_status[i][j]; + } + into->last_buf_full[i] = from->last_buf_full[i]; + into->in_send_loop[i] = from->in_send_loop[i]; + into->issueIdle[i] = from->issueIdle[i]; + into->last_qos_lvl[i] = from->last_qos_lvl[i]; + into->terminal_available_time[i] = from->terminal_available_time[i]; + into->stalled_chunks[i] = from->stalled_chunks[i]; + into->total_chunks[i] = from->total_chunks[i]; + into->busy_time[i] = from->busy_time[i]; + } + + into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t)); + for (int i = 0; i < p->radix; i++) { + into->link_traffic[i] = from->link_traffic[i]; + } + + if (from->local_congestion_controller != NULL) { + assert(g_congestion_control_enabled); + into->local_congestion_controller = (tlc_state*) malloc(sizeof(tlc_state)); + save_tlc_state(into->local_congestion_controller, from->local_congestion_controller); + } + + // Deep-copy using C++ copy construction (the values do not point to any pointers). Constructed in place (placement new, <new>) because the memcpy above left bitwise aliases of `from`'s containers in `into`; assigning over them would operate on `from`'s nodes + new (&into->remaining_sz_packets) decltype(into->remaining_sz_packets)(from->remaining_sz_packets); + new (&into->zombies) decltype(into->zombies)(from->zombies); +} + +// Partially written by Claude static void clean_terminal_state(terminal_state *state) { + dragonfly_param const * p = state->params; + int const num_rails = p->num_rails; + + // Free all allocated memory + for (int i = 0; i < num_rails; i++) { + free(state->vc_occupancy[i]); + free(state->terminal_length[i]); + free(state->qos_status[i]); + free(state->qos_data[i]); + } + + free(state->vc_occupancy); + free(state->terminal_length); + free(state->last_buf_full); + free(state->in_send_loop); + free(state->issueIdle); + free(state->qos_status); + free(state->qos_data); + free(state->last_qos_lvl); + free(state->terminal_available_time); + free(state->stalled_chunks); + free(state->total_chunks); + free(state->busy_time); + free(state->link_traffic); + + if (state->local_congestion_controller != NULL) { + clean_tlc_state(state->local_congestion_controller); + free(state->local_congestion_controller); + } + + // Finish cleaning (free
memory), and check and print!! + state->remaining_sz_packets.~map(); + state->zombies.~set(); } static bool check_terminal_state(terminal_state *before, terminal_state *after) { bool is_same = true; - // Compare scalar values + // There is no need to deep-copy the following. They're never modified + assert(before->params == after->params); + assert(before->router_lp == after->router_lp); + assert(before->router_id == after->router_id); + + // We ignore the comparison of the following. They are not meant to be rolled-back + // before->fwd_events + // before->rev_events + // before->sent_packets + // before->last_packet_sent_id + // before->arrival_of_last_packet + // before->anno + assert(before->frozen_state == after->frozen_state); + + // Comparing all other elements of the struct is_same &= (before->packet_counter == after->packet_counter); is_same &= (before->packet_gen == after->packet_gen); is_same &= (before->packet_fin == after->packet_fin); @@ -6954,30 +7073,12 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->fin_chunks_time == after->fin_chunks_time); is_same &= (before->op_arr_size == after->op_arr_size); is_same &= (before->max_arr_size == after->max_arr_size); - //is_same &= (before->fwd_events == after->fwd_events); // This is used for statistics, they are never changed when rollbacking - //is_same &= (before->rev_events == after->rev_events); // This is used for statistics, they are never changed when rollbacking is_same &= (before->fin_chunks_ross_sample == after->fin_chunks_ross_sample); is_same &= (before->data_size_ross_sample == after->data_size_ross_sample); is_same &= (before->fin_hops_ross_sample == after->fin_hops_ross_sample); is_same &= (before->fin_chunks_time_ross_sample == after->fin_chunks_time_ross_sample); - is_same &= (before->last_packet_sent_id == after->last_packet_sent_id); is_same &= (before->last_in_queue_time == after->last_in_queue_time); - // Compare 
arrival_of_last_packet struct - is_same &= (before->arrival_of_last_packet.packet_ID == after->arrival_of_last_packet.packet_ID); - is_same &= (before->arrival_of_last_packet.travel_end_time == after->arrival_of_last_packet.travel_end_time); - - // Compare arrays (assumes params is the same for both) - assert(before->params == after->params); - //if (before->params && after->params && before->params->num_rails == after->params->num_rails) { - // for (int i = 0; i < before->params->num_rails; i++) { - // is_same &= (before->router_lp[i] == after->router_lp[i]); - // is_same &= (before->router_id[i] == after->router_id[i]); - // } - //} else { - // is_same = false; - //} - // Compare string buffers is_same &= (strncmp(before->output_buf, after->output_buf, 4096) == 0); is_same &= (strncmp(before->output_buf2, after->output_buf2, 4096) == 0); @@ -6990,28 +7091,47 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->anno == after->anno); } + dragonfly_param const * p = before->params; + int const num_qos_levels = p->num_qos_levels; + int const num_rails = p->num_rails; + + for (int i = 0; i < num_rails; i++) { + for (int j = 0; j < num_qos_levels; j++) { + is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]); + is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]); + is_same &= (before->qos_status[i][j] == after->qos_status[i][j]); + is_same &= (before->qos_data[i][j] == after->qos_data[i][j]); + } + + is_same &= (before->last_buf_full[i] == after->last_buf_full[i]); + is_same &= (before->in_send_loop[i] == after->in_send_loop[i]); + is_same &= (before->issueIdle[i] == after->issueIdle[i]); + is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]); + is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]); + is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]); + is_same &= (before->total_chunks[i] == after->total_chunks[i]); + 
is_same &= (before->busy_time[i] == after->busy_time[i]); + } + + for (int i = 0; i < p->radix; i++) { + is_same &= (before->link_traffic[i] == after->link_traffic[i]); + } + + // Ignoring model statistics. In general, we don't care if there are errors in the statistics, as they are only approximate. The statistics don't interfere with the state of the model. There is a bug within the statistics when rolling back though: a parameter is never reversed properly + //for (size_t i = 0; i < CATEGORY_MAX; i++) { + // is_same &= check_mn_stats(&before->dragonfly_stats_array[i], &after->dragonfly_stats_array[i]); + //} + + if (after->local_congestion_controller != NULL) { + is_same &= check_tlc_state(before->local_congestion_controller, after->local_congestion_controller); + } + + is_same &= before->remaining_sz_packets == after->remaining_sz_packets; + is_same &= before->zombies == after->zombies; + // Compare pointers (just checking if they're both NULL or both non-NULL) - //is_same &= ((before->local_congestion_controller == NULL) == (after->local_congestion_controller == NULL)); - //is_same &= ((before->vc_occupancy == NULL) == (after->vc_occupancy == NULL)); - //is_same &= ((before->terminal_available_time == NULL) == (after->terminal_available_time == NULL)); //is_same &= ((before->terminal_msgs == NULL) == (after->terminal_msgs == NULL)); - //is_same &= ((before->in_send_loop == NULL) == (after->in_send_loop == NULL)); - //is_same &= ((before->qos_status == NULL) == (after->qos_status == NULL)); - //is_same &= ((before->qos_data == NULL) == (after->qos_data == NULL)); - //is_same &= ((before->last_qos_lvl == NULL) == (after->last_qos_lvl == NULL)); - //is_same &= ((before->issueIdle == NULL) == (after->issueIdle == NULL)); - //is_same &= ((before->terminal_length == NULL) == (after->terminal_length == NULL)); //is_same &= ((before->rank_tbl == NULL) == (after->rank_tbl == NULL)); - //is_same &= ((before->last_buf_full == NULL) == (after->last_buf_full == NULL)); -
//is_same &= ((before->busy_time == NULL) == (after->busy_time == NULL)); - //is_same &= ((before->link_traffic == NULL) == (after->link_traffic == NULL)); - //is_same &= ((before->total_chunks == NULL) == (after->total_chunks == NULL)); - //is_same &= ((before->stalled_chunks == NULL) == (after->stalled_chunks == NULL)); - //is_same &= ((before->busy_time_sample == NULL) == (after->busy_time_sample == NULL)); - //is_same &= ((before->sample_stat == NULL) == (after->sample_stat == NULL)); - //is_same &= ((before->busy_time_ross_sample == NULL) == (after->busy_time_ross_sample == NULL)); - //is_same &= ((before->predictor_data == NULL) == (after->predictor_data == NULL)); - is_same &= ((before->frozen_state == NULL) && (after->frozen_state == NULL)); return is_same; } @@ -7024,13 +7144,13 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | packet_fin = %d\n", prefix, state->packet_fin); fprintf(out, "%s | total_gen_size = %d\n", prefix, state->total_gen_size); - fprintf(out, "%s | * router_lp[%d] = [", prefix, state->params->num_rails); + fprintf(out, "%s | * router_lp[%d] = [", prefix, state->params->num_rails); for (int i=0; iparams->num_rails; i++) { fprintf(out, "%s%lu", i ? ", " : "", state->router_lp[i]); } fprintf(out, "]\n"); - fprintf(out, "%s | * router_id[%d] = [", prefix, state->params->num_rails); + fprintf(out, "%s | * router_id[%d] = [", prefix, state->params->num_rails); for (int i=0; iparams->num_rails; i++) { fprintf(out, "%s%u", i ? 
", " : "", state->router_id[i]); } @@ -7038,22 +7158,101 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | terminal_id = %u\n", prefix, state->terminal_id); fprintf(out, "%s | connMan = \n", prefix); + + char addprefix[] = " | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); fprintf(out, "%s | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller); + if (state->local_congestion_controller != NULL) { + print_tlc_state(out, subprefix, state->local_congestion_controller); + } + free(subprefix); + fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); - fprintf(out, "%s | ** vc_occupancy = %p\n", prefix, state->vc_occupancy); - fprintf(out, "%s | *terminal_available_time = %p\n", prefix, state->terminal_available_time); + + fprintf(out, "%s | ** vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | *terminal_available_time[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? 
", " : "", state->terminal_available_time[i]); + } + fprintf(out, "]\n"); + fprintf(out, "%s | *** terminal_msgs = %p\n", prefix, state->terminal_msgs); fprintf(out, "%s | *** terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail); - fprintf(out, "%s | * in_send_loop = %p\n", prefix, state->in_send_loop); - fprintf(out, "%s | dragonfly_stats_array = \n", prefix); - fprintf(out, "%s | ** qos_status = %p\n", prefix, state->qos_status); - fprintf(out, "%s | ** qos_data = %p\n", prefix, state->qos_data); - fprintf(out, "%s | * last_qos_lvl = %p\n", prefix, state->last_qos_lvl); + + fprintf(out, "%s | * in_send_loop[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]); + } + fprintf(out, "]\n"); + + char addprefix_2[] = " | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + fprintf(out, "%s | dragonfly_stats_array = [\n", prefix); + for (int i = 0; i < CATEGORY_MAX; i++) { + fprintf(out, "%s | %d:\n", prefix, i); + print_mn_stats(out, subprefix, &state->dragonfly_stats_array[i]); + } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); + + fprintf(out, "%s | ** qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | ** qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? 
", " : "", state->qos_data[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * last_qos_lvl[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]); + } + fprintf(out, "]\n"); + fprintf(out, "%s | is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw); fprintf(out, "%s | * st = %p\n", prefix, state->st); fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); - fprintf(out, "%s | * issueIdle = %p\n", prefix, state->issueIdle); - fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); + + fprintf(out, "%s | * issueIdle[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->terminal_length[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? 
state->anno : "(nil)"); fprintf(out, "%s | * params = %p\n", prefix, state->params); fprintf(out, "%s | * rank_tbl = %p\n", prefix, state->rank_tbl); @@ -7064,11 +7263,37 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | finished_msgs = %ld\n", prefix, state->finished_msgs); fprintf(out, "%s | finished_chunks = %ld\n", prefix, state->finished_chunks); fprintf(out, "%s | finished_packets = %ld\n", prefix, state->finished_packets); - fprintf(out, "%s | * last_buf_full = %p\n", prefix, state->last_buf_full); - fprintf(out, "%s | * busy_time = %p\n", prefix, state->busy_time); - fprintf(out, "%s | * link_traffic = %p\n", prefix, state->link_traffic); - fprintf(out, "%s | * total_chunks = %p\n", prefix, state->total_chunks); - fprintf(out, "%s | * stalled_chunks = %p\n", prefix, state->stalled_chunks); + + fprintf(out, "%s | * last_buf_full[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * busy_time[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * link_traffic[%d] = [", prefix, state->params->radix); + for (int i=0; iparams->radix; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * total_chunks[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * stalled_chunks[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? 
", " : "", state->stalled_chunks[i]); + } + fprintf(out, "]\n"); + fprintf(out, "%s | injected_chunks = %lu\n", prefix, state->injected_chunks); fprintf(out, "%s | ejected_chunks = %lu\n", prefix, state->ejected_chunks); fprintf(out, "%s | max_latency = %g\n", prefix, state->max_latency); @@ -7092,13 +7317,31 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | fin_chunks_time_ross_sample = %g\n", prefix, state->fin_chunks_time_ross_sample); fprintf(out, "%s | * busy_time_ross_sample = %p\n", prefix, state->busy_time_ross_sample); // ingnoring as this part of the code is never used. Originally part of instrumentation fprintf(out, "%s | ross_sample = \n", prefix); // ingnoring as this part of the code is never used. Originally part of instrumentation + + // modified outside of process and reverse computation (at commit and at surrogate change) fprintf(out, "%s | sent_packets = \n", prefix); + fprintf(out, "%s | last_packet_sent_id = %ld\n", prefix, state->last_packet_sent_id); fprintf(out, "%s | arrival_of_last_packet = {packet_ID: %ld, travel_end_time: %g}\n", prefix, state->arrival_of_last_packet.packet_ID, state->arrival_of_last_packet.travel_end_time); - fprintf(out, "%s | remaining_sz_packets = \n", prefix); + + fprintf(out, "%s | remaining_sz_packets = {\n", prefix); + std::map::iterator it_map; + for (it_map = state->remaining_sz_packets.begin(); it_map != state->remaining_sz_packets.end(); ++it_map) { + fprintf(out, "%s | {packet_ID: %lu, dfdally_src_terminal_id: %u} -> %d,\n", prefix, it_map->first.packet_ID, it_map->first.dfdally_src_terminal_id, it_map->second); + + } + fprintf(out, "%s | }\n", prefix); + fprintf(out, "%s | last_in_queue_time = %g\n", prefix, state->last_in_queue_time); fprintf(out, "%s | * predictor_data = %p\n", prefix, state->predictor_data); - fprintf(out, "%s | zombies = \n", prefix); + + fprintf(out, "%s | zombies = [\n", prefix); + std::set::iterator it; + for (it = 
state->zombies.begin(); it != state->zombies.end(); ++it) { + fprintf(out, "%s | {packet_ID: %lu, dfdally_src_terminal_id: %u},\n", prefix, it->packet_ID, it->dfdally_src_terminal_id); + } + fprintf(out, "%s | ]\n", prefix); + fprintf(out, "%s | * frozen_state = %p\n", prefix, state->frozen_state); } diff --git a/src/util/congestion-controller.C b/src/util/congestion-controller.C index a0dab10c..d8ff1a1a 100644 --- a/src/util/congestion-controller.C +++ b/src/util/congestion-controller.C @@ -906,6 +906,59 @@ static double calculate_bandwidth_usage_percent(int bytes_transmitted, double ma return percent_bw; } +void save_tlc_state(tlc_state * into, tlc_state const * from) { + memcpy(into, from, sizeof(tlc_state)); + into->ejected_rate_windows = (double*) malloc(cc_bandwidth_rolling_window_count * sizeof(double)); + for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) { + into->ejected_rate_windows[i] = from->ejected_rate_windows[i]; + } +} + +void clean_tlc_state(tlc_state * state) { + free(state->ejected_rate_windows); +} + +bool check_tlc_state(tlc_state * before, tlc_state * after) { + bool is_same = true; + + is_same &= before->terminal_id == after->terminal_id; + is_same &= before->app_id == after->app_id; + is_same &= before->abatement_signal_count == after->abatement_signal_count; + is_same &= before->window_epoch == after->window_epoch; + is_same &= before->ejected_packet_bytes == after->ejected_packet_bytes; + + for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) { + is_same &= before->ejected_rate_windows[i] == after->ejected_rate_windows[i]; + } + + is_same &= before->cur_average_rate == after->cur_average_rate; + is_same &= before->is_abatement_active == after->is_abatement_active; + is_same &= *before->workloads_finished_flag_ptr == *after->workloads_finished_flag_ptr; + is_same &= before->current_injection_bandwidth_coef == after->current_injection_bandwidth_coef; + + return is_same; +} + +void print_tlc_state(FILE * out, char const * 
prefix, tlc_state * state) { + fprintf(out, "%s tlc_state ->\n", prefix); + fprintf(out, "%s | terminal_id = %d\n", prefix, state->terminal_id); + fprintf(out, "%s | app_id = %d\n", prefix, state->app_id); + fprintf(out, "%s | abatement_signal_count = %d\n", prefix, state->abatement_signal_count); + fprintf(out, "%s | window_epoch = %ud\n", prefix, state->window_epoch); + fprintf(out, "%s | ejected_packet_bytes = %ud\n", prefix, state->ejected_packet_bytes); + + fprintf(out, "%s | ejected_rate_windows[%d] = [", prefix, cc_bandwidth_rolling_window_count); + for (int i = 0; i < cc_bandwidth_rolling_window_count; i++) { + fprintf(out, "%g%s", state->ejected_rate_windows[i], i == cc_bandwidth_rolling_window_count - 1 ? "" : ", "); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | cur_average_rate = %g\n", prefix, state->cur_average_rate); + fprintf(out, "%s | is_abatement_active = %d\n", prefix, state->is_abatement_active); + fprintf(out, "%s | workloads_finished_flag_ptr = %d\n", prefix, *state->workloads_finished_flag_ptr); + fprintf(out, "%s | current_injection_bandwidth_coef = %g\n", prefix, state->current_injection_bandwidth_coef); +} + void cc_terminal_process_bandwidth_check(tlc_state *s, congestion_control_message *msg, tw_lp *lp) { double usage_percent = calculate_bandwidth_usage_percent(s->ejected_packet_bytes, s->params->terminal_configured_bandwidth, 1); //multiplier for multiple rails but right now we're just using 1 From 0898c37d3fb962ad35766dd555a72c25c71bd9b9 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 10 Mar 2025 12:30:00 -0400 Subject: [PATCH 034/110] Fixing reversibility bug in terminal_state (dragonfly-dally) --- src/networks/model-net/dragonfly-dally.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 409f063a..cba515bb 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -4468,7 +4468,7 @@ 
static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag if(msg->qos_reset2) s->qos_status[msg->rail_id][1] = Q_ACTIVE; - if(msg->last_saved_qos) + if(msg->last_saved_qos >= 0) s->last_qos_lvl[msg->rail_id] = msg->last_saved_qos; if(bf->c1) { From d3d76217aa0c314a7226e1f8a86bd90adea913ea Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 11 Mar 2025 19:16:13 -0400 Subject: [PATCH 035/110] Commenting what has is left to be implemented to fully deep-copy `struct terminal_state` --- src/networks/model-net/dragonfly-dally.C | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index cba515bb..63e1080d 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -5580,6 +5580,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s, for(int i = 0; i < s->params->num_rails; i++) { free(s->vc_occupancy[i]); + // TODO: terminal_msgs are not properly freed if there are messages left. Correct this! free(s->terminal_msgs[i]); free(s->terminal_msgs_tail[i]); } @@ -6931,6 +6932,10 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from warn_incomplete_definition_terminal_state_check = true; } + // Missing deep-clone/comparison/print members. 
These members are always accessed, so it is possible to discover some bugs if we print their contents + // from->terminal_msgs + // from->rank_tbl + // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at // from->predictor_data // from->sample_stat @@ -6955,7 +6960,6 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); - //into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); for(int i = 0; i < num_rails; i++) { into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); @@ -7129,10 +7133,6 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= before->remaining_sz_packets == after->remaining_sz_packets; is_same &= before->zombies == after->zombies; - // Compare pointers (just checking if they're both NULL or both non-NULL) - //is_same &= ((before->terminal_msgs == NULL) == (after->terminal_msgs == NULL)); - //is_same &= ((before->rank_tbl == NULL) == (after->rank_tbl == NULL)); - return is_same; } From 41680da53bc38389644a1cec41e433f1f6642ee0 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:23:57 -0400 Subject: [PATCH 036/110] Implementing deep-copy of member terminal_msgs in terminal_state --- codes/net/dragonfly-dally.h | 1 + .../model-net/core/model-net-sched-impl.c | 2 + src/networks/model-net/dragonfly-dally.C | 202 +++++++++++++++++- 3 files changed, 197 insertions(+), 8 deletions(-) diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h index 2647c4df..504446b0 100644 --- a/codes/net/dragonfly-dally.h +++ b/codes/net/dragonfly-dally.h @@ -137,6 +137,7 @@ struct terminal_dally_message }; void print_terminal_dally_message(FILE 
* out, char const * prefix, void * s, struct terminal_dally_message * msg); +bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after); #ifdef __cplusplus } diff --git a/src/networks/model-net/core/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c index a3ff4fde..ffe71d7a 100644 --- a/src/networks/model-net/core/model-net-sched-impl.c +++ b/src/networks/model-net/core/model-net-sched-impl.c @@ -389,6 +389,8 @@ static void save_mn_sched_qitem(mn_sched_qitem * into, mn_sched_qitem const * fr into->req = from->req; into->sched_params = from->sched_params; into->rem = from->rem; + into->remote_event = NULL; + into->local_event = NULL; if (from->remote_event != NULL) { assert(from->req.remote_event_size > 0); into->remote_event = malloc(from->req.remote_event_size); diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 63e1080d..4ccc80e0 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -497,7 +497,7 @@ bool operator<(struct packet_id const &lk, struct packet_id const &rk) { return lk.packet_ID == rk.packet_ID ? 
lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id : lk.packet_ID < rk.packet_ID; } bool operator==(struct packet_id const &lk, struct packet_id const &rk) { - return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id < rk.dfdally_src_terminal_id; + return lk.packet_ID == rk.packet_ID && lk.dfdally_src_terminal_id == rk.dfdally_src_terminal_id; } // Some more function declarations static void notify_dest_lp_of(terminal_state * s, tw_lp * lp, terminal_dally_message * msg, enum notify_t notification); @@ -1621,6 +1621,119 @@ static terminal_dally_message_list* return_tail( return tail; } +// Copies a list and returns the tail +static terminal_dally_message_list * copy_terminal_dally_message_list(terminal_dally_message_list ** into_thisq, terminal_dally_message_list const * from_thisq) { + if (from_thisq == NULL) { + *into_thisq = NULL; + return NULL; + } + + terminal_dally_message_list const * from_head = from_thisq; + terminal_dally_message_list * prev = NULL; + while(from_head != NULL) { + terminal_dally_message_list * copy_head = (terminal_dally_message_list *) malloc(sizeof(terminal_dally_message_list)); + + //copy_head->msg = from_head->msg; + memcpy(copy_head, from_head, sizeof(terminal_dally_message_list)); + copy_head->prev = prev; + + if (from_head->event_data != NULL) { + int const message_size = from_head->msg.remote_event_size_bytes + from_head->msg.local_event_size_bytes; + assert(message_size > 0); + copy_head->event_data = (char *) malloc(message_size); + memcpy(copy_head->event_data, from_head->event_data, message_size); + } + + if (prev == NULL) { + *into_thisq = copy_head; + } else { + prev->next = copy_head; + } + + prev = copy_head; + from_head = from_head->next; + } + prev->next = NULL; + + return prev; +} + +static void clean_terminal_dally_message_list(terminal_dally_message_list * thisq) { + if (thisq == NULL) { + return; + } + + terminal_dally_message_list * prev = thisq; + terminal_dally_message_list * head = prev->next; + 
free(prev->event_data); + while (head != NULL) { + free(head->event_data); + free(prev); + prev = head; + head = head->next; + } + free(prev); +} + +static bool check_terminal_dally_message_list(terminal_dally_message_list * before, terminal_dally_message_list * after) { + bool is_same = true; + + terminal_dally_message_list * head_before = before; + terminal_dally_message_list * head_after = after; + while (head_before != NULL && head_after != NULL) { + is_same &= check_terminal_dally_message(&head_before->msg, &head_after->msg); + is_same &= (head_before->event_data == NULL) == (head_after->event_data == NULL); + + int const message_size = head_before->msg.remote_event_size_bytes + head_before->msg.local_event_size_bytes; + int const message_size_after = head_after->msg.remote_event_size_bytes + head_after->msg.local_event_size_bytes; + is_same &= message_size == message_size_after; + + if (is_same && head_before->event_data != NULL) { + assert(message_size > 0); + + is_same &= !memcmp(head_before->event_data, head_after->event_data, message_size); + } + + head_before = head_before->next; + head_after = head_after->next; + } + + if (head_before != NULL || head_after != NULL) { + is_same = false; // at least one of them is longer than the other + } + + return is_same; +} + +static void print_terminal_dally_message_list(FILE * out, char const * prefix, terminal_state * ns, terminal_dally_message_list * thisq) { + if (thisq == NULL) { + return; + } + + char addprefix_2[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + + terminal_dally_message_list * head = thisq; + while (head != NULL) { + fprintf(out, "%s{\n", prefix); + fprintf(out, "%s | msg:\n", prefix); + print_terminal_dally_message(out, subprefix, ns, &head->msg); + fprintf(out, "%s | event_data = %p\n", prefix, head->event_data); + int 
const message_size = head->msg.remote_event_size_bytes + head->msg.local_event_size_bytes; + if (head->event_data != NULL) { + assert(message_size > 0); + tw_fprint_binary_array(out, subprefix, head->event_data, message_size); + } + fprintf(out, "%s},\n", prefix); + head = head->next; + } + + free(subprefix); +} + + static tw_stime* buff_time_storage_create(terminal_state *s) { tw_stime* storage = (tw_stime*)malloc(s->params->num_rails * sizeof(tw_stime)); @@ -6933,7 +7046,6 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from } // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents - // from->terminal_msgs // from->rank_tbl // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at @@ -6960,17 +7072,20 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); for(int i = 0; i < num_rails; i++) { into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int)); into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*)); for (int j = 0; jvc_occupancy[i][j] = from->vc_occupancy[i][j]; into->terminal_length[i][j] = from->terminal_length[i][j]; into->qos_data[i][j] = from->qos_data[i][j]; into->qos_status[i][j] = from->qos_status[i][j]; + 
copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]); } into->last_buf_full[i] = from->last_buf_full[i]; into->in_send_loop[i] = from->in_send_loop[i]; @@ -7002,6 +7117,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from static void clean_terminal_state(terminal_state *state) { dragonfly_param const * p = state->params; int const num_rails = p->num_rails; + int const num_qos_levels = p->num_qos_levels; // Free all allocated memory for (int i = 0; i < num_rails; i++) { @@ -7009,6 +7125,10 @@ static void clean_terminal_state(terminal_state *state) { free(state->terminal_length[i]); free(state->qos_status[i]); free(state->qos_data[i]); + for (int j = 0; jterminal_msgs[i][j]); + } + free(state->terminal_msgs[i]); } free(state->vc_occupancy); @@ -7024,13 +7144,13 @@ static void clean_terminal_state(terminal_state *state) { free(state->total_chunks); free(state->busy_time); free(state->link_traffic); + free(state->terminal_msgs); if (state->local_congestion_controller != NULL) { clean_tlc_state(state->local_congestion_controller); free(state->local_congestion_controller); } - // Finish cleaning (free memory), and check and print!! 
state->remaining_sz_packets.~map(); state->zombies.~set(); } @@ -7105,6 +7225,7 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]); is_same &= (before->qos_status[i][j] == after->qos_status[i][j]); is_same &= (before->qos_data[i][j] == after->qos_data[i][j]); + is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]); } is_same &= (before->last_buf_full[i] == after->last_buf_full[i]); @@ -7186,7 +7307,21 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state } fprintf(out, "]\n"); - fprintf(out, "%s | *** terminal_msgs = %p\n", prefix, state->terminal_msgs); + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + fprintf(out, "%s | *** terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [\n", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s | | qos level %d\n", prefix, j); + print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]); + } + } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); + fprintf(out, "%s | *** terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail); fprintf(out, "%s | * in_send_loop[%d] = [", prefix, state->params->num_rails); @@ -7195,10 +7330,10 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state } fprintf(out, "]\n"); - char addprefix_2[] = " | | "; - len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char addprefix_3[] = " | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_3) + 1; subprefix = (char *) malloc(len_subprefix * sizeof(char)); 
- snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_3); fprintf(out, "%s | dragonfly_stats_array = [\n", prefix); for (int i = 0; i < CATEGORY_MAX; i++) { fprintf(out, "%s | %d:\n", prefix, i); @@ -7328,7 +7463,6 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state std::map::iterator it_map; for (it_map = state->remaining_sz_packets.begin(); it_map != state->remaining_sz_packets.end(); ++it_map) { fprintf(out, "%s | {packet_ID: %lu, dfdally_src_terminal_id: %u} -> %d,\n", prefix, it_map->first.packet_ID, it_map->first.dfdally_src_terminal_id, it_map->second); - } fprintf(out, "%s | }\n", prefix); @@ -7365,6 +7499,58 @@ char const * const string_event_t(enum event_t type) { } } +// Built with help of Claude +bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after) { + bool is_same = true; + + // Compare all fields + is_same &= before->magic == after->magic; + is_same &= before->travel_start_time == after->travel_start_time; + is_same &= before->travel_end_time == after->travel_end_time; + is_same &= before->packet_ID == after->packet_ID; + is_same &= before->type == after->type; + is_same &= before->notify_type == after->notify_type; + is_same &= strncmp(before->category, after->category, CATEGORY_NAME_MAX) == 0; + is_same &= before->final_dest_gid == after->final_dest_gid; + is_same &= before->sender_lp == after->sender_lp; + is_same &= before->sender_mn_lp == after->sender_mn_lp; + is_same &= before->dest_terminal_lpid == after->dest_terminal_lpid; + is_same &= before->dfdally_src_terminal_id == after->dfdally_src_terminal_id; + is_same &= before->dfdally_dest_terminal_id == after->dfdally_dest_terminal_id; + is_same &= before->src_terminal_id == after->src_terminal_id; + is_same &= before->origin_router_id == after->origin_router_id; + is_same &= before->app_id == after->app_id; + is_same &= 
before->my_N_hop == after->my_N_hop; + is_same &= before->my_l_hop == after->my_l_hop; + is_same &= before->my_g_hop == after->my_g_hop; + is_same &= before->my_hops_cur_group == after->my_hops_cur_group; + is_same &= before->next_stop == after->next_stop; + is_same &= before->this_router_arrival == after->this_router_arrival; + is_same &= before->this_router_ptp_latency == after->this_router_ptp_latency; + is_same &= before->intm_lp_id == after->intm_lp_id; + is_same &= before->last_hop == after->last_hop; + is_same &= before->is_intm_visited == after->is_intm_visited; + is_same &= before->intm_rtr_id == after->intm_rtr_id; + is_same &= before->intm_grp_id == after->intm_grp_id; + is_same &= before->chunk_id == after->chunk_id; + is_same &= before->packet_size == after->packet_size; + is_same &= before->message_id == after->message_id; + is_same &= before->total_size == after->total_size; + is_same &= before->remote_event_size_bytes == after->remote_event_size_bytes; + is_same &= before->local_event_size_bytes == after->local_event_size_bytes; + is_same &= before->vc_index == after->vc_index; + is_same &= before->rail_id == after->rail_id; + is_same &= before->output_chan == after->output_chan; + is_same &= before->is_pull == after->is_pull; + is_same &= before->pull_size == after->pull_size; + is_same &= before->path_type == after->path_type; + is_same &= before->is_there_another_pckt_in_queue == after->is_there_another_pckt_in_queue; + is_same &= before->qos_reset1 == after->qos_reset1; + is_same &= before->qos_reset2 == after->qos_reset2; + + return is_same; +} + // Print fuction originally constructed with help from Claude.ai void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg) { //terminal_state * ns = (terminal_state *) s; From f8c5163b1185eae2f5533231b353d77c4b019ebd Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:33:26 -0400 Subject: [PATCH 037/110] Fixing copy of C++ 
non-initialized members --- src/networks/model-net/dragonfly-dally.C | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 4ccc80e0..321908c3 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -7108,9 +7108,26 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from save_tlc_state(into->local_congestion_controller, from->local_congestion_controller); } - // Magic deep-copy using C++ mechanisms (the values do not point to any pointers) - into->remaining_sz_packets = from->remaining_sz_packets; - into->zombies = from->zombies; + // I would use the C++ amgic to copy these containers but they don't work as well :S + new (&into->remaining_sz_packets) map(); + new (&into->zombies) set(); + + // Sorry const, I promise not to change the state of remaining_sz_packets + map * from_remaining_sz_packets = (map *) &from->remaining_sz_packets; + set * from_zombies = (set *) &from->zombies; + + std::map::iterator it_map; + for (it_map = from_remaining_sz_packets->begin(); it_map != from_remaining_sz_packets->end(); ++it_map) { + into->remaining_sz_packets[it_map->first] = it_map->second; + } + + std::set::iterator it_set; + for (it_set = from_zombies->begin(); it_set != from_zombies->end(); ++it_set) { + struct packet_id const zombie = { + .packet_ID = it_set->packet_ID, + .dfdally_src_terminal_id = it_set->dfdally_src_terminal_id}; + into->zombies.insert(zombie); + } } // Partially written by Claude From 4a1819b431341482e5264052e9536a0bbda2e24b Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:35:50 -0400 Subject: [PATCH 038/110] Some members of terminal_state are not be deep-copied in surrogate mode --- src/networks/model-net/dragonfly-dally.C | 381 +++++++++++++---------- 1 file changed, 208 insertions(+), 173 deletions(-) diff --git 
a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 321908c3..6df7ac68 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -7060,46 +7060,48 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from int const num_qos_levels = p->num_qos_levels; int const num_rails = p->num_rails; - into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*)); - into->terminal_length = (int**) malloc(num_rails * sizeof(int*)); - into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); - into->in_send_loop = (int*) malloc(num_rails * sizeof(int)); - into->issueIdle = (int*) malloc(num_rails * sizeof(int)); - into->qos_status = (int**) malloc(num_rails * sizeof(int*)); - into->qos_data = (int**) malloc(num_rails * sizeof(int*)); - into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int)); - into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); - into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); - into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); - into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); - into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); - - for(int i = 0; i < num_rails; i++) { - into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); - into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int)); - into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); - into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); - into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*)); - for (int j = 0; jvc_occupancy[i][j] = from->vc_occupancy[i][j]; - into->terminal_length[i][j] = from->terminal_length[i][j]; - into->qos_data[i][j] = from->qos_data[i][j]; - into->qos_status[i][j] = 
from->qos_status[i][j]; - copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]); - } - into->last_buf_full[i] = from->last_buf_full[i]; - into->in_send_loop[i] = from->in_send_loop[i]; - into->issueIdle[i] = from->issueIdle[i]; - into->last_qos_lvl[i] = from->last_qos_lvl[i]; - into->terminal_available_time[i] = from->terminal_available_time[i]; - into->stalled_chunks[i] = from->stalled_chunks[i]; - into->total_chunks[i] = from->total_chunks[i]; - into->busy_time[i] = from->busy_time[i]; - } - - into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t)); - for (int i = 0; i < p->radix; i++) { - into->link_traffic[i] = from->link_traffic[i]; + if (!is_surrogate_on) { + into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*)); + into->terminal_length = (int**) malloc(num_rails * sizeof(int*)); + into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->in_send_loop = (int*) malloc(num_rails * sizeof(int)); + into->issueIdle = (int*) malloc(num_rails * sizeof(int)); + into->qos_status = (int**) malloc(num_rails * sizeof(int*)); + into->qos_data = (int**) malloc(num_rails * sizeof(int*)); + into->last_qos_lvl = (int*) malloc(num_rails * sizeof(int)); + into->terminal_available_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); + into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); + into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); + into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); + + for(int i = 0; i < num_rails; i++) { + into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); + 
into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*)); + for (int j = 0; jvc_occupancy[i][j] = from->vc_occupancy[i][j]; + into->terminal_length[i][j] = from->terminal_length[i][j]; + into->qos_data[i][j] = from->qos_data[i][j]; + into->qos_status[i][j] = from->qos_status[i][j]; + copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]); + } + into->last_buf_full[i] = from->last_buf_full[i]; + into->in_send_loop[i] = from->in_send_loop[i]; + into->issueIdle[i] = from->issueIdle[i]; + into->last_qos_lvl[i] = from->last_qos_lvl[i]; + into->terminal_available_time[i] = from->terminal_available_time[i]; + into->stalled_chunks[i] = from->stalled_chunks[i]; + into->total_chunks[i] = from->total_chunks[i]; + into->busy_time[i] = from->busy_time[i]; + } + + into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t)); + for (int i = 0; i < p->radix; i++) { + into->link_traffic[i] = from->link_traffic[i]; + } } if (from->local_congestion_controller != NULL) { @@ -7136,32 +7138,33 @@ static void clean_terminal_state(terminal_state *state) { int const num_rails = p->num_rails; int const num_qos_levels = p->num_qos_levels; - // Free all allocated memory - for (int i = 0; i < num_rails; i++) { - free(state->vc_occupancy[i]); - free(state->terminal_length[i]); - free(state->qos_status[i]); - free(state->qos_data[i]); - for (int j = 0; jterminal_msgs[i][j]); - } - free(state->terminal_msgs[i]); - } - - free(state->vc_occupancy); - free(state->terminal_length); - free(state->last_buf_full); - free(state->in_send_loop); - free(state->issueIdle); - free(state->qos_status); - free(state->qos_data); - free(state->last_qos_lvl); - free(state->terminal_available_time); - free(state->stalled_chunks); - free(state->total_chunks); - free(state->busy_time); - free(state->link_traffic); - free(state->terminal_msgs); + if (!is_surrogate_on) { + for (int i = 0; i < num_rails; i++) { + 
free(state->vc_occupancy[i]); + free(state->terminal_length[i]); + free(state->qos_status[i]); + free(state->qos_data[i]); + for (int j = 0; jterminal_msgs[i][j]); + } + free(state->terminal_msgs[i]); + } + + free(state->vc_occupancy); + free(state->terminal_length); + free(state->last_buf_full); + free(state->in_send_loop); + free(state->issueIdle); + free(state->qos_status); + free(state->qos_data); + free(state->last_qos_lvl); + free(state->terminal_available_time); + free(state->stalled_chunks); + free(state->total_chunks); + free(state->busy_time); + free(state->link_traffic); + free(state->terminal_msgs); + } if (state->local_congestion_controller != NULL) { clean_tlc_state(state->local_congestion_controller); @@ -7232,31 +7235,33 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->anno == after->anno); } - dragonfly_param const * p = before->params; - int const num_qos_levels = p->num_qos_levels; - int const num_rails = p->num_rails; + if (!is_surrogate_on) { + dragonfly_param const * p = before->params; + int const num_qos_levels = p->num_qos_levels; + int const num_rails = p->num_rails; - for (int i = 0; i < num_rails; i++) { - for (int j = 0; j < num_qos_levels; j++) { - is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]); - is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]); - is_same &= (before->qos_status[i][j] == after->qos_status[i][j]); - is_same &= (before->qos_data[i][j] == after->qos_data[i][j]); - is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]); - } + for (int i = 0; i < num_rails; i++) { + for (int j = 0; j < num_qos_levels; j++) { + is_same &= (before->vc_occupancy[i][j] == after->vc_occupancy[i][j]); + is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]); + is_same &= (before->qos_status[i][j] == after->qos_status[i][j]); + is_same &= (before->qos_data[i][j] == 
after->qos_data[i][j]); + is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]); + } - is_same &= (before->last_buf_full[i] == after->last_buf_full[i]); - is_same &= (before->in_send_loop[i] == after->in_send_loop[i]); - is_same &= (before->issueIdle[i] == after->issueIdle[i]); - is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]); - is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]); - is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]); - is_same &= (before->total_chunks[i] == after->total_chunks[i]); - is_same &= (before->busy_time[i] == after->busy_time[i]); - } + is_same &= (before->last_buf_full[i] == after->last_buf_full[i]); + is_same &= (before->in_send_loop[i] == after->in_send_loop[i]); + is_same &= (before->issueIdle[i] == after->issueIdle[i]); + is_same &= (before->last_qos_lvl[i] == after->last_qos_lvl[i]); + is_same &= (before->terminal_available_time[i] == after->terminal_available_time[i]); + is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]); + is_same &= (before->total_chunks[i] == after->total_chunks[i]); + is_same &= (before->busy_time[i] == after->busy_time[i]); + } - for (int i = 0; i < p->radix; i++) { - is_same &= (before->link_traffic[i] == after->link_traffic[i]); + for (int i = 0; i < p->radix; i++) { + is_same &= (before->link_traffic[i] == after->link_traffic[i]); + } } // Ignoring model statistics. In general, we don't care if there are errors in the statistics, as they are only approximate. The stastistics don't interferee with the state of the model. There is a bug within the statistics when rolbacking though. 
A parameters is never reversed properly @@ -7308,44 +7313,54 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); - fprintf(out, "%s | ** vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s | rail %d: [", prefix, i); - for (int j=0; jparams->num_qos_levels; j++) { - fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]); + if (is_surrogate_on) { + fprintf(out, "%s | ** vc_occupancy = %p\n", prefix, state->vc_occupancy); + fprintf(out, "%s | *terminal_available_time = %p\n", prefix, state->terminal_available_time); + fprintf(out, "%s | *** terminal_msgs = %p\n", prefix, state->terminal_msgs); + } else { + fprintf(out, "%s | ** vc_occupancy[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]); + } + fprintf(out, "]\n"); } - fprintf(out, "]\n"); - } - fprintf(out, "%s | ]\n", prefix); + fprintf(out, "%s | ]\n", prefix); - fprintf(out, "%s | *terminal_available_time[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%g", i ? ", " : "", state->terminal_available_time[i]); - } - fprintf(out, "]\n"); + fprintf(out, "%s | *terminal_available_time[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? 
", " : "", state->terminal_available_time[i]); + } + fprintf(out, "]\n"); - char addprefix_2[] = " | | | "; - len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; - subprefix = (char *) malloc(len_subprefix * sizeof(char)); - snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); - fprintf(out, "%s | *** terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s | rail %d: [\n", prefix, i); - for (int j=0; jparams->num_qos_levels; j++) { - fprintf(out, "%s | | qos level %d\n", prefix, j); - print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]); + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + fprintf(out, "%s | *** terminal_msgs[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [\n", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s | | qos level %d\n", prefix, j); + print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]); + } } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); } - fprintf(out, "%s | ]\n", prefix); - free(subprefix); fprintf(out, "%s | *** terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail); - fprintf(out, "%s | * in_send_loop[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]); + if (is_surrogate_on) { + fprintf(out, "%s | * in_send_loop = %p\n", prefix, state->in_send_loop); + } else { + fprintf(out, "%s | * in_send_loop[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? 
", " : "", state->in_send_loop[i]); + } + fprintf(out, "]\n"); } - fprintf(out, "]\n"); char addprefix_3[] = " | | "; len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_3) + 1; @@ -7359,51 +7374,62 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | ]\n", prefix); free(subprefix); - fprintf(out, "%s | ** qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s | rail %d: [", prefix, i); - for (int j=0; jparams->num_qos_levels; j++) { - fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]); + if (is_surrogate_on) { + fprintf(out, "%s | ** qos_status = %p\n", prefix, state->qos_status); + fprintf(out, "%s | ** qos_data = %p\n", prefix, state->qos_data); + fprintf(out, "%s | * last_qos_lvl = %p\n", prefix, state->last_qos_lvl); + } else { + fprintf(out, "%s | ** qos_status[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_status[i][j]); + } + fprintf(out, "]\n"); } - fprintf(out, "]\n"); - } - fprintf(out, "%s | ]\n", prefix); + fprintf(out, "%s | ]\n", prefix); - fprintf(out, "%s | ** qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s | rail %d: [", prefix, i); - for (int j=0; jparams->num_qos_levels; j++) { - fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]); + fprintf(out, "%s | ** qos_data[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? 
", " : "", state->qos_data[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * last_qos_lvl[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]); } fprintf(out, "]\n"); } - fprintf(out, "%s | ]\n", prefix); - - fprintf(out, "%s | * last_qos_lvl[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]); - } - fprintf(out, "]\n"); fprintf(out, "%s | is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw); fprintf(out, "%s | * st = %p\n", prefix, state->st); fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); - fprintf(out, "%s | * issueIdle[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]); - } - fprintf(out, "]\n"); - - fprintf(out, "%s | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s | rail %d: [", prefix, i); - for (int j=0; jparams->num_qos_levels; j++) { - fprintf(out, "%s%d", j ? ", " : "", state->terminal_length[i][j]); + if (is_surrogate_on) { + fprintf(out, "%s | * issueIdle = %p\n", prefix, state->issueIdle); + fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); + } else { + fprintf(out, "%s | * issueIdle[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->issueIdle[i]); } fprintf(out, "]\n"); + + fprintf(out, "%s | ** terminal_length[%d][%d] = [\n", prefix, state->params->num_rails, state->params->num_qos_levels); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s | rail %d: [", prefix, i); + for (int j=0; jparams->num_qos_levels; j++) { + fprintf(out, "%s%d", j ? 
", " : "", state->terminal_length[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); } - fprintf(out, "%s | ]\n", prefix); fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? state->anno : "(nil)"); fprintf(out, "%s | * params = %p\n", prefix, state->params); @@ -7416,35 +7442,44 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | finished_chunks = %ld\n", prefix, state->finished_chunks); fprintf(out, "%s | finished_packets = %ld\n", prefix, state->finished_packets); - fprintf(out, "%s | * last_buf_full[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]); - } - fprintf(out, "]\n"); + if (is_surrogate_on) { + fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); + fprintf(out, "%s | * last_buf_full = %p\n", prefix, state->last_buf_full); + fprintf(out, "%s | * busy_time = %p\n", prefix, state->busy_time); + fprintf(out, "%s | * link_traffic = %p\n", prefix, state->link_traffic); + fprintf(out, "%s | * total_chunks = %p\n", prefix, state->total_chunks); + fprintf(out, "%s | * stalled_chunks = %p\n", prefix, state->stalled_chunks); + } else { + fprintf(out, "%s | * last_buf_full[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]); + } + fprintf(out, "]\n"); - fprintf(out, "%s | * busy_time[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]); - } - fprintf(out, "]\n"); + fprintf(out, "%s | * busy_time[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%g", i ? 
", " : "", state->busy_time[i]); + } + fprintf(out, "]\n"); - fprintf(out, "%s | * link_traffic[%d] = [", prefix, state->params->radix); - for (int i=0; iparams->radix; i++) { - fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]); - } - fprintf(out, "]\n"); + fprintf(out, "%s | * link_traffic[%d] = [", prefix, state->params->radix); + for (int i=0; iparams->radix; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]); + } + fprintf(out, "]\n"); - fprintf(out, "%s | * total_chunks[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]); - } - fprintf(out, "]\n"); + fprintf(out, "%s | * total_chunks[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->total_chunks[i]); + } + fprintf(out, "]\n"); - fprintf(out, "%s | * stalled_chunks[%d] = [", prefix, state->params->num_rails); - for (int i=0; iparams->num_rails; i++) { - fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]); + fprintf(out, "%s | * stalled_chunks[%d] = [", prefix, state->params->num_rails); + for (int i=0; iparams->num_rails; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]); + } + fprintf(out, "]\n"); } - fprintf(out, "]\n"); fprintf(out, "%s | injected_chunks = %lu\n", prefix, state->injected_chunks); fprintf(out, "%s | ejected_chunks = %lu\n", prefix, state->ejected_chunks); From d2cf6ae0e0e701651706623fa0a90797037cb360 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:37:10 -0400 Subject: [PATCH 039/110] Fixing surrogate switch No simulation would run in hybrid mode because of a previous change on how the switch is done. 
--- src/surrogate/switch.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 32086f46..5754e77a 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -131,13 +131,16 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { // We have to put the events back into the queue after we switch back, but if we never // switch back they will never get to be processed and thus we can clean them double switch_offset = g_tw_ts_end; - if (switch_at.current_i + 1 < switch_at.total) { - double const next_switch = switch_at.time_stampts[switch_at.current_i + 1]; + if (switch_at.current_i < switch_at.total) { + double const next_switch = switch_at.time_stampts[switch_at.current_i]; double const pre_switch_time = gvt; switch_offset = next_switch - pre_switch_time; assert(pre_switch_time < next_switch); //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset); } + assert(0 < switch_at.current_i && switch_at.current_i <= switch_at.total); + double const current_switch_time = switch_at.time_stampts[switch_at.current_i - 1]; + assert(current_switch_time == gvt); tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue int events_dequeued = 0; // for stats on code correctness @@ -164,13 +167,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { assert(next_event->recv_ts == next_event->sig.recv_ts); next_event->recv_ts += switch_offset; next_event->sig.recv_ts = next_event->recv_ts; - } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts); #else next_event->recv_ts += switch_offset; - } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at); #endif + } + assert(next_event->recv_ts >= current_switch_time); // store event in deque_events to inject immediately back to the queue next_event->prev = dequed_events; @@ -277,7 +278,7 @@ static void 
events_high_def_to_surrogate_switch(tw_pe * pe, tw_event_sig gvt) { #else static void events_high_def_to_surrogate_switch(tw_pe * pe, tw_stime gvt) { #endif - if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL) { + if (g_tw_synchronization_protocol != OPTIMISTIC && g_tw_synchronization_protocol != SEQUENTIAL && g_tw_synchronization_protocol != SEQUENTIAL_ROLLBACK_CHECK) { tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode"); } @@ -497,9 +498,9 @@ void director_call(tw_pe * pe, tw_stime gvt) { // Only in sequential mode pe->GVT does not carry the current gvt, while it does in conservative and optimistic #ifdef USE_RAND_TIEBREAKER - assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT_sig.recv_ts == gvt)); + assert((g_tw_synchronization_protocol == SEQUENTIAL) || (g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK) || (pe->GVT_sig.recv_ts == gvt)); #else - assert((g_tw_synchronization_protocol == SEQUENTIAL) || (pe->GVT == gvt)); + assert((g_tw_synchronization_protocol == SEQUENTIAL) || (g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK) || (pe->GVT == gvt)); #endif // Do not process if the simulation ended From 4b6bc9a915903ef5fb67be8adaa9887119f92c0c Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:38:46 -0400 Subject: [PATCH 040/110] Fixing state that wasn't properly reversed --- src/networks/model-net/dragonfly-dally.C | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 6df7ac68..5a63bb41 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -4605,6 +4605,7 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag terminal_dally_message_list* cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st); + cur_entry->msg.travel_start_time = msg->saved_avg_time; int data_size = 
s->params->chunk_size; if(cur_entry->msg.packet_size < s->params->chunk_size) data_size = cur_entry->msg.packet_size % s->params->chunk_size; @@ -4671,6 +4672,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size; if(cur_entry->msg.packet_size < s->params->chunk_size) num_chunks++; + msg->saved_avg_time = cur_entry->msg.travel_start_time; // reusing field saved_avg_time. It is only used in another event handler path (arrive). So, no interruptions here cur_entry->msg.travel_start_time = tw_now(lp); double bandwidth_coef = 1; From 8c8ccbc3be1811e3afe7cd069a19299045a05da5 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:40:03 -0400 Subject: [PATCH 041/110] Fixing rollback of member `remaining_sz_packets` in terminal_state --- src/networks/model-net/dragonfly-dally.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 5a63bb41..576da098 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -5171,7 +5171,7 @@ static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_mess s->remaining_sz_packets[packet_key] += s->params->chunk_size; } else { if (bf->c29) { - s->remaining_sz_packets[packet_key] += s->params->chunk_size; + s->remaining_sz_packets.erase(packet_key); } } From ba77a088e4d5ea6122960c416d5ef1f363a06c38 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 12 Mar 2025 14:41:31 -0400 Subject: [PATCH 042/110] Fixing faulty logic when rollbacking event for background traffic --- src/network-workloads/model-net-mpi-replay.c | 5 +++-- src/networks/model-net/core/model-net-lp.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 55649581..6468d7ff 100644 --- 
a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -1136,7 +1136,7 @@ void arrive_syn_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) s->ross_sample.num_bytes_recvd -= data; s->send_time = m->rc.arrive.saved_send_time; s->ross_sample.send_time = m->rc.arrive.saved_send_time_sample; - if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time) + if(bf->c0) { s->max_time = m->rc.arrive.saved_prev_max_time; s->ross_sample.max_time = m->rc.arrive.saved_prev_max_time; @@ -1150,6 +1150,7 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) m->rc.arrive.saved_send_time_sample = s->ross_sample.send_time; if((tw_now(lp) - m->fwd.sim_start_time) > s->max_time) { + bf->c0 = 1; m->rc.arrive.saved_prev_max_time = s->max_time; s->max_time = tw_now(lp) - m->fwd.sim_start_time; s->ross_sample.max_time = tw_now(lp) - m->fwd.sim_start_time; @@ -2769,7 +2770,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) s->num_events_processed++; #endif /* if LP_DEBUG */ - //*(int *)bf = (int)0; + memset(bf, 0, sizeof(tw_bf)); rc_stack_gc(lp, s->matched_reqs); // rc_stack_gc(lp, s->indices); rc_stack_gc(lp, s->processed_ops); diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index 536c44a1..b513675b 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -16,7 +16,7 @@ #define MN_NAME "model_net_base" #define DEBUG 0 -#define MODELNET_LP_DEBUG 1 +#define MODELNET_LP_DEBUG 0 /**** BEGIN SIMULATION DATA STRUCTURES ****/ int model_net_base_magic; From ddf198130db6d76cc6399781b41eb984ac5de3b2 Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 13 Mar 2025 08:23:48 -0400 Subject: [PATCH 043/110] Fixing condition for surrogate switch --- src/surrogate/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 
5754e77a..2481e28b 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -140,7 +140,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { } assert(0 < switch_at.current_i && switch_at.current_i <= switch_at.total); double const current_switch_time = switch_at.time_stampts[switch_at.current_i - 1]; - assert(current_switch_time == gvt); + assert(current_switch_time <= gvt); tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue int events_dequeued = 0; // for stats on code correctness From 03e5fd4f6d48f189a1f84a62ac79fadf45c77b78 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 18 Mar 2025 11:32:23 -0400 Subject: [PATCH 044/110] Fixing the switch from high-fidelity to surrogate --- src/networks/model-net/dragonfly-dally.C | 2 +- src/surrogate/switch.c | 45 ++++++++++++++---------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 576da098..83b6a46f 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -4831,7 +4831,7 @@ static void notify_dest_lp_of( terminal_dally_message * new_msg; // Lower value in priority means that it will be processed first // This event will be processed before any predicted packet arrives (even if scheduled at the same timestamp) - tw_event *e = model_net_method_event_new_user_prio(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL, 0.5); + tw_event *e = model_net_method_event_new_user_prio(msg->dest_terminal_lpid, offset, lp, DRAGONFLY_DALLY, (void**)&new_msg, NULL, 1); memcpy(new_msg, msg, sizeof(terminal_dally_message)); // Just making sure that if the simulation breaks because we didn't set some value below, it breaks in a spectacular manner (~0 can be -1) assert(new_msg->dfdally_src_terminal_id == s->terminal_id); diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 
c2b9a626..e05436da 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -134,14 +134,14 @@ static void shift_events_to_future_pe(tw_pe * pe) { // switch back they will never get to be processed and thus we can clean them double switch_offset = g_tw_ts_end; if (switch_at.current_i < switch_at.total) { - double const next_switch = switch_at.time_stampts[switch_at.current_i]; + double const next_switch = switch_at.time_stampts[switch_at.current_i + 1]; double const pre_switch_time = gvt; switch_offset = next_switch - pre_switch_time; assert(pre_switch_time < next_switch); //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset); } - assert(0 < switch_at.current_i && switch_at.current_i <= switch_at.total); - double const current_switch_time = switch_at.time_stampts[switch_at.current_i - 1]; + assert(0 <= switch_at.current_i && switch_at.current_i < switch_at.total); + double const current_switch_time = switch_at.time_stampts[switch_at.current_i]; assert(current_switch_time <= gvt); tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue @@ -155,6 +155,9 @@ static void shift_events_to_future_pe(tw_pe * pe) { #else assert(next_event->recv_ts >= gvt); #endif + if (next_event->event_id && next_event->state.remote) { + tw_hash_remove(pe->hash_t, next_event, next_event->send_pe); + } // finding out lp type char const * lp_type_name; @@ -191,6 +194,10 @@ static void shift_events_to_future_pe(tw_pe * pe) { prev_event->prev = NULL; tw_pq_enqueue(pe->pq, prev_event); + if (prev_event->event_id && prev_event->state.remote) { + tw_hash_insert(pe->hash_t, prev_event, prev_event->send_pe); + } + events_enqueued++; } @@ -286,7 +293,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { } tw_event *** lps_events = order_events_per_lps(pe); + printf("PE %d - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size); shift_events_to_future_pe(pe); + printf("PE 
%d - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -308,6 +317,10 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0; struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); + pe->cur_event = pe->abort_event; + pe->cur_event->caused_by_me = NULL; + pe->cur_event->sig = pe->GVT_sig; + if (lp_type_switch) { if (lp_type_switch->trigger_idle_modelnet) { assert(is_lp_modelnet); @@ -363,6 +376,10 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0; struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); + pe->cur_event = pe->abort_event; + pe->cur_event->caused_by_me = NULL; + pe->cur_event->sig = pe->GVT_sig; + if (lp_type_switch) { if (lp_type_switch->trigger_idle_modelnet) { assert(is_lp_modelnet); @@ -385,8 +402,6 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { } } - -// This is an impure function, calling it twice WILL give different results. Only call it once! 
bool hit_trigger(tw_stime gvt) { if ( switch_at.current_i < switch_at.total && g_tw_trigger_gvt_hook.active == GVT_HOOK_triggered) { @@ -398,14 +413,6 @@ bool hit_trigger(tw_stime gvt) { #endif assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it - // Activating next switch - if (++switch_at.current_i < switch_at.total) { - double const next_switch = switch_at.time_stampts[switch_at.current_i]; - // Setting trigger for next switch - //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_gvt_hook_at(next_switch); - } - // return true; } else { return false; @@ -415,15 +422,9 @@ bool hit_trigger(tw_stime gvt) { void switch_model(tw_pe * pe) { // Rollback if in optimistic mode -#ifdef USE_RAND_TIEBREAKER if (g_tw_synchronization_protocol == OPTIMISTIC) { rollback_and_cancel_events_pe(pe); } -#else - if (g_tw_synchronization_protocol == OPTIMISTIC) { - rollback_and_cancel_events_pe(pe); - } -#endif surr_config.director.switch_surrogate(); if (DEBUG_DIRECTOR && g_tw_mynode == 0) { printf("Switching to %s\n", surr_config.director.is_surrogate_on() ? 
"surrogate" : "high-fidelity"); @@ -511,6 +512,12 @@ void director_call(tw_pe * pe) { double const end = tw_clock_read(); surrogate_switching_time += end - start; + // Setting trigger for next switch + if (++switch_at.current_i < switch_at.total) { + double next_switch = switch_at.time_stampts[switch_at.current_i]; + tw_trigger_gvt_hook_at(next_switch); + } + if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) { printf("Switch completed!\n"); } From a4cac4dcd9fd7eaa0251b1b33f6c66dcf5ad498a Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 18 Mar 2025 11:33:51 -0400 Subject: [PATCH 045/110] Adding ability to delete events at director call --- codes/surrogate/switch.h | 1 + src/networks/model-net/dragonfly-dally.C | 8 ++++++-- src/surrogate/switch.c | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h index c538e769..d23abb00 100644 --- a/codes/surrogate/switch.h +++ b/codes/surrogate/switch.h @@ -49,6 +49,7 @@ struct lp_types_switch { model_switch_f highdef_to_surrogate; model_switch_f surrogate_to_highdef; model_ask_if_freeze_f should_event_be_frozen; // NULL means event from LP type shouldn't be frozen + model_ask_if_freeze_f should_event_be_deleted; // NULL means event from LP type shouldn't be deleted }; struct switch_at_struct { diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 83b6a46f..2a3d0b9a 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -2446,12 +2446,16 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) .trigger_idle_modelnet = true, .highdef_to_surrogate = (model_switch_f) dragonfly_dally_terminal_highdef_to_surrogate, .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef, - .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen}, + .should_event_be_frozen = 
dragonfly_dally_terminal_should_event_be_frozen, + .should_event_be_deleted = NULL, + }, {.lpname = "modelnet_dragonfly_dally_router", .trigger_idle_modelnet = false, .highdef_to_surrogate = NULL, .surrogate_to_highdef = NULL, - .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen}, + .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen, + .should_event_be_deleted = NULL, + }, 0 } }; diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index e05436da..babbc31f 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -166,6 +166,7 @@ static void shift_events_to_future_pe(tw_pe * pe) { struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); // shifting time stamps to the future for events to freeze + bool deleted = false; if (lp_type_switch && lp_type_switch->should_event_be_frozen && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) { #ifdef USE_RAND_TIEBREAKER @@ -175,13 +176,21 @@ static void shift_events_to_future_pe(tw_pe * pe) { #else next_event->recv_ts += switch_offset; #endif + assert(next_event->recv_ts >= current_switch_time); + // deleting event if we need to + } else if (lp_type_switch && lp_type_switch->should_event_be_deleted + && lp_type_switch->should_event_be_deleted(next_event->dest_lp, next_event)) { + tw_event_free(pe, next_event); + deleted = true; } - assert(next_event->recv_ts >= current_switch_time); // store event in deque_events to inject immediately back to the queue - next_event->prev = dequed_events; - dequed_events = next_event; - events_dequeued++; + if (!deleted) { + next_event->prev = dequed_events; + dequed_events = next_event; + events_dequeued++; + assert(next_event->recv_ts >= current_switch_time); + } next_event = tw_pq_dequeue(pe->pq); } From dde0551527a5d6569f4bf83486066666dab77ec8 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 18 Mar 2025 11:46:02 -0400 Subject: [PATCH 046/110] Fixing some debug output in surrogate 
switch --- src/surrogate/switch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index babbc31f..088de48b 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -210,7 +210,7 @@ static void shift_events_to_future_pe(tw_pe * pe) { events_enqueued++; } - if (DEBUG_DIRECTOR > 1) { + if (DEBUG_DIRECTOR > 0 && events_dequeued != events_enqueued) { printf("PE %lu: Discrepancy on number of events processed %d (%d dequeued and %d enqueued)\n", g_tw_mynode, events_dequeued - events_enqueued, events_dequeued, events_enqueued); } @@ -302,9 +302,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { } tw_event *** lps_events = order_events_per_lps(pe); - printf("PE %d - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size); + printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size); shift_events_to_future_pe(pe); - printf("PE %d - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size); + printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { From 18d300e00db60461e61bdc7c364e176132d858b9 Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 20 Mar 2025 21:25:47 -0400 Subject: [PATCH 047/110] Adding deep-copy/check/print functions for router_state --- src/networks/model-net/dragonfly-dally.C | 536 ++++++++++++++++++++++- 1 file changed, 520 insertions(+), 16 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 2a3d0b9a..e33e6da3 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -3895,6 +3895,7 @@ static void router_dally_init(router_state * r, tw_lp * lp) } + r->snapshot_data = NULL; if (num_snapshots) { 
r->snapshot_data = (int**)calloc(num_snapshots, sizeof(int*)); for(int i = 0; i < num_snapshots; i++) @@ -3905,6 +3906,8 @@ static void router_dally_init(router_state * r, tw_lp * lp) } //Xin: msg counters for apps + r->agg_link_traffic = NULL; + r->agg_busy_time = NULL; if(p->counting_bool > 0) { r->agg_link_traffic = (int64_t **) calloc(p->counting_windows, sizeof(int64_t *)); @@ -7059,6 +7062,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from // from->sample_stat // from->ross_sample // from->busy_time_ross_sample + // from->busy_time_sample memcpy(into, from, sizeof(terminal_state)); @@ -7080,6 +7084,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); + into->link_traffic = (uint64_t*) malloc(num_rails * sizeof(uint64_t)); for(int i = 0; i < num_rails; i++) { into->vc_occupancy[i] = (int*) malloc(num_qos_levels * sizeof(int)); @@ -7102,10 +7107,6 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from into->stalled_chunks[i] = from->stalled_chunks[i]; into->total_chunks[i] = from->total_chunks[i]; into->busy_time[i] = from->busy_time[i]; - } - - into->link_traffic = (uint64_t*) malloc(p->radix * sizeof(uint64_t)); - for (int i = 0; i < p->radix; i++) { into->link_traffic[i] = from->link_traffic[i]; } } @@ -7263,9 +7264,6 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->stalled_chunks[i] == after->stalled_chunks[i]); is_same &= (before->total_chunks[i] == after->total_chunks[i]); is_same &= (before->busy_time[i] == after->busy_time[i]); - } - - for (int i = 0; i < p->radix; i++) { is_same &= (before->link_traffic[i] == after->link_traffic[i]); } } @@ -7468,8 
+7466,8 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state } fprintf(out, "]\n"); - fprintf(out, "%s | * link_traffic[%d] = [", prefix, state->params->radix); - for (int i=0; i<state->params->radix; i++) { + fprintf(out, "%s | * link_traffic[%d] = [", prefix, state->params->num_rails); + for (int i=0; i<state->params->num_rails; i++) { fprintf(out, "%s%lu", i ? ", " : "", state->link_traffic[i]); } fprintf(out, "]\n"); @@ -7537,6 +7535,509 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | * frozen_state = %p\n", prefix, state->frozen_state); } +// Original function implemented by Claude +static void save_router_state(router_state *into, router_state const *from) { + // Missing deep-clone/comparison/print members. These members are always accessed, so it is possible to discover some bugs if we print their contents + // from->local_congestion_controller + + // Missing deep-clone/comparison/print members. + // from->rsamples + // from->ross_rsample + // from->busy_time_sample + // from->link_traffic_sample + // from->link_traffic_ross_sample + + memcpy(into, from, sizeof(router_state)); + + dragonfly_param const * p = into->params; + int const radix = p->radix; + int const num_qos_levels = p->num_qos_levels; + + into->global_channel = (int*) malloc(p->num_global_channels * sizeof(int)); + + for (int i = 0; i < p->num_global_channels; i++) { + into->global_channel[i] = from->global_channel[i]; + } + + into->next_output_available_time = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->last_buf_full = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->busy_time = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->stalled_chunks = (unsigned long*) malloc(radix * sizeof(unsigned long)); + into->total_chunks = (unsigned long*) malloc(radix * sizeof(unsigned long)); + into->in_send_loop = (int*) malloc(radix * sizeof(int)); + into->queued_count = (int*) malloc(radix * sizeof(int)); +
into->port_bandwidths = (double*) malloc(radix * sizeof(double)); + into->vc_max_sizes = (int*) malloc(radix * sizeof(int)); + into->link_traffic = (int64_t*) malloc(radix * sizeof(int64_t)); + into->last_qos_lvl = (int*) malloc(radix * sizeof(int)); + into->vc_occupancy = (int**) malloc(radix * sizeof(int*)); + into->qos_status = (int**) malloc(radix * sizeof(int*)); + into->qos_data = (int**) malloc(radix * sizeof(int*)); + into->pending_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**)); + into->queued_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**)); + + for (int i = 0; i < radix; i++) { + into->next_output_available_time[i] = from->next_output_available_time[i]; + into->last_buf_full[i] = from->last_buf_full[i]; + into->busy_time[i] = from->busy_time[i]; + into->stalled_chunks[i] = from->stalled_chunks[i]; + into->total_chunks[i] = from->total_chunks[i]; + into->in_send_loop[i] = from->in_send_loop[i]; + into->queued_count[i] = from->queued_count[i]; + into->port_bandwidths[i] = from->port_bandwidths[i]; + into->vc_max_sizes[i] = from->vc_max_sizes[i]; + into->link_traffic[i] = from->link_traffic[i]; + into->last_qos_lvl[i] = from->last_qos_lvl[i]; + + into->vc_occupancy[i] = (int*) malloc(p->num_vcs * sizeof(int)); + into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); + into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); + + into->pending_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*)); + into->queued_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*)); + + for (int j = 0; j < p->num_vcs; j++) { + into->vc_occupancy[i][j] = from->vc_occupancy[i][j]; + copy_terminal_dally_message_list(&into->pending_msgs[i][j], from->pending_msgs[i][j]); + copy_terminal_dally_message_list(&into->queued_msgs[i][j], from->queued_msgs[i][j]); + } + for (int j = 0; j < 
num_qos_levels; j++) { + into->qos_status[i][j] = from->qos_status[i][j]; + into->qos_data[i][j] = from->qos_data[i][j]; + } + } + + into->snapshot_data = NULL; + if (num_snapshots) { + into->snapshot_data = (int**) malloc(num_snapshots * sizeof(int*)); + int size_snapshot = from->params->num_vcs * from->params->radix; + for (int i = 0; i < num_snapshots; i++) { + into->snapshot_data[i] = (int*) malloc(size_snapshot * sizeof(int)); + memcpy(into->snapshot_data[i], from->snapshot_data[i], size_snapshot * sizeof(int)); + } + } + + if (p->counting_bool > 0) { + assert(from->agg_busy_time != NULL); + assert(from->agg_link_traffic != NULL); + into->agg_busy_time = (tw_stime**) malloc(p->counting_windows * sizeof(tw_stime*)); + into->agg_link_traffic = (int64_t**) malloc(p->counting_windows * sizeof(int64_t*)); + + for (int i = 0; i < p->counting_windows; i++) { + into->agg_busy_time[i] = (tw_stime*) malloc(radix * sizeof(tw_stime)); + into->agg_link_traffic[i] = (int64_t*) malloc(radix * sizeof(int64_t)); + memcpy(into->agg_busy_time[i], from->agg_busy_time[i], radix * sizeof(tw_stime)); + memcpy(into->agg_link_traffic[i], from->agg_link_traffic[i], radix * sizeof(int64_t)); + } + } + + //if (from->local_congestion_controller != NULL) { + // assert(g_congestion_control_enabled); + // into->local_congestion_controller = (rlc_state*) malloc(sizeof(rlc_state)); + // save_rlc_state(into->local_congestion_controller, from->local_congestion_controller); + //} +} + +// Original function implemented by Claude +static void clean_router_state(router_state *state) { + dragonfly_param const * p = state->params; + int const radix = p->radix; + + // Free simple arrays + free(state->global_channel); + free(state->next_output_available_time); + free(state->last_buf_full); + free(state->busy_time); + free(state->stalled_chunks); + free(state->total_chunks); + free(state->in_send_loop); + free(state->queued_count); + free(state->port_bandwidths); + free(state->vc_max_sizes); + 
free(state->link_traffic); + free(state->last_qos_lvl); + + // Clean and free 2D arrays + for (int i = 0; i < radix; i++) { + free(state->vc_occupancy[i]); + free(state->qos_status[i]); + free(state->qos_data[i]); + + for (int j = 0; j < p->num_vcs; j++) { + clean_terminal_dally_message_list(state->pending_msgs[i][j]); + clean_terminal_dally_message_list(state->queued_msgs[i][j]); + } + + free(state->pending_msgs[i]); + free(state->queued_msgs[i]); + } + + free(state->vc_occupancy); + free(state->qos_status); + free(state->qos_data); + free(state->pending_msgs); + free(state->queued_msgs); + + if (num_snapshots) { + for (int i = 0; i < num_snapshots; i++) { + free(state->snapshot_data[i]); + } + free(state->snapshot_data); + } + + if (p->counting_bool > 0) { + for (int i = 0; i < p->counting_windows; i++) { + free(state->agg_busy_time[i]); + free(state->agg_link_traffic[i]); + } + free(state->agg_busy_time); + free(state->agg_link_traffic); + } + + //if (state->local_congestion_controller != NULL) { + // clean_rlc_state(state->local_congestion_controller); + // free(state->local_congestion_controller); + //} +} + +// Original function implemented by Claude +static bool check_router_state(router_state const *before, router_state const *after) { + dragonfly_param const * p = before->params; + int const radix = p->radix; + int const num_qos_levels = p->num_qos_levels; + + if (before->router_id != after->router_id || + before->group_id != after->group_id || + before->plane_id != after->plane_id || + before->op_arr_size != after->op_arr_size || + before->max_arr_size != after->max_arr_size || + before->workloads_finished_flag != after->workloads_finished_flag || + before->is_monitoring_bw != after->is_monitoring_bw || + before->last_time != after->last_time) { + return false; + } + + for (int i = 0; i < p->num_global_channels; i++) { + if (before->global_channel[i] != after->global_channel[i]) { + return false; + } + } + + for (int i = 0; i < radix; i++) { + if 
(before->next_output_available_time[i] != after->next_output_available_time[i] || + before->last_buf_full[i] != after->last_buf_full[i] || + before->busy_time[i] != after->busy_time[i] || + before->stalled_chunks[i] != after->stalled_chunks[i] || + before->total_chunks[i] != after->total_chunks[i] || + before->in_send_loop[i] != after->in_send_loop[i] || + before->queued_count[i] != after->queued_count[i] || + before->port_bandwidths[i] != after->port_bandwidths[i] || + before->vc_max_sizes[i] != after->vc_max_sizes[i] || + before->link_traffic[i] != after->link_traffic[i] || + before->last_qos_lvl[i] != after->last_qos_lvl[i]) { + return false; + } + + for (int j = 0; j < p->num_vcs; j++) { + if (before->vc_occupancy[i][j] != after->vc_occupancy[i][j]) { + return false; + } + + if (!check_terminal_dally_message_list(before->pending_msgs[i][j], after->pending_msgs[i][j]) || + !check_terminal_dally_message_list(before->queued_msgs[i][j], after->queued_msgs[i][j])) { + return false; + } + } + + for (int j = 0; j < num_qos_levels; j++) { + if (before->qos_status[i][j] != after->qos_status[i][j] || + before->qos_data[i][j] != after->qos_data[i][j]) { + return false; + } + } + } + + if ((before->snapshot_data == NULL) != (after->snapshot_data == NULL)) { + return false; + } + + if (num_snapshots) { + assert(before->snapshot_data != NULL); + int size_snapshot = before->params->num_vcs * before->params->radix; + for (int i = 0; i < num_snapshots; i++) { + assert(after->snapshot_data[i] == NULL); + + for (int j = 0; j < size_snapshot; j++) { + if (before->snapshot_data[i][j] != after->snapshot_data[i][j]) { + return false; + } + } + } + } + + if ((before->agg_busy_time == NULL) != (after->agg_busy_time == NULL)) { + return false; + } + if ((before->agg_link_traffic == NULL) != (after->agg_link_traffic == NULL)) { + return false; + } + + if (p->counting_bool > 0) { + assert(before->agg_busy_time != NULL && after->agg_busy_time); + assert(before->agg_link_traffic != NULL && 
after->agg_link_traffic); + for (int i = 0; i < p->counting_windows; i++) { + for (int j = 0; j < radix; j++) { + if (before->agg_busy_time[i][j] != after->agg_busy_time[i][j] || + before->agg_link_traffic[i][j] != after->agg_link_traffic[i][j]) { + return false; + } + } + } + } + + //if (before->local_congestion_controller != NULL) { + // if (!check_rlc_state(before->local_congestion_controller, after->local_congestion_controller)) { + // return false; + // } + //} + + // Check strings + if (strncmp(before->output_buf, after->output_buf, 4096) != 0 || + strncmp(before->output_buf5, after->output_buf5, 4096) != 0 || + strncmp(before->output_buf6, after->output_buf6, 4096) != 0) { + return false; + } + + // All checks passed + return true; +} + +// Original function implemented by Claude +static void print_router_state(FILE * out, char const * prefix, router_state * state) { + dragonfly_param const * p = state->params; + int const radix = p->radix; + int const num_qos_levels = p->num_qos_levels; + + fprintf(out, "%srouter_state (dragonfly) ->\n", prefix); + fprintf(out, "%s | router_id = %u\n", prefix, state->router_id); + fprintf(out, "%s | group_id = %d\n", prefix, state->group_id); + fprintf(out, "%s | plane_id = %d\n", prefix, state->plane_id); + fprintf(out, "%s | op_arr_size = %d\n", prefix, state->op_arr_size); + fprintf(out, "%s | max_arr_size = %d\n", prefix, state->max_arr_size); + + fprintf(out, "%s | * global_channel[%d] = [", prefix, radix); + for (int i = 0; i < p->num_global_channels; i++) { + fprintf(out, "%s%d", i ? 
", " : "", state->global_channel[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | connMan = \n", prefix); + + char addprefix[] = " | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + fprintf(out, "%s | *local_congestion_controller = %p\n", prefix, state->local_congestion_controller); + //if (state->local_congestion_controller != NULL) { + // snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + // print_rlc_state(out, subprefix, state->local_congestion_controller); + //} + free(subprefix); + + fprintf(out, "%s | *next_output_available_time[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->next_output_available_time[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * last_buf_full[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->last_buf_full[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * busy_time[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->busy_time[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * busy_time_sample = %p\n", prefix, state->busy_time_sample); + + fprintf(out, "%s | * stalled_chunks[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%lu", i ? ", " : "", state->stalled_chunks[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * total_chunks[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%lu", i ? 
", " : "", state->total_chunks[i]); + } + fprintf(out, "]\n"); + + char addprefix_2[] = " | | | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + + fprintf(out, "%s | *** pending_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [\n", prefix, i); + for (int j = 0; j < p->num_vcs; j++) { + fprintf(out, "%s | | vcs # %d\n", prefix, j); + print_terminal_dally_message_list(out, subprefix, NULL, state->pending_msgs[i][j]); + } + fprintf(out, "%s | ]\n", prefix); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | *** pending_msgs_tail = %p\n", prefix, state->pending_msgs_tail); + + fprintf(out, "%s | *** queued_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [\n", prefix, i); + for (int j = 0; j < p->num_vcs; j++) { + fprintf(out, "%s | | vcs # %d\n", prefix, j); + print_terminal_dally_message_list(out, subprefix, NULL, state->queued_msgs[i][j]); + } + fprintf(out, "%s | ]\n", prefix); + } + fprintf(out, "%s | ]\n", prefix); + free(subprefix); + + fprintf(out, "%s | *** queued_msgs_tail = %p\n", prefix, state->queued_msgs_tail); + + fprintf(out, "%s | * in_send_loop[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->in_send_loop[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * queued_count[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? 
", " : "", state->queued_count[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * st = %p\n", prefix, state->st); + fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); + fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); + + fprintf(out, "%s | * port_bandwidths[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%g", i ? ", " : "", state->port_bandwidths[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * vc_max_sizes[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->vc_max_sizes[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | ** vc_occupancy[%d][%d] = [\n", prefix, radix, p->num_vcs); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [", prefix, i); + for (int j = 0; j < p->num_vcs; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->vc_occupancy[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * link_traffic[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%ld", i ? ", " : "", state->link_traffic[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | * link_traffic_sample = %p\n", prefix, state->link_traffic_sample); + + fprintf(out, "%s | is_monitoring_bw = %d\n", prefix, state->is_monitoring_bw); + + fprintf(out, "%s | * last_qos_lvl[%d] = [", prefix, radix); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s%d", i ? ", " : "", state->last_qos_lvl[i]); + } + fprintf(out, "]\n"); + + fprintf(out, "%s | ** qos_status[%d][%d] = [\n", prefix, radix, num_qos_levels); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [", prefix, i); + for (int j = 0; j < num_qos_levels; j++) { + fprintf(out, "%s%d", j ? 
", " : "", state->qos_status[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | ** qos_data[%d][%d] = [\n", prefix, radix, num_qos_levels); + for (int i = 0; i < radix; i++) { + fprintf(out, "%s | port %d: [", prefix, i); + for (int j = 0; j < num_qos_levels; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->qos_data[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? state->anno : "(nil)"); + fprintf(out, "%s | * params = %p\n", prefix, state->params); + + if (num_snapshots) { + fprintf(out, "%s | ** snapshot_data[%d][%d] = [\n", prefix, num_snapshots, radix); + int size_snapshot = p->num_vcs * p->radix; + for (int i = 0; i < num_snapshots; i++) { + fprintf(out, "%s | snapshot %d: [", prefix, i); + for (int j = 0; j < size_snapshot; j++) { + fprintf(out, "%s%d", j ? ", " : "", state->snapshot_data[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + } else { + fprintf(out, "%s | ** snapshot_data = %p\n", prefix, state->snapshot_data); + } + + fprintf(out, "%s | output_buf = '%.4096s'\n", prefix, state->output_buf); + fprintf(out, "%s | * rsamples = %p\n", prefix, state->rsamples); + fprintf(out, "%s | fwd_events = %ld\n", prefix, state->fwd_events); + fprintf(out, "%s | rev_events = %ld\n", prefix, state->rev_events); + fprintf(out, "%s | output_buf5 = '%.4096s'\n", prefix, state->output_buf5); + fprintf(out, "%s | output_buf6 = '%.4096s'\n", prefix, state->output_buf6); + + if(p->counting_bool <= 0) + { + fprintf(out, "%s | ** agg_busy_time = %p\n", prefix, state->agg_busy_time); + fprintf(out, "%s | ** agg_link_traffic = %p\n", prefix, state->agg_link_traffic); + } else { + assert(state->agg_busy_time != NULL); + assert(state->agg_link_traffic != NULL); + fprintf(out, "%s | ** agg_busy_time[%d][%d] = [\n", prefix, p->counting_windows, radix); + for (int i = 0; i < p->counting_windows; i++) { + fprintf(out, 
"%s | window %d: [", prefix, i); + for (int j = 0; j < radix; j++) { + fprintf(out, "%s%g", j ? ", " : "", state->agg_busy_time[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + + fprintf(out, "%s | ** agg_link_traffic[%d][%d] = [\n", prefix, p->counting_windows, radix); + for (int i = 0; i < p->counting_windows; i++) { + fprintf(out, "%s | window %d: [", prefix, i); + for (int j = 0; j < radix; j++) { + fprintf(out, "%s%lu", j ? ", " : "", state->agg_link_traffic[i][j]); + } + fprintf(out, "]\n"); + } + fprintf(out, "%s | ]\n", prefix); + } + + fprintf(out, "%s | ross_rsample = \n", prefix); + fprintf(out, "%s | last_time = %g\n", prefix, state->last_time); +} + char const * const string_event_t(enum event_t type) { switch (type) { case T_GENERATE: return "T_GENERATE"; @@ -7561,6 +8062,9 @@ char const * const string_event_t(enum event_t type) { bool check_terminal_dally_message(struct terminal_dally_message * before, struct terminal_dally_message * after) { bool is_same = true; + // Fields that have no effects in the simulation + // before->this_router_ptp_latency + // Compare all fields is_same &= before->magic == after->magic; is_same &= before->travel_start_time == after->travel_start_time; @@ -7584,7 +8088,6 @@ bool check_terminal_dally_message(struct terminal_dally_message * before, struct is_same &= before->my_hops_cur_group == after->my_hops_cur_group; is_same &= before->next_stop == after->next_stop; is_same &= before->this_router_arrival == after->this_router_arrival; - is_same &= before->this_router_ptp_latency == after->this_router_ptp_latency; is_same &= before->intm_lp_id == after->intm_lp_id; is_same &= before->last_hop == after->last_hop; is_same &= before->is_intm_visited == after->is_intm_visited; @@ -7612,6 +8115,7 @@ bool check_terminal_dally_message(struct terminal_dally_message * before, struct // Print fuction originally constructed with help from Claude.ai void print_terminal_dally_message(FILE * out, char const * 
prefix, void * s, struct terminal_dally_message * msg) { //terminal_state * ns = (terminal_state *) s; + //router_state * ns = (router_state *) s; fprintf(out, "%sterminal_dally_message ->\n", prefix); fprintf(out, "%s | magic = %d\n", prefix, msg->magic); @@ -7730,12 +8234,12 @@ crv_checkpointer dragonfly_dally_checkpointers[] = { { &dragonfly_dally_lps[1], sizeof(router_state), - (save_checkpoint_state_f) NULL, - (clean_checkpoint_state_f) NULL, - (check_states_f) NULL, - (print_lpstate_f) NULL, - (print_checkpoint_state_f) NULL, - (print_event_f) NULL, + (save_checkpoint_state_f) save_router_state, + (clean_checkpoint_state_f) clean_router_state, + (check_states_f) check_router_state, + (print_lpstate_f) print_router_state, + (print_checkpoint_state_f) print_router_state, + (print_event_f) print_terminal_dally_message, }, }; From 244f98af05736192fb0cebdbb894b5b197514827 Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 20 Mar 2025 21:26:47 -0400 Subject: [PATCH 048/110] Fixing reversibility bug in router_state --- src/networks/model-net/dragonfly-dally.C | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index e33e6da3..38dbe1e6 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -6616,7 +6616,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes m->magic = router_magic_num; int msg_size = s->params->chunk_size; - if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { + if(((cur_entry->msg.packet_size % s->params->chunk_size) || cur_entry->msg.packet_size == 0) && (cur_entry->msg.chunk_id == num_chunks - 1)) { bf->c11 = 1; s->link_traffic[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); s->link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); @@ -6910,6 +6910,7 @@ static void 
router_dally_event(router_state * s, tw_bf * bf, terminal_dally_mess s->ross_rsample.fwd_events++; rc_stack_gc(lp, s->st); + msg->last_received_time = s->last_time; s->last_time = tw_now(lp); assert(msg->magic == router_magic_num); @@ -7016,6 +7017,8 @@ static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, term static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { + s->last_time = msg->last_received_time; + for(int i = 0; i < msg->num_rngs; i++) tw_rand_reverse_unif(lp->rng); From e7e7535f8a825cf5b7d95a0a1e880689147d461d Mon Sep 17 00:00:00 2001 From: helq Date: Sat, 22 Mar 2025 20:33:56 -0400 Subject: [PATCH 049/110] Updating tie-breaker related code from ROSS update --- src/surrogate/switch.c | 6 +++--- src/util/rc-stack.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 088de48b..dede0cf5 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -80,9 +80,9 @@ static void rollback_and_cancel_events_pe(tw_pe * pe) { tw_stime const gvt = gvt_sig.recv_ts; // Backtracking the simulation to GVT for (unsigned int i = 0; i < g_tw_nkp; i++) { - tw_kp_rollback_to_sig(g_tw_kp[i], gvt_sig); + tw_kp_rollback_to_sig(g_tw_kp[i], &gvt_sig); } - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); assert(pe->GVT_sig.recv_ts == gvt); // redundant but needed because compiler cries that gvt is never used #else tw_stime const gvt = pe->GVT; @@ -151,7 +151,7 @@ static void shift_events_to_future_pe(tw_pe * pe) { // Filtering events to freeze assert(next_event->prev == NULL); #ifdef USE_RAND_TIEBREAKER - assert(tw_event_sig_compare(next_event->sig, gvt_sig) >= 0); + assert(tw_event_sig_compare_ptr(&next_event->sig, &gvt_sig) >= 0); #else assert(next_event->recv_ts >= gvt); #endif diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c index 
7b0540e7..9491c897 100644 --- a/src/util/rc-stack.c +++ b/src/util/rc-stack.c @@ -124,7 +124,7 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) { while (ent != &s->head) { rc_entry *r = qlist_entry(ent, rc_entry, ql); #ifdef USE_RAND_TIEBREAKER - if (lp == NULL || tw_event_sig_compare(r->e_sig, lp->pe->GVT_sig) == -1) { + if (lp == NULL || tw_event_sig_compare_ptr(&r->e_sig, &lp->pe->GVT_sig) < 0) { #else if (lp == NULL || r->time < lp->pe->GVT){ #endif From e0cc46e9d3569570712b64c797eae4ac923637f9 Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 29 May 2025 09:38:34 -0400 Subject: [PATCH 050/110] Finishing missing components to check in deep-copy/check/print functions for router_state --- src/network-workloads/model-net-mpi-replay.c | 2 +- src/networks/model-net/dragonfly-dally.C | 184 +++++++++++++------ 2 files changed, 127 insertions(+), 59 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 6468d7ff..c1040526 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -82,7 +82,7 @@ static tw_stime mean_interval = 100000; static int payload_sz = 1024; /* Doing LP IO*/ -static void * params = NULL; +static char * params = NULL; static char lp_io_dir[256] = {'\0'}; static char sampling_dir[32] = {'\0'}; static char mpi_msg_dir[32] = {'\0'}; diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 38dbe1e6..d7792fa2 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -23,6 +23,7 @@ #include "codes/model-net-lp.h" #include "codes/surrogate/init.h" #include "codes/net/dragonfly-dally.h" +#include "quicklist.h" #include "sys/file.h" #include "codes/quickhash.h" #include "codes/rc-stack.h" @@ -3302,6 +3303,11 @@ static void router_send_snapshot_events(router_state *s, tw_lp *lp) static void router_handle_snapshot_event(router_state *s, tw_bf 
*bf, terminal_dally_message *msg, tw_lp *lp) { + if (msg->packet_ID >= num_snapshots) { + fprintf(stderr, "Warning: packet_ID = %llu will not be saved in the snapshot because there are only %d spaces available.\n", msg->packet_ID, num_snapshots); + return; + } + for(int i = 0; i < s->params->radix; i++) { for(int j = 0; j < s->params->num_vcs; j++) @@ -3517,7 +3523,7 @@ static void router_dally_commit(router_state * s, if (msg->type == R_SNAPSHOT) { - if (OUTPUT_SNAPSHOT == 1) + if (OUTPUT_SNAPSHOT == 1 && msg->packet_ID < num_snapshots) { char snapshot_line[8192]; int written; @@ -7049,17 +7055,103 @@ static void router_dally_rc_event_handler(router_state * s, tw_bf * bf, } //*** ---------- START OF reverse handler checking functions ---------- *** -bool warn_incomplete_definition_terminal_state_check = false; +static void copy_rank_tbl(struct qhash_table * into, struct qhash_table const * from) { + // YES! This function is very, very slow and so are all the others. This is + // the simplest implementation we could come up with without changing how + // qhash_table works or replacing it altogether. 
Both options would need + // substantial changes to the dragonfly model + for (int i = 0; i < from->table_size; i++) { + struct dfly_qhash_entry *entry; + qlist_for_each_entry(entry, &from->array[i], hash_link) { + struct dfly_qhash_entry *new_entry = + (struct dfly_qhash_entry *)malloc(sizeof(struct dfly_qhash_entry)); + *new_entry = *entry; // There is no need to copy contents of pointer because we don't check it + qlist_add(&new_entry->hash_link, &into->array[i]); + } + } +} -static void save_terminal_state(terminal_state *into, terminal_state const *from) { - if (!warn_incomplete_definition_terminal_state_check) { - fprintf(stderr, "Warning: Deep-cloning and comparing has not been fully implemented for the (sub)LP type: `terminal_state` (Running this model under SEQUENTIAL_ROLLBACK_CHECK might not capture issues that arise from its reverse event handler).\n"); - warn_incomplete_definition_terminal_state_check = true; +static void clean_rank_tbl(struct qhash_table * rank_tbl) { + for (int i=0; i < rank_tbl->table_size; i++) { + while(!qlist_empty(&rank_tbl->array[i])) { + struct qlist_head *item = qlist_pop(&rank_tbl->array[i]); + struct dfly_qhash_entry * entry = qlist_entry(item, struct dfly_qhash_entry, hash_link); + free(entry); + } + } +} + +static bool check_dfly_qhash_entry(struct dfly_qhash_entry * before, struct dfly_qhash_entry * after) { + // We ignore the remote data fields because they won't be needed: + // - remote_event_size + // - remote_event_data + + if (before->key.sender_id != after->key.sender_id || + before->key.message_id != after->key.message_id || + before->num_chunks != after->num_chunks || + before->remaining_packets != after->remaining_packets) { + return false; } - // Missing deep-clone/comparison/print members. 
These members are always accessed, so it is possible to discover some bugs if we print their contents - // from->rank_tbl + return true; +} + +static bool check_rank_tbl(qhash_table const * before, struct qhash_table const * after) { + for (int i=0; i < before->table_size; i++) { + if (qlist_count(&before->array[i]) != qlist_count(&before->array[i])) { + return false; + } + struct dfly_qhash_entry * before_entry; + struct dfly_qhash_entry * after_entry; + qlist_for_each_entry(before_entry, &before->array[i], hash_link) { + // Yes, this is slow if there are many collisions, but often there won't be any + bool found_entry = false; + qlist_for_each_entry(after_entry, &after->array[i], hash_link) { + if (check_dfly_qhash_entry(before_entry, after_entry)) { + found_entry = true; + break; + } + } + if (!found_entry) { + return false; + } + } + } + return true; +} + +static void print_rank_tbl(FILE * out, char const * prefix, struct qhash_table * rank_tbl) { + fprintf(out, "%stable_size = %d\n", prefix, rank_tbl->table_size); + fprintf(out, "%s compare = %p\n", prefix, rank_tbl->compare); + fprintf(out, "%s hash = %p\n", prefix, rank_tbl->hash); + fprintf(out, "%s array = %p\n", prefix, rank_tbl->array); + + char addprefix[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix); + + for (int i=0; i < rank_tbl->table_size; i++) { + struct dfly_qhash_entry * entry; + qlist_for_each_entry(entry, &rank_tbl->array[i], hash_link) { + fprintf(out, "%s | {\n", prefix); + fprintf(out, "%s | key.message_id = %lu\n", prefix, entry->key.message_id); + fprintf(out, "%s | key.sender_id = %lu\n", prefix, entry->key.sender_id); + fprintf(out, "%s | num_chunks = %d\n", prefix, entry->num_chunks); + fprintf(out, "%s | remaining_packets = %d\n", prefix, entry->remaining_packets); + fprintf(out, "%s | remote_event_size = %d\n", 
prefix, entry->remote_event_size); + fprintf(out, "%s | * remote_event_data = %p\n", prefix, entry->remote_event_data); + if (entry->remote_event_size) { + tw_fprint_binary_array(out, subprefix, entry->remote_event_data, entry->remote_event_size); + } + fprintf(out, "%s | },\n", prefix); + } + } + + free(subprefix); +} +static void save_terminal_state(terminal_state *into, terminal_state const *from) { // These should be deep-cloned/compared/printed if we want to run the functionality they are activated at // from->predictor_data // from->sample_stat @@ -7120,6 +7212,9 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from save_tlc_state(into->local_congestion_controller, from->local_congestion_controller); } + into->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); + copy_rank_tbl(into->rank_tbl, from->rank_tbl); + // I would use the C++ amgic to copy these containers but they don't work as well :S new (&into->remaining_sz_packets) map(); new (&into->zombies) set(); @@ -7181,6 +7276,9 @@ static void clean_terminal_state(terminal_state *state) { free(state->local_congestion_controller); } + clean_rank_tbl(state->rank_tbl); + qhash_finalize(state->rank_tbl); + state->remaining_sz_packets.~map(); state->zombies.~set(); } @@ -7280,6 +7378,8 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= check_tlc_state(before->local_congestion_controller, after->local_congestion_controller); } + is_same &= check_rank_tbl(before->rank_tbl, after->rank_tbl); + is_same &= before->remaining_sz_packets == after->remaining_sz_packets; is_same &= before->zombies == after->zombies; @@ -7440,7 +7540,16 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? 
state->anno : "(nil)"); fprintf(out, "%s | * params = %p\n", prefix, state->params); - fprintf(out, "%s | * rank_tbl = %p\n", prefix, state->rank_tbl); + + fprintf(out, "%s | * rank_tbl = {\n", prefix); + char addprefix_4[] = " | "; + len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_4) + 1; + subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_4); + print_rank_tbl(out, subprefix, state->rank_tbl); + free(subprefix); + fprintf(out, "%s | }\n", prefix); + fprintf(out, "%s | rank_tbl_pop = %lu\n", prefix, state->rank_tbl_pop); fprintf(out, "%s | total_time = %g\n", prefix, state->total_time); fprintf(out, "%s | total_msg_size = %lu\n", prefix, state->total_msg_size); @@ -7610,16 +7719,6 @@ static void save_router_state(router_state *into, router_state const *from) { } } - into->snapshot_data = NULL; - if (num_snapshots) { - into->snapshot_data = (int**) malloc(num_snapshots * sizeof(int*)); - int size_snapshot = from->params->num_vcs * from->params->radix; - for (int i = 0; i < num_snapshots; i++) { - into->snapshot_data[i] = (int*) malloc(size_snapshot * sizeof(int)); - memcpy(into->snapshot_data[i], from->snapshot_data[i], size_snapshot * sizeof(int)); - } - } - if (p->counting_bool > 0) { assert(from->agg_busy_time != NULL); assert(from->agg_link_traffic != NULL); @@ -7681,13 +7780,6 @@ static void clean_router_state(router_state *state) { free(state->pending_msgs); free(state->queued_msgs); - if (num_snapshots) { - for (int i = 0; i < num_snapshots; i++) { - free(state->snapshot_data[i]); - } - free(state->snapshot_data); - } - if (p->counting_bool > 0) { for (int i = 0; i < p->counting_windows; i++) { free(state->agg_busy_time[i]); @@ -7705,6 +7797,13 @@ static void clean_router_state(router_state *state) { // Original function implemented by Claude static bool check_router_state(router_state const *before, router_state const *after) { + // The following are not checked because they 
don't influence any other + // components of the router state, ie, they are never used to change + // the simulation behavior. + // - snapshot_data + // - fwd_events + // - rev_events + dragonfly_param const * p = before->params; int const radix = p->radix; int const num_qos_levels = p->num_qos_levels; @@ -7760,24 +7859,6 @@ static bool check_router_state(router_state const *before, router_state const *a } } - if ((before->snapshot_data == NULL) != (after->snapshot_data == NULL)) { - return false; - } - - if (num_snapshots) { - assert(before->snapshot_data != NULL); - int size_snapshot = before->params->num_vcs * before->params->radix; - for (int i = 0; i < num_snapshots; i++) { - assert(after->snapshot_data[i] == NULL); - - for (int j = 0; j < size_snapshot; j++) { - if (before->snapshot_data[i][j] != after->snapshot_data[i][j]) { - return false; - } - } - } - } - if ((before->agg_busy_time == NULL) != (after->agg_busy_time == NULL)) { return false; } @@ -7987,20 +8068,7 @@ static void print_router_state(FILE * out, char const * prefix, router_state * s fprintf(out, "%s | * anno = %s\n", prefix, state->anno ? state->anno : "(nil)"); fprintf(out, "%s | * params = %p\n", prefix, state->params); - if (num_snapshots) { - fprintf(out, "%s | ** snapshot_data[%d][%d] = [\n", prefix, num_snapshots, radix); - int size_snapshot = p->num_vcs * p->radix; - for (int i = 0; i < num_snapshots; i++) { - fprintf(out, "%s | snapshot %d: [", prefix, i); - for (int j = 0; j < size_snapshot; j++) { - fprintf(out, "%s%d", j ? 
", " : "", state->snapshot_data[i][j]); - } - fprintf(out, "]\n"); - } - fprintf(out, "%s | ]\n", prefix); - } else { - fprintf(out, "%s | ** snapshot_data = %p\n", prefix, state->snapshot_data); - } + fprintf(out, "%s | ** snapshot_data = %p\n", prefix, state->snapshot_data); fprintf(out, "%s | output_buf = '%.4096s'\n", prefix, state->output_buf); fprintf(out, "%s | * rsamples = %p\n", prefix, state->rsamples); From 0e7669355c6c5d0da881358d6eecbdb6bf76f626 Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 29 May 2025 13:05:19 -0400 Subject: [PATCH 051/110] Moving general PDES code into ROSS --- src/surrogate/switch.c | 137 ++--------------------------------------- 1 file changed, 5 insertions(+), 132 deletions(-) diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index dede0cf5..f15aafe8 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include double surrogate_switching_time = 0.0; double time_in_surrogate = 0.0; @@ -9,36 +11,6 @@ static double surrogate_time_last = 0.0; // === Director functionality // - -//static void offset_future_events_in_causality_list(double switch_offset, tw_event_sig gvt) { -// (void) switch_offset; -// (void) gvt; -// int events_processed = 0; -// int events_modified = 0; -// for (unsigned int i = 0; i < g_tw_nkp; i++) { -// tw_kp * const this_kp = g_tw_kp[i]; -// -// //assert(this_kp->pevent_q.size == 0); -// // All events in pevent_q are sent into the future -// assert((this_kp->pevent_q.tail == NULL) == (this_kp->pevent_q.size == 0)); -// tw_event * cur_event = this_kp->pevent_q.tail; -// while (cur_event) { -// if (!is_workload_event(cur_event) && tw_event_sig_compare(cur_event->sig, gvt) > 0) { -// cur_event->recv_ts += switch_offset; -// cur_event->sig.recv_ts = cur_event->recv_ts; -// events_modified++; -// } -// -// cur_event = cur_event->prev; -// events_processed++; -// } -// } -// if (DEBUG_DIRECTOR > 1 && g_tw_mynode == 0) { -// printf("PE %lu: 
Total events from causality modified %d (from total processed %d)\n", g_tw_mynode, events_modified, events_processed); -// } -//} - - static struct lp_types_switch const * get_type_switch(char const * const name) { for (size_t i = 0; i < surr_config.n_lp_types; i++) { //printf("THIS %s and %s\n", surr_config.lp_types[i].lpname, name); @@ -50,72 +22,6 @@ static struct lp_types_switch const * get_type_switch(char const * const name) { } -// MPI barrier to determine if anyone has a true value `val`. Returns true if anyone says "TRUE" -static inline bool does_any_pe(bool val) { - bool global_val; - if(MPI_Allreduce(&val, &global_val, 1, MPI_C_BOOL, MPI_LOR, MPI_COMM_ROSS) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce for custom rollback and cleanup failed"); - } - return global_val; -} - - -//static tw_event_sig find_sig_smallest_larger_than(double switch_, tw_kp * kp, tw_event_sig gvt) { -// //printf("Just testing, I'm here! size=%d\n", kp->pevent_q.size); -// tw_event * cur_event = kp->pevent_q.tail; -// while (cur_event) { -// //printf("Current timestamp to rollback (%e) and gvt (%e)\n", cur_event->sig.recv_ts, gvt.recv_ts); -// if (tw_event_sig_compare(cur_event->sig, gvt) < 0 && switch_ <= cur_event->sig.recv_ts) { -// gvt = cur_event->sig; -// } -// cur_event = cur_event->prev; -// } -// return gvt; -//} - - -static void rollback_and_cancel_events_pe(tw_pe * pe) { -#ifdef USE_RAND_TIEBREAKER - tw_event_sig const gvt_sig = pe->GVT_sig; - tw_stime const gvt = gvt_sig.recv_ts; - // Backtracking the simulation to GVT - for (unsigned int i = 0; i < g_tw_nkp; i++) { - tw_kp_rollback_to_sig(g_tw_kp[i], &gvt_sig); - } - assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); - assert(pe->GVT_sig.recv_ts == gvt); // redundant but needed because compiler cries that gvt is never used -#else - tw_stime const gvt = pe->GVT; - // Backtracking the simulation to GVT - for (unsigned int i = 0; i < g_tw_nkp; i++) { - tw_kp_rollback_to(g_tw_kp[i], gvt); - } - 
assert(pe->GVT == gvt); -#endif - - // Making sure that everything gets cleaned up properly (AVL tree should be empty by the end) - do { - if (tw_nnodes() > 1) { - double const start = tw_clock_read(); - tw_net_read(pe); - pe->stats.s_net_read += tw_clock_read() - start; - } - - pe->gvt_status = 1; - tw_sched_event_q(pe); - tw_sched_cancel_q(pe); - tw_gvt_step2(pe); - - if (DEBUG_DIRECTOR > 1) { - printf("PE %lu: Time stamp at the end of GVT time: %f - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size); - } - } while (does_any_pe(pe->cancel_q != NULL) || does_any_pe(pe->event_q.size != 0)); - - if (DEBUG_DIRECTOR > 1) { - printf("PE %lu: All events rolledbacked and cancelled\n", g_tw_mynode); - } -} - static void shift_events_to_future_pe(tw_pe * pe) { #ifdef USE_RAND_TIEBREAKER tw_event_sig gvt_sig = pe->GVT_sig; @@ -347,7 +253,7 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { // This will force a global update on all the new remote events (instead of waiting until the next GVT cycle to update events to process) if (g_tw_synchronization_protocol == OPTIMISTIC) { - rollback_and_cancel_events_pe(pe); + tw_scheduler_rollback_and_cancel_events_pe(pe); } assert(lps_events[0] != NULL); @@ -411,28 +317,11 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { } } -bool hit_trigger(tw_stime gvt) { - if ( switch_at.current_i < switch_at.total - && g_tw_trigger_gvt_hook.active == GVT_HOOK_triggered) { - double const switch_time = switch_at.time_stampts[switch_at.current_i]; -#ifdef USE_RAND_TIEBREAKER - assert(g_tw_trigger_gvt_hook.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); -#else - assert(g_tw_trigger_gvt_hook.at == switch_at.time_stampts[switch_at.current_i]); -#endif - assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it - - return true; - } else { - return false; - } -} - void switch_model(tw_pe * pe) { // Rollback if in optimistic mode if 
(g_tw_synchronization_protocol == OPTIMISTIC) { - rollback_and_cancel_events_pe(pe); + tw_scheduler_rollback_and_cancel_events_pe(pe); } surr_config.director.switch_surrogate(); if (DEBUG_DIRECTOR && g_tw_mynode == 0) { @@ -468,20 +357,8 @@ void director_call(tw_pe * pe) { fflush(stdout); } if (DEBUG_DIRECTOR == 3) { - printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt, + printf("GVT %d at %f in %s\n", i++, gvt, surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition"); - - switch (g_tw_trigger_gvt_hook.active) { - case GVT_HOOK_enabled: - printf("enabled\n"); - break; - case GVT_HOOK_disabled: - printf("disabled\n"); - break; - case GVT_HOOK_triggered: - printf("triggered\n"); - break; - } } } @@ -501,10 +378,6 @@ void director_call(tw_pe * pe) { return; } - // Detecting if we are going to switch - if (! hit_trigger(gvt)) { - return; - } // ---- Past this means that we are in fact switching ---- bool const pre_switch_status = surr_config.director.is_surrogate_on(); From 01e6bf61fd43cb825009b6b1ac399d2a606ad86b Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 30 May 2025 11:07:55 -0400 Subject: [PATCH 052/110] Renaming surrogate as network-surrogate --- codes/surrogate/init.h | 12 ++--- .../{switch.h => network-surrogate.h} | 5 +- .../tutorial-ping-pong-surrogate.conf.in | 2 +- src/CMakeLists.txt | 2 +- src/networks/model-net/dragonfly-dally.C | 54 +++++++++---------- src/surrogate/init.c | 31 ++++++----- .../{switch.c => network-surrogate.c} | 10 ++-- 7 files changed, 57 insertions(+), 59 deletions(-) rename codes/surrogate/{switch.h => network-surrogate.h} (95%) rename src/surrogate/{switch.c => network-surrogate.c} (98%) diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h index 11ad5027..3a9a2169 100644 --- a/codes/surrogate/init.h +++ b/codes/surrogate/init.h @@ -8,7 +8,7 @@ * Copyright (c) 2023 Rensselaer Polytechnic Institute */ #include "codes/surrogate/packet-latency-predictor/common.h" -#include 
"codes/surrogate/switch.h" +#include "codes/surrogate/network-surrogate.h" // A simple macro to clarify code a bit #define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); } @@ -32,7 +32,7 @@ extern "C" { void print_surrogate_stats(void); -struct surrogate_config { +struct network_surrogate_config { struct director_data director; //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation int total_terminals; //!< total number of terminals size_t n_lp_types; @@ -40,14 +40,14 @@ struct surrogate_config { }; /** Loads surrogate configuration, including packet latency predictor. */ -void surrogate_configure( +void network_surrogate_configure( char const * const annotation, - struct surrogate_config * const config, + struct network_surrogate_config * const config, struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it ); -extern struct surrogate_config surr_config; -extern bool is_surrogate_configured; +extern struct network_surrogate_config surr_config; +extern bool is_network_surrogate_configured; #ifdef __cplusplus } diff --git a/codes/surrogate/switch.h b/codes/surrogate/network-surrogate.h similarity index 95% rename from codes/surrogate/switch.h rename to codes/surrogate/network-surrogate.h index d23abb00..f941ea9f 100644 --- a/codes/surrogate/switch.h +++ b/codes/surrogate/network-surrogate.h @@ -60,9 +60,8 @@ struct switch_at_struct { extern struct switch_at_struct switch_at; - -// Switch -void director_call(tw_pe * pe); +// Main function responsible for switching between high-fidelity and (network) surrogate +void network_director(tw_pe * pe); #ifdef __cplusplus } diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in index 6d2b3e58..04d2c94f 100644 --- a/doc/example/tutorial-ping-pong-surrogate.conf.in +++ 
b/doc/example/tutorial-ping-pong-surrogate.conf.in @@ -58,7 +58,7 @@ PARAMS # router buffer occupancy snapshots router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} ); } -SURROGATE { +NETWORK_SURROGATE { # determines the director switching from surrogate to high-def simulation strategy director_mode="at-fixed-virtual-times"; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d82c2584..55e97215 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,7 +56,7 @@ list(APPEND SRCS util/congestion-controller.C surrogate/init.c - surrogate/switch.c + surrogate/network-surrogate.c surrogate/packet-latency-predictor/common.c surrogate/packet-latency-predictor/average.c diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index d7792fa2..3797c8ae 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -189,8 +189,8 @@ static void setup_packet_latency_path(char const * const dir_to_save); // ==== START OF Parameters to tune surrogate mode ==== // -static bool surrogate_configured = false; -static bool is_surrogate_on = false; +static bool dally_surrogate_configured = false; +static bool is_dally_surrogate_on = false; static struct packet_latency_predictor * terminal_predictor = NULL; static void switch_surrogate(void); static bool is_surrogate_on_fun(void); @@ -2435,10 +2435,10 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) // START Surrogate configuration char director_mode[MAX_NAME_LENGTH]; director_mode[0] = '\0'; - int director_mode_len = configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH); + int director_mode_len = configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH); // if surrogate mode has been set up if (director_mode_len > 0) { - struct surrogate_config surr_conf = { + struct network_surrogate_config surr_conf = { .director = 
{.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun}, .total_terminals = p->total_terminals, .n_lp_types = 2, @@ -2460,9 +2460,9 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) 0 } }; - surrogate_configure(anno, &surr_conf, &terminal_predictor); + network_surrogate_configure(anno, &surr_conf, &terminal_predictor); if (terminal_predictor) { - surrogate_configured = true; + dally_surrogate_configured = true; } else { tw_error(TW_LOC, "Latency predictor is NULL. Something during surrogate configuration failed."); } @@ -2987,11 +2987,11 @@ static inline void packet_latency_save_to_file( // ==== START OF Surrogate functions definition ==== static void switch_surrogate(void) { - is_surrogate_on = ! is_surrogate_on; + is_dally_surrogate_on = ! is_dally_surrogate_on; } static bool is_surrogate_on_fun(void) { - return is_surrogate_on; + return is_dally_surrogate_on; } static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t packet_ID, double end_time) { @@ -3002,8 +3002,8 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t pa .next_packet_delay = sent.next_packet_delay, }; - packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_surrogate_on, false); - if (surrogate_configured && !is_surrogate_on) { + packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_dally_surrogate_on, false); + if (dally_surrogate_configured && !is_dally_surrogate_on) { assert(terminal_predictor != NULL); terminal_predictor->feed(s->predictor_data, lp, s->terminal_id, &sent.start, &end); } @@ -3085,7 +3085,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate( latency = 0; } - packet_latency_save_to_file(s->terminal_id, &sent.start, &predicted_end, is_surrogate_on, true); + packet_latency_save_to_file(s->terminal_id, &sent.start, &predicted_end, is_dally_surrogate_on, true); assert(sent.message_data); terminal_dally_message * const msg_data = 
(terminal_dally_message*) sent.message_data; @@ -3321,7 +3321,7 @@ static void router_handle_snapshot_event(router_state *s, tw_bf *bf, terminal_da } static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - if (!packet_latency_f && !surrogate_configured) { + if (!packet_latency_f && !dally_surrogate_configured) { return; } @@ -3379,7 +3379,7 @@ static void terminal_dally_commit(terminal_state * s, switch (msg->type) { case T_GENERATE: if(bf->c10) { // if the packet was sent as a prediction, store the prediction in memory - assert(surrogate_configured); + assert(dally_surrogate_configured); auto start = (struct packet_start) { .packet_ID = msg->packet_ID, .dest_terminal_lpid = msg->dest_terminal_lpid, @@ -3396,7 +3396,7 @@ static void terminal_dally_commit(terminal_state * s, .travel_end_time = msg->travel_end_time, .next_packet_delay = msg->saved_next_packet_delay, }; - packet_latency_save_to_file(s->terminal_id, &start, &end, is_surrogate_on, true); + packet_latency_save_to_file(s->terminal_id, &start, &end, is_dally_surrogate_on, true); // If we had latency info for the last packet transmitted, then we have to store it into memory and clean the variable if (s->arrival_of_last_packet.packet_ID != -1) { @@ -3410,7 +3410,7 @@ static void terminal_dally_commit(terminal_state * s, .next_packet_delay = -1, }; - packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_surrogate_on, false); + packet_latency_save_to_file(s->terminal_id, &sent.start, &end, is_dally_surrogate_on, false); s->sent_packets.erase(s->arrival_of_last_packet.packet_ID); s->arrival_of_last_packet.packet_ID = -1; @@ -5497,7 +5497,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message tmp->remaining_packets--; //printf("Good day sir, not a zombie! 
LPID=%d packet_ID = %d dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id); - if (packet_latency_f || surrogate_configured) { + if (packet_latency_f || dally_surrogate_configured) { notify_src_lp_on_total_latency(lp, msg); //} else { // // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact @@ -5589,7 +5589,7 @@ static void terminal_buf_update(terminal_state * s, static void dragonfly_dally_terminal_final( terminal_state * s, tw_lp * lp ) { - if (freeze_network_on_switch && is_surrogate_on) { + if (freeze_network_on_switch && is_dally_surrogate_on) { dragonfly_dally_terminal_surrogate_to_highdef(s, lp, NULL); } // printf("terminal id %d\n",s->terminal_id); @@ -6851,7 +6851,7 @@ terminal_dally_event( terminal_state * s, assert(msg->magic == terminal_magic_num); //printf("LPID: %llu Event type %d processed at %f\n", lp->gid, msg->type, tw_now(lp)); - if (is_surrogate_on && freeze_network_on_switch) { + if (is_dally_surrogate_on && freeze_network_on_switch) { // This event will be reversed. It comes from the past, it has been forwarded to the future // by the surrogate freezing the network procedure and should not be taken into account if (! 
(msg->type == T_GENERATE || msg->type == T_ARRIVE_PREDICTED || msg->type == T_NOTIFY)) { @@ -6865,7 +6865,7 @@ terminal_dally_event( terminal_state * s, switch(msg->type) { case T_GENERATE: - if (is_surrogate_on) { + if (is_dally_surrogate_on) { bf->c10 = 1; packet_generate_predicted(s,bf,msg,lp); } else { @@ -7165,7 +7165,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from int const num_qos_levels = p->num_qos_levels; int const num_rails = p->num_rails; - if (!is_surrogate_on) { + if (!is_dally_surrogate_on) { into->vc_occupancy = (int **) malloc(num_rails * sizeof(int*)); into->terminal_length = (int**) malloc(num_rails * sizeof(int*)); into->last_buf_full = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); @@ -7243,7 +7243,7 @@ static void clean_terminal_state(terminal_state *state) { int const num_rails = p->num_rails; int const num_qos_levels = p->num_qos_levels; - if (!is_surrogate_on) { + if (!is_dally_surrogate_on) { for (int i = 0; i < num_rails; i++) { free(state->vc_occupancy[i]); free(state->terminal_length[i]); @@ -7343,7 +7343,7 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->anno == after->anno); } - if (!is_surrogate_on) { + if (!is_dally_surrogate_on) { dragonfly_param const * p = before->params; int const num_qos_levels = p->num_qos_levels; int const num_rails = p->num_rails; @@ -7420,7 +7420,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | workloads_finished_flag = %d\n", prefix, state->workloads_finished_flag); - if (is_surrogate_on) { + if (is_dally_surrogate_on) { fprintf(out, "%s | ** vc_occupancy = %p\n", prefix, state->vc_occupancy); fprintf(out, "%s | *terminal_available_time = %p\n", prefix, state->terminal_available_time); fprintf(out, "%s | *** terminal_msgs = %p\n", prefix, state->terminal_msgs); @@ -7459,7 +7459,7 @@ static void print_terminal_state(FILE * out, char const * prefix, 
terminal_state fprintf(out, "%s | *** terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail); - if (is_surrogate_on) { + if (is_dally_surrogate_on) { fprintf(out, "%s | * in_send_loop = %p\n", prefix, state->in_send_loop); } else { fprintf(out, "%s | * in_send_loop[%d] = [", prefix, state->params->num_rails); @@ -7481,7 +7481,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | ]\n", prefix); free(subprefix); - if (is_surrogate_on) { + if (is_dally_surrogate_on) { fprintf(out, "%s | ** qos_status = %p\n", prefix, state->qos_status); fprintf(out, "%s | ** qos_data = %p\n", prefix, state->qos_data); fprintf(out, "%s | * last_qos_lvl = %p\n", prefix, state->last_qos_lvl); @@ -7517,7 +7517,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | * st = %p\n", prefix, state->st); fprintf(out, "%s | * cc_st = %p\n", prefix, state->cc_st); - if (is_surrogate_on) { + if (is_dally_surrogate_on) { fprintf(out, "%s | * issueIdle = %p\n", prefix, state->issueIdle); fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); } else { @@ -7558,7 +7558,7 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | finished_chunks = %ld\n", prefix, state->finished_chunks); fprintf(out, "%s | finished_packets = %ld\n", prefix, state->finished_packets); - if (is_surrogate_on) { + if (is_dally_surrogate_on) { fprintf(out, "%s | ** terminal_length = %p\n", prefix, state->terminal_length); fprintf(out, "%s | * last_buf_full = %p\n", prefix, state->last_buf_full); fprintf(out, "%s | * busy_time = %p\n", prefix, state->busy_time); diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 4ed587c6..7c969924 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -1,5 +1,4 @@ #include -#include #include #ifdef USE_TORCH @@ -7,15 +6,15 @@ #endif bool freeze_network_on_switch = true; -struct surrogate_config 
surr_config = {0}; -bool is_surrogate_configured = false; +struct network_surrogate_config surr_config = {0}; +bool is_network_surrogate_configured = false; struct switch_at_struct switch_at; static struct packet_latency_predictor current_predictor = {0}; // === Stats! void print_surrogate_stats(void) { - if(is_surrogate_configured && g_tw_mynode == 0) { + if(is_network_surrogate_configured && g_tw_mynode == 0) { printf("\nTotal time spent on surrogate-mode: %.4f\n", (double) time_in_surrogate / g_tw_clock_rate); printf("Total time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate); } @@ -24,14 +23,14 @@ void print_surrogate_stats(void) { // === All things Surrogate Configuration -void surrogate_configure( +void network_surrogate_configure( char const * const anno, - struct surrogate_config * const sc, + struct network_surrogate_config * const sc, struct packet_latency_predictor ** pl_pred ) { assert(sc); assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES); - is_surrogate_configured = true; + is_network_surrogate_configured = true; // This is the only place where the director data should be loaded and set up surr_config = *sc; @@ -39,14 +38,14 @@ void surrogate_configure( // Determining which director mode to set up char director_mode[MAX_NAME_LENGTH]; director_mode[0] = '\0'; - configuration_get_value(&config, "SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH); + configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH); if (strcmp(director_mode, "at-fixed-virtual-times") == 0) { - PRINTF_ONCE("\nSurrogate activated switching at fixed virtual times: "); + PRINTF_ONCE("\nNetwork surrogate activated switching at fixed virtual times: "); // Loading timestamps char **timestamps; size_t len; - configuration_get_multivalue(&config, "SURROGATE", "fixed_switch_timestamps", anno, ×tamps, &len); + configuration_get_multivalue(&config, 
"NETWORK_SURROGATE", "fixed_switch_timestamps", anno, ×tamps, &len); switch_at.current_i = 0; switch_at.total = len; @@ -64,7 +63,7 @@ void surrogate_configure( PRINTF_ONCE("\n"); // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT - g_tw_gvt_hook = director_call; + g_tw_gvt_hook = network_director; tw_trigger_gvt_hook_at(switch_at.time_stampts[0]); @@ -80,7 +79,7 @@ void surrogate_configure( // Determining which predictor to set up and return char latency_pred_name[MAX_NAME_LENGTH]; latency_pred_name[0] = '\0'; - configuration_get_value(&config, "SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH); + configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH); if (*latency_pred_name) { if (strcmp(latency_pred_name, "average") == 0) { current_predictor = average_latency_predictor(surr_config.total_terminals); @@ -90,14 +89,14 @@ void surrogate_configure( } else if (strcmp(latency_pred_name, "torch-jit") == 0) { char torch_jit_mode[MAX_NAME_LENGTH]; torch_jit_mode[0] = '\0'; - configuration_get_value(&config, "SURROGATE", "torch_jit_mode", anno, torch_jit_mode, MAX_NAME_LENGTH); + configuration_get_value(&config, "NETWORK_SURROGATE", "torch_jit_mode", anno, torch_jit_mode, MAX_NAME_LENGTH); if (strcmp(torch_jit_mode, "single-static-model-for-all-terminals") != 0) { tw_error(TW_LOC, "Unknown torch-jit mode `%s`", torch_jit_mode); } char torch_jit_model_path[MAX_NAME_LENGTH]; torch_jit_model_path[0] = '\0'; - configuration_get_value(&config, "SURROGATE", "torch_jit_model_path", anno, torch_jit_model_path, MAX_NAME_LENGTH); + configuration_get_value(&config, "NETWORK_SURROGATE", "torch_jit_model_path", anno, torch_jit_model_path, MAX_NAME_LENGTH); surrogate_torch_init(torch_jit_model_path); *pl_pred = &torch_latency_predictor; @@ -118,7 +117,7 @@ void surrogate_configure( } // Finding out whether to ignore some packet latencies - int 
rc = configuration_get_value_double(&config, "SURROGATE", "ignore_until", anno, &ignore_until); + int rc = configuration_get_value_double(&config, "NETWORK_SURROGATE", "ignore_until", anno, &ignore_until); if (rc) { ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered PRINTF_ONCE("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n"); @@ -129,7 +128,7 @@ void surrogate_configure( // Determining which predictor to set up and return char network_treatment_name[MAX_NAME_LENGTH]; network_treatment_name[0] = '\0'; - configuration_get_value(&config, "SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH); + configuration_get_value(&config, "NETWORK_SURROGATE", "network_treatment_on_switch", anno, network_treatment_name, MAX_NAME_LENGTH); if (*network_treatment_name) { if (strcmp(network_treatment_name, "freeze") == 0) { freeze_network_on_switch = true; diff --git a/src/surrogate/switch.c b/src/surrogate/network-surrogate.c similarity index 98% rename from src/surrogate/switch.c rename to src/surrogate/network-surrogate.c index f15aafe8..38875061 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/network-surrogate.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -31,7 +31,7 @@ static void shift_events_to_future_pe(tw_pe * pe) { #endif tw_event * next_event = tw_pq_dequeue(pe->pq); - // If there aren't any events left to process, the simulation has already finished and we have nothing to do + // If there aren't any events left to process, then this PE has nothing to do if (next_event == NULL) { return; } @@ -207,10 +207,10 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode"); } - tw_event *** lps_events = order_events_per_lps(pe); printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, 
pe->avl_tree_size); shift_events_to_future_pe(pe); printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size); + tw_event *** lps_events = order_events_per_lps(pe); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -341,8 +341,8 @@ void switch_model(tw_pe * pe) { } -void director_call(tw_pe * pe) { - assert(is_surrogate_configured); +void network_director(tw_pe * pe) { + assert(is_network_surrogate_configured); #ifdef USE_RAND_TIEBREAKER tw_stime gvt = pe->GVT_sig.recv_ts; From ab3b9511da7fa4f98a438812c908149ddd37a286 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 30 May 2025 15:12:22 -0400 Subject: [PATCH 053/110] Renaming network average predictor to allow for more predictors --- .../packet-latency-predictor/common.h | 20 +++++++++---------- .../packet-latency-predictor/average.c | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h index aae0f0d7..61b0283c 100644 --- a/codes/surrogate/packet-latency-predictor/common.h +++ b/codes/surrogate/packet-latency-predictor/common.h @@ -36,18 +36,18 @@ struct packet_end { }; // Definition of functions needed to define a predictor -typedef void (*init_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM) -typedef void (*feed_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now` -typedef struct packet_end (*predict_pred_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now` -typedef void (*predict_pred_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one 
prediction) +typedef void (*init_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM) +typedef void (*feed_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now` +typedef struct packet_end (*predict_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now` +typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction) -// Each network model defines its own way to setup the packet latency predictor +// API for packet latency predictors struct packet_latency_predictor { - init_pred_f init; - feed_pred_f feed; - predict_pred_f predict; - predict_pred_rc_f predict_rc; - size_t predictor_data_sz; // `predictor_data` size + init_pred_lat_f init; + feed_pred_lat_f feed; + predict_pred_lat_f predict; + predict_pred_lat_rc_f predict_rc; + size_t predictor_data_sz; // `predictor_data` size }; #ifdef __cplusplus diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c index 88f084d3..82db7e1c 100644 --- a/src/surrogate/packet-latency-predictor/average.c +++ b/src/surrogate/packet-latency-predictor/average.c @@ -104,10 +104,10 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) { struct packet_latency_predictor average_latency_predictor(int num_terminals) { return (struct packet_latency_predictor) { - .init = (init_pred_f) init_pred, - .feed = (feed_pred_f) feed_pred, - .predict = (predict_pred_f) predict_latency, - .predict_rc = (predict_pred_rc_f) predict_latency_rc, + .init = (init_pred_lat_f) init_pred, + .feed = (feed_pred_lat_f) feed_pred, + .predict = (predict_pred_lat_f) predict_latency, + .predict_rc = 
(predict_pred_lat_rc_f) predict_latency_rc, .predictor_data_sz = sizeof(struct latency_surrogate) + num_terminals * sizeof(struct aggregated_latency_one_terminal) }; } From 77964abc300c17e64602824c7ebf46bf8eb69986 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 30 May 2025 15:22:34 -0400 Subject: [PATCH 054/110] Network predictors do not need to allocate memory when initialized It is possible to allocate all memory needed for all predictors within a PE in the predictors' .c file, even before initializing any predictor (calling `.init`). Thus a predictor might have size zero --- src/networks/model-net/dragonfly-dally.C | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 3797c8ae..0fec30f6 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -3704,11 +3704,12 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) s->frozen_state = NULL; // alloc'ing memory for predictor, calling initiliazer for predictor - if (terminal_predictor != NULL && terminal_predictor->predictor_data_sz > 0) { - s->predictor_data = calloc(1, terminal_predictor->predictor_data_sz); + s->predictor_data = NULL; + if (terminal_predictor != NULL) { + if (terminal_predictor->predictor_data_sz > 0) { + s->predictor_data = calloc(1, terminal_predictor->predictor_data_sz); + } terminal_predictor->init(s->predictor_data, lp, s->terminal_id); - } else { - s->predictor_data = NULL; } s->last_in_queue_time = 0; return; From 8c65ec238ab9c2ca9e390b703efae5ae18b279c4 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 30 May 2025 18:01:59 -0400 Subject: [PATCH 055/110] Each computer node tracks its own workload id --- src/network-workloads/model-net-mpi-replay.c | 23 ++++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c 
index c1040526..f456aab0 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -67,7 +67,6 @@ char workload_type[128]; char workload_name[128]; char workload_file[8192]; char offset_file[8192]; -static int wrkld_id; static int num_net_traces = 0; static int prioritize_collectives = 0; static int num_dumpi_traces = 0; @@ -293,7 +292,7 @@ struct nw_state #endif /* if LP_DEBUG */ long num_events_per_lp; tw_lpid nw_id; - short wrkld_end; + short wrkld_id; int app_id; int local_rank; int qos_level; @@ -1251,18 +1250,18 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * // consuming all events until indicated iteration is reached bool reached_end = false; while (!reached_end) { - codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op); + codes_workload_get_next(s->wrkld_id, s->app_id, s->local_rank, &mpi_op); switch (mpi_op.op_type) { case CODES_WK_MARK: if (mpi_op.u.send.tag == resume_at_iter) { reached_end = true; - codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op); + codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, &mpi_op); } break; // If we reach the end of simulation, rollback once to allow the operation to be processed normally case CODES_WK_END: - codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, &mpi_op); + codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, &mpi_op); reached_end = true; break; default: @@ -2523,6 +2522,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->qos_level = 0; //TODO: We need a more elegant solution for determining if qos is enabled or not. 
// This had been -1 but if qos is not configured (single job no workload conf file) // then this will error out + s->wrkld_id = -1; char type_name[512]; @@ -2721,8 +2721,9 @@ void nw_test_init(nw_state* s, tw_lp* lp) } else { - wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank); + s->wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank); codes_issue_next_event(lp); + printf("my wrkld_id = %d\n", s->wrkld_id); } if(enable_sampling && sampling_interval > 0) { @@ -2911,7 +2912,7 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) { - codes_workload_get_next_rc(wrkld_id, s->app_id, s->local_rank, m->mpi_op); + codes_workload_get_next_rc(s->wrkld_id, s->app_id, s->local_rank, m->mpi_op); if(m->op_type == CODES_WK_END) { @@ -3022,10 +3023,8 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l { //struct codes_workload_op * mpi_op = malloc(sizeof(struct codes_workload_op)); // printf("\n App id %d local rank %d ", s->app_id, s->local_rank); - // struct codes_workload_op mpi_op; - // codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, &mpi_op); struct codes_workload_op * mpi_op = (struct codes_workload_op*)malloc(sizeof(struct codes_workload_op)); - codes_workload_get_next(wrkld_id, s->app_id, s->local_rank, mpi_op); + codes_workload_get_next(s->wrkld_id, s->app_id, s->local_rank, mpi_op); m->mpi_op = mpi_op; m->op_type = mpi_op->op_type; @@ -3562,7 +3561,7 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) { // Basic fields is_same &= (before->num_events_per_lp == after->num_events_per_lp); is_same &= (before->nw_id == after->nw_id); - is_same &= (before->wrkld_end == after->wrkld_end); + is_same &= (before->wrkld_id == after->wrkld_id); is_same &= (before->app_id == after->app_id); is_same &= (before->local_rank == after->local_rank); is_same 
&= (before->qos_level == after->qos_level); @@ -3657,7 +3656,7 @@ static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state) #endif /* if LP_DE%sBUG */ fprintf(out, "%s | num_events_per_lp = %ld\n", prefix, state->num_events_per_lp); fprintf(out, "%s | nw_id = %lu\n", prefix, state->nw_id); - fprintf(out, "%s | wrkld_end = %d\n", prefix, state->wrkld_end); + fprintf(out, "%s | wrkld_end = %d\n", prefix, state->wrkld_id); fprintf(out, "%s | app_id = %d\n", prefix, state->app_id); fprintf(out, "%s | local_rank = %d\n", prefix, state->local_rank); fprintf(out, "%s | qos_level = %d\n", prefix, state->qos_level); From d3f75b8bc4e44d7e9f5c022fe3a81ce9d45bcf07 Mon Sep 17 00:00:00 2001 From: helq Date: Sat, 31 May 2025 17:09:44 -0400 Subject: [PATCH 056/110] Renaming another variable from surrogate to network-surrogate --- codes/surrogate/init.h | 4 +-- codes/surrogate/network-surrogate.h | 6 ++-- src/surrogate/init.c | 30 ++++++++-------- src/surrogate/network-surrogate.c | 34 +++++++++---------- .../packet-latency-predictor/average.c | 2 +- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h index 3a9a2169..6846b2e0 100644 --- a/codes/surrogate/init.h +++ b/codes/surrogate/init.h @@ -43,10 +43,10 @@ struct network_surrogate_config { void network_surrogate_configure( char const * const annotation, struct network_surrogate_config * const config, - struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor generated by. Caller must free it + struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor. 
Caller does not need to free pointer ); -extern struct network_surrogate_config surr_config; +extern struct network_surrogate_config net_surr_config; extern bool is_network_surrogate_configured; #ifdef __cplusplus diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h index f941ea9f..a6060ea1 100644 --- a/codes/surrogate/network-surrogate.h +++ b/codes/surrogate/network-surrogate.h @@ -1,5 +1,5 @@ -#ifndef CODES_SURROGATE_SWITCH_H -#define CODES_SURROGATE_SWITCH_H +#ifndef CODES_SURROGATE_NETWORK_SURROGATE_H +#define CODES_SURROGATE_NETWORK_SURROGATE_H /** * switch.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes @@ -58,7 +58,7 @@ struct switch_at_struct { double * time_stampts; // list of precise timestamps at which to switch }; -extern struct switch_at_struct switch_at; +extern struct switch_at_struct switch_network_at; // Main function responsible for switching between high-fidelity and (network) surrogate void network_director(tw_pe * pe); diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 7c969924..63f9ff89 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -6,10 +6,10 @@ #endif bool freeze_network_on_switch = true; -struct network_surrogate_config surr_config = {0}; +struct network_surrogate_config net_surr_config = {0}; bool is_network_surrogate_configured = false; -struct switch_at_struct switch_at; -static struct packet_latency_predictor current_predictor = {0}; +struct switch_at_struct switch_network_at; +static struct packet_latency_predictor current_net_predictor = {0}; // === Stats! 
@@ -33,7 +33,7 @@ void network_surrogate_configure( is_network_surrogate_configured = true; // This is the only place where the director data should be loaded and set up - surr_config = *sc; + net_surr_config = *sc; // Determining which director mode to set up char director_mode[MAX_NAME_LENGTH]; @@ -47,25 +47,25 @@ void network_surrogate_configure( size_t len; configuration_get_multivalue(&config, "NETWORK_SURROGATE", "fixed_switch_timestamps", anno, ×tamps, &len); - switch_at.current_i = 0; - switch_at.total = len; - switch_at.time_stampts = malloc(len * sizeof(double)); + switch_network_at.current_i = 0; + switch_network_at.total = len; + switch_network_at.time_stampts = malloc(len * sizeof(double)); for (size_t i = 0; i < len; i++) { errno = 0; - switch_at.time_stampts[i] = strtod(timestamps[i], NULL); + switch_network_at.time_stampts[i] = strtod(timestamps[i], NULL); if (errno == ERANGE || errno == EILSEQ){ tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]); } - PRINTF_ONCE("%g%s", switch_at.time_stampts[i], i == len-1 ? "" : ", "); + PRINTF_ONCE("%g%s", switch_network_at.time_stampts[i], i == len-1 ? 
"" : ", "); } PRINTF_ONCE("\n"); // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT g_tw_gvt_hook = network_director; - tw_trigger_gvt_hook_at(switch_at.time_stampts[0]); + tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]); // freeing timestamps before it dissapears for (size_t i = 0; i < len; i++) { @@ -82,8 +82,8 @@ void network_surrogate_configure( configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH); if (*latency_pred_name) { if (strcmp(latency_pred_name, "average") == 0) { - current_predictor = average_latency_predictor(surr_config.total_terminals); - *pl_pred = ¤t_predictor; + current_net_predictor = average_latency_predictor(net_surr_config.total_terminals); + *pl_pred = ¤t_net_predictor; #ifdef USE_TORCH } else if (strcmp(latency_pred_name, "torch-jit") == 0) { @@ -111,8 +111,8 @@ void network_surrogate_configure( ")", latency_pred_name); } } else { - current_predictor = average_latency_predictor(surr_config.total_terminals); - *pl_pred = ¤t_predictor; + current_net_predictor = average_latency_predictor(net_surr_config.total_terminals); + *pl_pred = ¤t_net_predictor; PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n"); } @@ -146,7 +146,7 @@ void network_surrogate_configure( //surr_config.director.switch_surrogate(); if (DEBUG_DIRECTOR && g_tw_mynode == 0) { - fprintf(stderr, "Simulation starting on %s mode\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); + fprintf(stderr, "Simulation starting on %s mode\n", net_surr_config.director.is_surrogate_on() ? 
"surrogate" : "high-fidelity"); } } // === END OF All things Surrogate Configuration diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c index 38875061..a8bc671c 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -12,10 +12,10 @@ static double surrogate_time_last = 0.0; // static struct lp_types_switch const * get_type_switch(char const * const name) { - for (size_t i = 0; i < surr_config.n_lp_types; i++) { + for (size_t i = 0; i < net_surr_config.n_lp_types; i++) { //printf("THIS %s and %s\n", surr_config.lp_types[i].lpname, name); - if (strcmp(surr_config.lp_types[i].lpname, name) == 0) { - return &surr_config.lp_types[i]; + if (strcmp(net_surr_config.lp_types[i].lpname, name) == 0) { + return &net_surr_config.lp_types[i]; } } return NULL; @@ -39,15 +39,15 @@ static void shift_events_to_future_pe(tw_pe * pe) { // We have to put the events back into the queue after we switch back, but if we never // switch back they will never get to be processed and thus we can clean them double switch_offset = g_tw_ts_end; - if (switch_at.current_i < switch_at.total) { - double const next_switch = switch_at.time_stampts[switch_at.current_i + 1]; + if (switch_network_at.current_i < switch_network_at.total) { + double const next_switch = switch_network_at.time_stampts[switch_network_at.current_i + 1]; double const pre_switch_time = gvt; switch_offset = next_switch - pre_switch_time; assert(pre_switch_time < next_switch); //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset); } - assert(0 <= switch_at.current_i && switch_at.current_i < switch_at.total); - double const current_switch_time = switch_at.time_stampts[switch_at.current_i]; + assert(0 <= switch_network_at.current_i && switch_network_at.current_i < switch_network_at.total); + double const current_switch_time = switch_network_at.time_stampts[switch_network_at.current_i]; assert(current_switch_time <= gvt); 
tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue @@ -323,14 +323,14 @@ void switch_model(tw_pe * pe) { if (g_tw_synchronization_protocol == OPTIMISTIC) { tw_scheduler_rollback_and_cancel_events_pe(pe); } - surr_config.director.switch_surrogate(); + net_surr_config.director.switch_surrogate(); if (DEBUG_DIRECTOR && g_tw_mynode == 0) { - printf("Switching to %s\n", surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); + printf("Switching to %s\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); } // "Freezing" network events and activating LP's switch functions if (freeze_network_on_switch) { - if (surr_config.director.is_surrogate_on()) { + if (net_surr_config.director.is_surrogate_on()) { model_net_method_switch_to_surrogate(); events_high_def_to_surrogate_switch(pe); } else { @@ -358,7 +358,7 @@ void network_director(tw_pe * pe) { } if (DEBUG_DIRECTOR == 3) { printf("GVT %d at %f in %s\n", i++, gvt, - surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition"); + net_surr_config.director.is_surrogate_on() ? 
"surrogate-mode" : "high-definition"); } } @@ -372,14 +372,14 @@ void network_director(tw_pe * pe) { // Do not process if the simulation ended if (gvt >= g_tw_ts_end) { // If the simulation ended and the surrogate is still on, stop timer checking surrogate time - if (surr_config.director.is_surrogate_on()) { + if (net_surr_config.director.is_surrogate_on()) { time_in_surrogate += tw_clock_read() - surrogate_time_last; } return; } // ---- Past this means that we are in fact switching ---- - bool const pre_switch_status = surr_config.director.is_surrogate_on(); + bool const pre_switch_status = net_surr_config.director.is_surrogate_on(); // Asking the director/model to switch if (DEBUG_DIRECTOR && g_tw_mynode == 0) { @@ -395,8 +395,8 @@ void network_director(tw_pe * pe) { surrogate_switching_time += end - start; // Setting trigger for next switch - if (++switch_at.current_i < switch_at.total) { - double next_switch = switch_at.time_stampts[switch_at.current_i]; + if (++switch_network_at.current_i < switch_network_at.total) { + double next_switch = switch_network_at.time_stampts[switch_network_at.current_i]; tw_trigger_gvt_hook_at(next_switch); } @@ -408,8 +408,8 @@ void network_director(tw_pe * pe) { } // Determining time in surrogate - if (pre_switch_status != surr_config.director.is_surrogate_on()) { - if (surr_config.director.is_surrogate_on()) { + if (pre_switch_status != net_surr_config.director.is_surrogate_on()) { + if (net_surr_config.director.is_surrogate_on()) { // Start tracking time spent in surrogate mode surrogate_time_last = end; } else { diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c index 82db7e1c..2b8af6ea 100644 --- a/src/surrogate/packet-latency-predictor/average.c +++ b/src/surrogate/packet-latency-predictor/average.c @@ -29,7 +29,7 @@ static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int assert(data->aggregated_next_packet_delay.total_msgs == 0); 
assert(data->aggregated_next_packet_delay.sum_latency == 0); - data->num_terminals = surr_config.total_terminals; + data->num_terminals = net_surr_config.total_terminals; } static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) { From 9bfa92688ee629fb7db8f4edd5c48a94f238c8a1 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 2 Jun 2025 11:47:31 -0400 Subject: [PATCH 057/110] Adding some documentation for nw_state --- src/network-workloads/model-net-mpi-replay.c | 47 ++++++++++---------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index f456aab0..9dfcd306 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -284,30 +284,40 @@ typedef struct mpi_msgs_queue mpi_msgs_queue; typedef struct completed_requests completed_requests; typedef struct pending_waits pending_waits; -/* state of the network LP. It contains the pointers to send/receive lists */ +/* + * state of the network LP. It contains the pointers to send/receive lists + * + * nw-lp's can only run one job! Which all start at time 0 + * + * Three possible states for nw-lp: + * - run application (non-synthetic workload) + * - run background noise pattern (synthetic workload) + * - do nothing + **/ struct nw_state { #if LP_DEBUG size_t num_events_processed; #endif /* if LP_DEBUG */ - long num_events_per_lp; - tw_lpid nw_id; - short wrkld_id; - int app_id; - int local_rank; - int qos_level; - int synthetic_pattern; - int is_finished; - int num_own_job_ranks_completed; //counted by the root rank 0 of a job - - //array of whether this rank knows other jobs are completed. 
- int * known_completed_jobs; + tw_lpid nw_id; // compute node id, as labeled by the network + int local_rank; // id local to the application or synthetic workload, this is the number that the application sees, their phony "MPI rank" + // Parameters used for non-synthetic workloads + short wrkld_id; // workload machinery in charge, e.g, swm + int app_id; // application id, position on the queue for this app to run + int * known_completed_jobs; //array of whether this rank knows other jobs are completed. struct rc_stack * processed_ops; struct rc_stack * processed_wait_op; struct rc_stack * matched_reqs; -// struct rc_stack * indices; + struct pending_waits * wait_op; // Pending wait operation + + // Parameters used for synthetic workload parameters + int synthetic_pattern; + int is_finished; + int num_own_job_ranks_completed; //counted by the root rank 0 of a job + + int qos_level; /* count of sends, receives, collectives and delays */ unsigned long num_sends; @@ -349,9 +359,6 @@ struct nw_state struct qlist_head completed_reqs; tw_stime cur_interval_end; - - /* Pending wait operation */ - struct pending_waits * wait_op; /* Message size latency information */ struct qhash_table * msg_sz_table; @@ -2663,12 +2670,10 @@ void nw_test_init(nw_state* s, tw_lp* lp) rc_stack_create(&s->processed_ops); rc_stack_create(&s->processed_wait_op); rc_stack_create(&s->matched_reqs); -// rc_stack_create(&s->indices); assert(s->processed_ops != NULL); assert(s->processed_wait_op != NULL); assert(s->matched_reqs != NULL); -// assert(s->indices != NULL); /* clock starts ticking when the first event is processed */ s->start_time = tw_now(lp); @@ -2773,7 +2778,6 @@ void nw_test_event_handler(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) memset(bf, 0, sizeof(tw_bf)); rc_stack_gc(lp, s->matched_reqs); -// rc_stack_gc(lp, s->indices); rc_stack_gc(lp, s->processed_ops); rc_stack_gc(lp, s->processed_wait_op); @@ -3286,7 +3290,6 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) 
//printf("\n LP %ld Time spent in communication %llu ", lp->gid, total_time - s->compute_time); rc_stack_destroy(s->matched_reqs); -// rc_stack_destroy(s->indices); rc_stack_destroy(s->processed_ops); rc_stack_destroy(s->processed_wait_op); @@ -3559,7 +3562,6 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) { bool is_same = true; // Basic fields - is_same &= (before->num_events_per_lp == after->num_events_per_lp); is_same &= (before->nw_id == after->nw_id); is_same &= (before->wrkld_id == after->wrkld_id); is_same &= (before->app_id == after->app_id); @@ -3654,7 +3656,6 @@ static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state) #if LP_DEBUG fprintf(out, "%s | num_events_processed = %zu\n", prefix, state->num_events_processed); #endif /* if LP_DE%sBUG */ - fprintf(out, "%s | num_events_per_lp = %ld\n", prefix, state->num_events_per_lp); fprintf(out, "%s | nw_id = %lu\n", prefix, state->nw_id); fprintf(out, "%s | wrkld_end = %d\n", prefix, state->wrkld_id); fprintf(out, "%s | app_id = %d\n", prefix, state->app_id); From 81099b7eb0790408a9b35d66a85156be7bdb1fbb Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 5 Jun 2025 18:27:48 -0400 Subject: [PATCH 058/110] Initial implementation of director for application iteration Many of the input parameters for the director and predictor are hardcoded. Things like, number of iterations that the application will run, number of applications and how often to call the director (how many GVTs to wait until calling it). 
--- .../app-iteration-predictor/average.h | 21 + .../app-iteration-predictor/common.h | 75 ++++ codes/surrogate/application-surrogate.h | 25 ++ codes/surrogate/init.h | 8 + src/CMakeLists.txt | 3 + src/network-workloads/model-net-mpi-replay.c | 84 +++- .../app-iteration-predictor/average.c | 398 ++++++++++++++++++ .../app-iteration-predictor/common.c | 1 + src/surrogate/application-surrogate.c | 74 ++++ src/surrogate/init.c | 24 ++ 10 files changed, 696 insertions(+), 17 deletions(-) create mode 100644 codes/surrogate/app-iteration-predictor/average.h create mode 100644 codes/surrogate/app-iteration-predictor/common.h create mode 100644 codes/surrogate/application-surrogate.h create mode 100644 src/surrogate/app-iteration-predictor/average.c create mode 100644 src/surrogate/app-iteration-predictor/common.c create mode 100644 src/surrogate/application-surrogate.c diff --git a/codes/surrogate/app-iteration-predictor/average.h b/codes/surrogate/app-iteration-predictor/average.h new file mode 100644 index 00000000..0ec22283 --- /dev/null +++ b/codes/surrogate/app-iteration-predictor/average.h @@ -0,0 +1,21 @@ +#ifndef CODES_SURROGATE_ITERATION_PREDICTOR_AVERAGE_H +#define CODES_SURROGATE_ITERATION_PREDICTOR_AVERAGE_H + +/** + * This predictor collects the time that it takes to complete an iteration, and + * uses this information as the prediction. 
The trigger becomes + */ + +#include "surrogate/app-iteration-predictor/common.h" + +struct avg_app_config { + int num_apps; + int num_nodes_in_pe; + int num_of_iters_to_feed; +}; + +struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config *); + +void free_avg_app_iteration_predictor(void); + +#endif /* end of include guard */ diff --git a/codes/surrogate/app-iteration-predictor/common.h b/codes/surrogate/app-iteration-predictor/common.h new file mode 100644 index 00000000..d2eabc99 --- /dev/null +++ b/codes/surrogate/app-iteration-predictor/common.h @@ -0,0 +1,75 @@ +#ifndef CODES_SURROGATE_ITERATION_PREDICTOR_COMMON_H +#define CODES_SURROGATE_ITERATION_PREDICTOR_COMMON_H + +/** + * common.h -- common datatypes and functionality to all application iteration predictors + * -Elkin Cruz + * + * Copyright (c) 2025 Rensselaer Polytechnic Institute + */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Iteration application prediction machinery. Notice that any of these predictors have to know how many iterations to run in total, thus they need data about the number of steps the application will take. 
+ */ + +struct app_iter_node_config { + int app_id; + int app_ending_iter; +}; + +// This returns how much to skip ahead and when to restart +struct iteration_pred { + int resume_at_iter; + double restart_at; +}; + +enum FAST_FORWARD { + FAST_FORWARD_switching = 0, + FAST_FORWARD_restart, // Stop accumulating data (we gain nothing from switching to surrogate-mode) and restart at future point in time +}; +struct fast_forward_values { + enum FAST_FORWARD status; // Are we switching to surrogate-mode + // Only needed for "switching" and "restart" + double restarting_at; // Time at which we will have fully restarted (or expect to) +}; + + +// Model calls to predictor +typedef void (*init_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config *); // Initializes the predictor (eg, average) +typedef void (*feed_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id, double iteration_time); // Feeds last iteration time +typedef void (*end_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, double time); // Tells the predictor that the application has stopped running +typedef struct iteration_pred (*predict_pred_iter_f) (tw_lp * lp, int nw_id_in_pe); // Get prediction +typedef void (*predict_pred_iter_rc_f) (tw_lp * lp, int nw_id_in_pe); // Reverse prediction (reverse state of predictor one prediction) +// Director calls to predictor module +typedef bool (*have_we_hit_switch_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id); // Are we ready to switch to a future iteration? +typedef bool (*is_predictor_read_f) (void); // Checking if it is a good time to switch (enough data has been collected or we have received some notification of an application ending, forcing us to restart collecting data). This might trigger an MPI_Allreduce call, thus has to be called by all PEs! 
+typedef void (*reset_pred_iter_f) (void); // Resets the predictor (eg, average) +typedef struct fast_forward_values (*prepare_fast_forward_f) (void); // Prepares the fast-forward jump: reports whether we are switching to surrogate-mode or only restarting, and the time at which we expect to have fully restarted + +// API that predictors have to comply with +struct app_iteration_predictor { + struct { + init_pred_iter_f init; + feed_pred_iter_f feed; + end_pred_iter_f ended; + predict_pred_iter_f predict; + predict_pred_iter_rc_f predict_rc; + have_we_hit_switch_f have_we_hit_switch; + } model; + struct { + reset_pred_iter_f reset; + is_predictor_read_f is_predictor_ready; + prepare_fast_forward_f prepare_fast_forward_jump; + } director; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h new file mode 100644 index 00000000..184a4f98 --- /dev/null +++ b/codes/surrogate/application-surrogate.h @@ -0,0 +1,25 @@ +#ifndef CODES_SURROGATE_APP_SURROGATE_H +#define CODES_SURROGATE_APP_SURROGATE_H + +/** + * application-surrogate.h -- DIRECTOR FUNCTION in charge of switching back and forth from high-fidelity and surrogate modes for the application level + * Elkin Cruz + * + * Copyright (c) 2025 Rensselaer Polytechnic Institute + */ + +#include +#include "surrogate/app-iteration-predictor/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Main function responsible for switching between high-fidelity and (application iteration) surrogate +void application_director_configure(int every_n_gvt, struct app_iteration_predictor *); + +#ifdef __cplusplus +} +#endif + +#endif /* end of include guard */ diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h index 6846b2e0..28e90a8d 100644 --- a/codes/surrogate/init.h +++ b/codes/surrogate/init.h @@ -8,6 +8,7 @@ * Copyright (c) 2023 Rensselaer Polytechnic Institute */ #include "codes/surrogate/packet-latency-predictor/common.h" +#include "codes/surrogate/app-iteration-predictor/common.h" #include 
"codes/surrogate/network-surrogate.h" // A simple macro to clarify code a bit @@ -49,6 +50,13 @@ void network_surrogate_configure( extern struct network_surrogate_config net_surr_config; extern bool is_network_surrogate_configured; +void application_surrogate_configure( + int num_terminals_on_pe, + int num_apps, + struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. No need to free pointer +); +void free_application_surrogate(void); + #ifdef __cplusplus } #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 55e97215..08950e7f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,7 +56,10 @@ list(APPEND SRCS util/congestion-controller.C surrogate/init.c + surrogate/application-surrogate.c surrogate/network-surrogate.c + surrogate/app-iteration-predictor/common.c + surrogate/app-iteration-predictor/average.c surrogate/packet-latency-predictor/common.c surrogate/packet-latency-predictor/average.c diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 9dfcd306..2d0edd45 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -21,6 +21,7 @@ #include "codes/codes-jobmap.h" #include "codes/congestion-controller-core.h" #include "codes/surrogate/init.h" +#include "surrogate/app-iteration-predictor/common.h" /* turning on track lp will generate a lot of output messages */ #define DBG_COMM 1 @@ -42,6 +43,7 @@ #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine #define OUTPUT_MARKS 0 #define LP_DEBUG 0 +#define HARD_CODED_AVG_ITER_PREDICTOR 0 static int msg_size_hash_compare( void *key, struct qhash_head *link); @@ -163,6 +165,9 @@ static double sampling_interval = 5000000; static double sampling_end_time = 3000000000; static int enable_debug = 0; +// Surrogate variables +struct app_iteration_predictor 
*iter_predictor = NULL; +static int nw_id_counter = 0; // We can skip multiple iterations using an average as our predicted iteration time. This will skip ahead to a future step in the simulation static struct AvgSurrogateSwitchingTimesForApp *skip_iter_config; static size_t skip_iter_config_size = 0; @@ -301,6 +306,7 @@ struct nw_state #endif /* if LP_DEBUG */ tw_lpid nw_id; // compute node id, as labeled by the network + tw_lpid nw_id_in_pe; // compute node id for this PE int local_rank; // id local to the application or synthetic workload, this is the number that the application sees, their phony "MPI rank" // Parameters used for non-synthetic workloads @@ -418,6 +424,7 @@ struct nw_message int found_match; short wait_completed; short rend_send; + int resume_at_iter; } fwd; // A different struct for each type of MPI_NW_EVENTS @@ -482,7 +489,7 @@ struct nw_message int64_t saved_num_bytes; } mpi_ack; - // Surrogate variables + // For SURR_SKIP_ITERATION struct { struct AvgSurrogateSwitchingTimesForApp * config_used; } surr; @@ -1242,17 +1249,25 @@ static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_sta } static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { - m->rc.surr.config_used->done = false; + if (HARD_CODED_AVG_ITER_PREDICTOR) { + m->rc.surr.config_used->done = false; + } } static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { struct codes_workload_op mpi_op; + int resume_at_iter; - struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); - assert(switch_config != NULL); - int const resume_at_iter = switch_config->resume_at_iter; - m->rc.surr.config_used = switch_config; + if (HARD_CODED_AVG_ITER_PREDICTOR) { + struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); + assert(switch_config != NULL); + resume_at_iter = switch_config->resume_at_iter; + m->rc.surr.config_used = switch_config; + switch_config->done = true; + } 
else { + resume_at_iter = m->fwd.resume_at_iter; + } // consuming all events until indicated iteration is reached bool reached_end = false; @@ -1276,8 +1291,6 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * } } - switch_config->done = true; - tw_event *e = tw_event_new(lp->gid, 0.0, lp); nw_message* msg = (nw_message*) tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; @@ -2516,6 +2529,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) memset(s, 0, sizeof(*s)); s->nw_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + s->nw_id_in_pe = nw_id_counter++; s->mpi_wkld_samples = (struct mpi_workload_sample*)calloc(MAX_STATS, sizeof(struct mpi_workload_sample)); s->sampling_indx = 0; s->is_finished = 0; @@ -2685,8 +2699,10 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->app_id = lid.job; s->local_rank = lid.rank; + bool am_i_synthetic = false; if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0) { + am_i_synthetic = true; sscanf(file_name_of_job[lid.job], "synthetic%d", &synthetic_pattern); if(synthetic_pattern <=0 || synthetic_pattern > 6) { @@ -2728,7 +2744,6 @@ void nw_test_init(nw_state* s, tw_lp* lp) { s->wrkld_id = codes_workload_load(type_name, params, s->app_id, s->local_rank); codes_issue_next_event(lp); - printf("my wrkld_id = %d\n", s->wrkld_id); } if(enable_sampling && sampling_interval > 0) { @@ -2765,6 +2780,13 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->switch_config = NULL; s->switch_config_size = 0; } + if (iter_predictor && !am_i_synthetic) { + struct app_iter_node_config conf = { + .app_id = s->app_id, + .app_ending_iter = s->app_id ? 
19 : 20, + }; + iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + } return; } @@ -3015,6 +3037,9 @@ static void get_next_mpi_operation_rc(nw_state* s, tw_bf * bf, nw_message * m, t break; case CODES_WK_MARK: codes_issue_next_event_rc(lp); + if (bf->c13) { + iter_predictor->model.predict_rc(lp, s->nw_id_in_pe); + } break; default: @@ -3145,15 +3170,30 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l case CODES_WK_MARK: { m->rc.mpi_next.mark.saved_marker_time = tw_now(lp); - - // If we have reached the surrogate switch time, skip next iteration(s) - if (have_we_hit_surrogate_switch(s, mpi_op)) { - tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp); - nw_message* msg = (nw_message*) tw_event_data(e); - msg->msg_type = SURR_SKIP_ITERATION; - tw_event_send(e); + int iteration_i = mpi_op->u.send.tag; + + if (HARD_CODED_AVG_ITER_PREDICTOR) { + // If we have reached the surrogate switch time, skip next iteration(s) + if (have_we_hit_surrogate_switch(s, mpi_op)) { + tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp); + nw_message* msg = (nw_message*) tw_event_data(e); + msg->msg_type = SURR_SKIP_ITERATION; + tw_event_send(e); + } else { + codes_issue_next_event(lp); + } } else { - codes_issue_next_event(lp); + if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) { + bf->c13 = 1; + struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe); + tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp); + nw_message* msg = (nw_message*) tw_event_data(e); + msg->msg_type = SURR_SKIP_ITERATION; + msg->fwd.resume_at_iter = iter_pred.resume_at_iter; + tw_event_send(e); + } else { + codes_issue_next_event(lp); + } } } break; @@ -3384,10 +3424,16 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp switch (m->mpi_op->op_type) { case CODES_WK_END: printf("Network node %d Rank %llu App %d 
finished at %lf \n", s->local_rank, LLU(s->nw_id), s->app_id, m->rc.mpi_next.mark.saved_marker_time); + if (iter_predictor) { + iter_predictor->model.ended(lp, s->nw_id_in_pe, m->rc.mpi_next.mark.saved_marker_time); + } break; case CODES_WK_MARK: fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time); + if (iter_predictor) { + iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time); + } if (OUTPUT_MARKS) { @@ -4408,6 +4454,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) assert(ret == 0 || !"lp_io_prepare failure"); } + // TODO: read from config whether to load iterator predictor + application_surrogate_configure(24, 2, &iter_predictor); + tw_run(); fclose(iteration_log); //Xin @@ -4489,6 +4538,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) } print_surrogate_stats(); + free_application_surrogate(); #ifdef USE_RDAMARIS } // end if(g_st_ross_rank) diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c new file mode 100644 index 00000000..77dfc954 --- /dev/null +++ b/src/surrogate/app-iteration-predictor/average.c @@ -0,0 +1,398 @@ +#include "surrogate/app-iteration-predictor/average.h" +#include "codes/codes.h" +#include +#include +#include + +static struct avg_app_config my_config = {0}; + +struct node_data { + int app_id; + double acc_iteration_time; + double prev_iteration_time; + int acc_iters; + int last_iter; +}; +static struct node_data * arr_node_data = NULL; // array containing info for all nodes + +enum ENDED_STATUS { + ENDED_STATUS_running = 0, + ENDED_STATUS_just_ended, // fully ended in this PE + ENDED_STATUS_ended_everywhere, // fully ended on all PEs +}; + +struct app_data { + int num_nodes; + int nodes_with_enough_iters; + int ending_iteration; // last iteration the simulation 
will run (aka, num of iterations) + int nodes_that_have_ended; + enum ENDED_STATUS ended; // use ended to stop accumulating data + // To be used when called by the model. Set by `prepare_fast_forward_jump` + struct { + int jump_at_iter; + int resume_at_iter; + double restart_at; + } pred; +}; +static struct app_data * arr_app_data = NULL; // array containing info for all apps +static bool ready_to_skip = false; + + +static void find_max_iter_per_app(int * save_last_iter); +static inline int app_id_for(int nw_id_in_pe) { + return arr_node_data[nw_id_in_pe].app_id; +} + + +static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) { + assert(arr_node_data); + if (my_config.num_nodes_in_pe <= nw_id_in_pe) { + tw_error(TW_LOC, "Node id relative to PE (%d) is larger than the number of nodes %d", nw_id_in_pe, my_config.num_nodes_in_pe); + } + + // Storing node data info + arr_node_data[nw_id_in_pe].app_id = config->app_id; + arr_node_data[nw_id_in_pe].last_iter = INT_MIN; + + // Storing app data info + arr_app_data[config->app_id].num_nodes++; + if (arr_app_data[config->app_id].ending_iteration == -1) { + arr_app_data[config->app_id].ending_iteration = config->app_ending_iter; + } else { + if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) { + tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration); + } + } +} + + +static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { + (void) lp; + assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); + assert(app_id_for(nw_id_in_pe) != -1); + struct node_data * node_data = &arr_node_data[nw_id_in_pe]; + if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter` + return; + } + node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time; + 
node_data->prev_iteration_time = iteration_time; + node_data->acc_iters++; + node_data->last_iter = iter; + // We've hit the required number of iterations to feed our predictor + if (node_data->acc_iters == my_config.num_of_iters_to_feed) { + arr_app_data[node_data->app_id].nodes_with_enough_iters++; + } +} + + +static void ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) { + assert(app_id_for(nw_id_in_pe) != -1); + struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; + app_data->nodes_that_have_ended++; + if (app_data->nodes_that_have_ended == app_data->num_nodes) { + app_data->ended = ENDED_STATUS_just_ended; + } +} + + +static struct iteration_pred predict(tw_lp * lp, int nw_id_in_pe) { + assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); + assert(app_id_for(nw_id_in_pe) != -1); + struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; + return (struct iteration_pred) { + .resume_at_iter = app_data->pred.resume_at_iter, + .restart_at = app_data->pred.restart_at, + }; +} + +static void predict_rc(tw_lp * lp, int nw_id_in_pe) {} + +static void reset_with(bool const * app_just_ended) { + ready_to_skip = false; + + int last_iter[my_config.num_apps]; + find_max_iter_per_app(last_iter); // We should start tracking iterations from the next iteration + + for (int i=0; i < my_config.num_nodes_in_pe; i++) { + struct node_data * node_data = &arr_node_data[i]; + node_data->acc_iters = 0; + node_data->acc_iteration_time = 0; + node_data->last_iter = last_iter[node_data->app_id]; + node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at; + } + for (int i=0; i < my_config.num_apps; i++) { + arr_app_data[i].nodes_with_enough_iters = 0; + } + + // If an app just fully ended (ended on all PEs but hasn't been cleaned) then clean it + for (int i = 0; i < my_config.num_apps; i++) { + if (app_just_ended[i]) { + arr_app_data[i].ended = ENDED_STATUS_ended_everywhere; + } + } +} + +static bool have_we_hit_switch(tw_lp 
* lp, int nw_id_in_pe, int iteration_id) { + assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); + int const app_id = app_id_for(nw_id_in_pe); + if (ready_to_skip && iteration_id == arr_app_data[app_id].pred.jump_at_iter) { + return true; + } + return false; +} + +static inline bool has_any_app_ended(bool * save_app_just_ended) { + // Checking any application has fully ended, in which case we have to restart collecting data + bool app_just_ended_here[my_config.num_apps]; + for (int i = 0; i < my_config.num_apps; i++) { + struct app_data * app_data = &arr_app_data[i]; + app_just_ended_here[i] = app_data->ended == ENDED_STATUS_just_ended; + } + if(MPI_Allreduce(&app_just_ended_here, save_app_just_ended, my_config.num_apps, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce call failed!"); + } + for (int i = 0; i < my_config.num_apps; i++) { + if (save_app_just_ended[i]) { + return true; + } + } + return false; +} + +static inline bool all_apps_ended(void) { + for (int i = 0; i < my_config.num_apps; i++) { + struct app_data * app_data = &arr_app_data[i]; + if (app_data->ended != ENDED_STATUS_ended_everywhere) { + return false; + } + } + return true; +} + + +static inline bool has_everyone_accumulated_enough() { + bool everyone = true; + for (int i = 0; i < my_config.num_apps; i++) { + struct app_data * app_data = &arr_app_data[i]; + // ignoring apps that have ended already + if (app_data->ended != ENDED_STATUS_ended_everywhere) { + everyone &= app_data->nodes_with_enough_iters == app_data->num_nodes; + } + } + return everyone; +} + +static bool is_predictor_ready(void) { + bool app_just_ended[my_config.num_apps]; + if (has_any_app_ended(app_just_ended)) { + reset_with(app_just_ended); + return false; + } + + if (all_apps_ended()) { + return false; + } + + // check that all applications have collected data for enough iterations to jump ahead + bool const everyone_ready_here = has_everyone_accumulated_enough(); + bool 
everyone_ready; + if(MPI_Allreduce(&everyone_ready_here, &everyone_ready, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce call failed!"); + } + return everyone_ready; +} + + +static void reset(void) { + bool app_just_ended[my_config.num_apps]; + has_any_app_ended(app_just_ended); + reset_with(app_just_ended); +} + +static void find_avg_iteration_time(double * save_avg_time) { + double acc_iter_time_here[my_config.num_apps]; + int acc_iters_here[my_config.num_apps]; + for (int i=0; i < my_config.num_apps; i++) { + acc_iter_time_here[i] = 0.0; + acc_iters_here[i] = 0; + } + for (int i=0; i < my_config.num_nodes_in_pe; i++) { + struct node_data * node_data = &arr_node_data[i]; + int const app_id = node_data->app_id; + acc_iter_time_here[app_id] += node_data->acc_iteration_time; + acc_iters_here[app_id] += node_data->acc_iters; + } + double acc_iter_time[my_config.num_apps]; + if(MPI_Allreduce(&acc_iter_time_here, &acc_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); + } + int acc_iters[my_config.num_apps]; + if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! 
Couldn't add up"); + } + + for (int i=0; i < my_config.num_apps; i++) { + if (acc_iters[i]) { + save_avg_time[i] = acc_iter_time[i] / acc_iters[i]; + } + } +} + +static void find_max_iter_per_app(int * save_last_iter) { + int last_iter_here[my_config.num_apps]; + for (int i=0; i < my_config.num_apps; i++) { + last_iter_here[i] = -1; + } + for (int i=0; i < my_config.num_nodes_in_pe; i++) { + struct node_data * node_data = &arr_node_data[i]; + int const app_id = node_data->app_id; + if (last_iter_here[app_id] < node_data->last_iter) { + last_iter_here[app_id] = node_data->last_iter; + } + } + if(MPI_Allreduce(&last_iter_here, save_last_iter, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't compute maximum"); + } +} + +static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * last_iter) { + int acc_iters_here[my_config.num_apps]; + double acc_last_iter_time[my_config.num_apps]; + for (int i=0; i < my_config.num_apps; i++) { + acc_iters_here[i] = 0; + acc_last_iter_time[i] = 0.0; + } + for (int i=0; i < my_config.num_nodes_in_pe; i++) { + struct node_data * node_data = &arr_node_data[i]; + int const app_id = node_data->app_id; + if (node_data->last_iter == last_iter[app_id]) { + acc_last_iter_time[app_id] += node_data->prev_iteration_time; + acc_iters_here[app_id]++; + } + } + if(MPI_Allreduce(&acc_last_iter_time, save_last_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); + } + int acc_iters[my_config.num_apps]; + if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! 
Couldn't add up"); + } + for (int i=0; i < my_config.num_apps; i++) { + if (acc_iters[i] > 0) { + save_last_iter_time[i] /= acc_iters[i]; + } + } +} + +static struct fast_forward_values prepare_fast_forward_jump(void) { + // 0. Check if app is still running + bool is_running[my_config.num_apps]; + for (int i=0; i < my_config.num_apps; i++) { + is_running[i] = arr_app_data[i].ended != ENDED_STATUS_ended_everywhere; + } + // 1. Compute end time for each application given current data (pick smallest) + // a. Find avg iteration per app + double avg_iter_time[my_config.num_apps]; + find_avg_iteration_time(avg_iter_time); + // b. Find iteration to start switch after + int last_iter[my_config.num_apps]; + double last_iter_time[my_config.num_apps]; + find_max_iter_per_app(last_iter); + find_avg_time_for_max_iter(last_iter_time, last_iter); + // c. Compute avg end time for all apps (loop through every node, and add value to avg array) + double apps_end_time[my_config.num_apps]; + for (int i=0; i < my_config.num_apps; i++) { + int const iterations_left = arr_app_data[i].ending_iteration - last_iter[i]; + apps_end_time[i] = last_iter_time[i] + iterations_left * avg_iter_time[i]; + } + // d. Pick smallest compute end time/time to skip + double switch_time = DBL_MAX; + for (int i=0; i < my_config.num_apps; i++) { + if (is_running[i] && switch_time > apps_end_time[i]) { + switch_time = apps_end_time[i]; + } + } + // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point + // a. 
Find iteration to skip to per node + double apps_restart_at_time[my_config.num_apps]; + int apps_restart_at_iter[my_config.num_apps]; + bool worth_switching = true; + for (int i=0; i < my_config.num_apps; i++) { + if (!is_running[i]) { + continue; + } + int iters_to_skip = lround((switch_time - last_iter_time[i]) / avg_iter_time[i]); + apps_restart_at_time[i] = last_iter_time[i] + iters_to_skip * avg_iter_time[i]; + apps_restart_at_iter[i] = last_iter[i] + iters_to_skip; + + // if we are not skipping at least two iterations, there is no point in trying to fastforward + if (iters_to_skip <= 2) { + worth_switching = false; + } + } + // b. Compute last application to restart (this is restarting_at) + double last_to_finish = 0; + for (int i=0; i < my_config.num_apps; i++) { + if (is_running[i] && last_to_finish < apps_restart_at_time[i]) { + last_to_finish = apps_restart_at_time[i]; + } + } + // c. If the number of iterations to skip is zero for any app, force reset of predictor tracking + if (!worth_switching) { + return (struct fast_forward_values) { + .status = FAST_FORWARD_restart, + .restarting_at = last_to_finish, + }; + } + // 4. 
Set values for iteration to restart at and iterations to jump for each application + for (int i=0; i < my_config.num_apps; i++) { + if (!is_running[i]) { + continue; + } + arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1; + arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i]; + arr_app_data[i].pred.restart_at = apps_restart_at_time[i]; + } + ready_to_skip = true; + + return (struct fast_forward_values) { + .status = FAST_FORWARD_switching, + .restarting_at = last_to_finish, + }; +} + +struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config * config_) { + my_config = *config_; + arr_node_data = calloc(my_config.num_nodes_in_pe, sizeof(struct node_data)); + arr_app_data = calloc(my_config.num_apps, sizeof(struct app_data)); + for (int i=0; i < my_config.num_nodes_in_pe; i++) { + arr_node_data[i].app_id = -1; + } + for (int i=0; i < my_config.num_apps; i++) { + arr_app_data[i].ending_iteration = -1; + } + return (struct app_iteration_predictor) { + .model = { + .init = init, + .feed = feed, + .ended = ended, + .predict = predict, + .predict_rc = predict_rc, + .have_we_hit_switch = have_we_hit_switch, + }, + .director = { + .reset = reset, + .is_predictor_ready = is_predictor_ready, + .prepare_fast_forward_jump = prepare_fast_forward_jump, + } + }; +} + +void free_avg_app_iteration_predictor(void) { + if (arr_node_data) { + free(arr_node_data); + } + if (arr_app_data) { + free(arr_app_data); + } +} diff --git a/src/surrogate/app-iteration-predictor/common.c b/src/surrogate/app-iteration-predictor/common.c new file mode 100644 index 00000000..cc8db1a3 --- /dev/null +++ b/src/surrogate/app-iteration-predictor/common.c @@ -0,0 +1 @@ +#include "surrogate/app-iteration-predictor/common.h" diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c new file mode 100644 index 00000000..87a8dc74 --- /dev/null +++ b/src/surrogate/application-surrogate.c @@ -0,0 +1,74 @@ +#include 
"surrogate/application-surrogate.h" +#include + +static struct app_iteration_predictor * iter_predictor; +static int every_n_gvt = 1; +static enum { + PRE_JUMP = 0, + POST_JUMP_switched, // Switched to surrogate-mode + POST_JUMP_skipped, // Did not switch, and skipping until next application finishes +} director_state; + +#ifdef USE_RAND_TIEBREAKER +#define gvt_for(pe) (pe->GVT_sig.recv_ts) +#else +#define gvt_for(pe) (pe->GVT) +#endif + +#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); } + +static void application_director_pre_switch(tw_pe * pe) { + if (!iter_predictor->director.is_predictor_ready()) { + return; + } + struct fast_forward_values jump_to = iter_predictor->director.prepare_fast_forward_jump(); + double const restarting_at = jump_to.restarting_at > gvt_for(pe) ? jump_to.restarting_at : gvt_for(pe); + switch (jump_to.status) { + case FAST_FORWARD_switching: + tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward + master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); + director_state = POST_JUMP_switched; + break; + + case FAST_FORWARD_restart: + tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward + director_state = POST_JUMP_skipped; + break; + } +} + +static void application_director_post_switch(tw_pe * pe) { + tw_trigger_gvt_hook_every(every_n_gvt); + iter_predictor->director.reset(); + + if (director_state == POST_JUMP_switched) { + master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); + } else { + master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); + } + director_state = PRE_JUMP; +} + +void application_director(tw_pe * pe) { + // Director is not called if the simulation has ended + if (gvt_for(pe) >= g_tw_ts_end) { + return; + 
} + switch (director_state) { + case PRE_JUMP: + application_director_pre_switch(pe); + break; + case POST_JUMP_switched: + case POST_JUMP_skipped: + application_director_post_switch(pe); + break; + } +} + +void application_director_configure(int every_n_gvt_, struct app_iteration_predictor * iter_predictor_) { + every_n_gvt = every_n_gvt_; + iter_predictor = iter_predictor_; + g_tw_gvt_hook = application_director; + director_state = PRE_JUMP; + tw_trigger_gvt_hook_every(every_n_gvt); +} diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 63f9ff89..0a7386a4 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -1,5 +1,7 @@ #include #include +#include +#include #ifdef USE_TORCH #include @@ -10,6 +12,7 @@ struct network_surrogate_config net_surr_config = {0}; bool is_network_surrogate_configured = false; struct switch_at_struct switch_network_at; static struct packet_latency_predictor current_net_predictor = {0}; +static struct app_iteration_predictor current_iter_predictor = {0}; // === Stats! @@ -149,4 +152,25 @@ void network_surrogate_configure( fprintf(stderr, "Simulation starting on %s mode\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); } } + +void application_surrogate_configure( + int num_terminals_in_pe, + int num_apps, + struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. 
Caller must free it +) { + // TODO: get configuration settings from common configuration file settings + struct avg_app_config predictor_config = { + .num_apps = num_apps, + .num_nodes_in_pe = num_terminals_in_pe, + .num_of_iters_to_feed = 5, + }; + int every_n_gvt = 100; + current_iter_predictor = avg_app_iteration_predictor(&predictor_config); + application_director_configure(every_n_gvt, &current_iter_predictor); + *iter_pred = &current_iter_predictor; +} + +void free_application_surrogate(void) { + free_avg_app_iteration_predictor(); +} // === END OF All things Surrogate Configuration From 53f51c478625ebf439b338afeb826c77ab3cd4ae Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 9 Jun 2025 12:17:14 -0400 Subject: [PATCH 059/110] Fixing bug on predictor when app is not fully distributed across all PEs An app/workload was assumed to be distributed on all PEs, which is not always true. Now it is possible to have a workload in one part of the PE space than others. --- .../app-iteration-predictor/average.c | 59 ++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index 77dfc954..f8105b2f 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -4,6 +4,8 @@ #include #include +#define master_printf(str, ...) 
if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); } + static struct avg_app_config my_config = {0}; struct node_data { @@ -56,7 +58,7 @@ static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * conf // Storing app data info arr_app_data[config->app_id].num_nodes++; - if (arr_app_data[config->app_id].ending_iteration == -1) { + if (arr_app_data[config->app_id].ending_iteration == INT_MIN) { arr_app_data[config->app_id].ending_iteration = config->app_ending_iter; } else { if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) { @@ -74,6 +76,9 @@ static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter` return; } + if (arr_app_data[node_data->app_id].ended != ENDED_STATUS_running) { + tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured"); + } node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time; node_data->prev_iteration_time = iteration_time; node_data->acc_iters++; @@ -117,8 +122,10 @@ static void reset_with(bool const * app_just_ended) { struct node_data * node_data = &arr_node_data[i]; node_data->acc_iters = 0; node_data->acc_iteration_time = 0; - node_data->last_iter = last_iter[node_data->app_id]; - node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at; + if (node_data->last_iter < arr_app_data[node_data->app_id].pred.resume_at_iter) { + node_data->last_iter = last_iter[node_data->app_id]; + node_data->prev_iteration_time = arr_app_data[node_data->app_id].pred.restart_at; + } } for (int i=0; i < my_config.num_apps; i++) { arr_app_data[i].nodes_with_enough_iters = 0; @@ -141,6 +148,35 @@ static bool have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) { return false; } +static inline void post_init_share_ending_iteration(void) { + // Sharing 
ending_iteration results across PEs + int ending_iteration_here[my_config.num_apps]; + for (int i = 0; i < my_config.num_apps; i++) { + ending_iteration_here[i] = arr_app_data[i].ending_iteration; + } + int ending_iteration[my_config.num_apps]; + if(MPI_Allreduce(ending_iteration_here, ending_iteration, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce call failed!"); + } + + // Checking that total iterations are the same across nodes + for (int i = 0; i < my_config.num_apps; i++) { + struct app_data * app_data = &arr_app_data[i]; + if (app_data->ending_iteration == INT_MIN) { + if (ending_iteration[i] == INT_MIN) { + app_data->ended = ENDED_STATUS_ended_everywhere; + master_printf("Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i); + } else { + // The application has "completed" in this PE already! + app_data->ended = ENDED_STATUS_just_ended; + } + app_data->ending_iteration = ending_iteration[i]; + } else if (ending_iteration[i] != app_data->ending_iteration) { + tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data->ending_iteration); + } + } +} + static inline bool has_any_app_ended(bool * save_app_just_ended) { // Checking any application has fully ended, in which case we have to restart collecting data bool app_just_ended_here[my_config.num_apps]; @@ -175,7 +211,9 @@ static inline bool has_everyone_accumulated_enough() { for (int i = 0; i < my_config.num_apps; i++) { struct app_data * app_data = &arr_app_data[i]; // ignoring apps that have ended already - if (app_data->ended != ENDED_STATUS_ended_everywhere) { + bool const app_in_pe = app_data->num_nodes > 0; + bool const hasnt_ended = app_data->completed != ENDED_STATUS_ended_everywhere; + if (app_in_pe && hasnt_ended) { everyone &= app_data->nodes_with_enough_iters == 
app_data->num_nodes; } } @@ -183,6 +221,11 @@ static inline bool has_everyone_accumulated_enough() { } static bool is_predictor_ready(void) { + static bool post_init_done = false; + if (!post_init_done) { + post_init_share_ending_iteration(); + post_init_done = true; + } bool app_just_ended[my_config.num_apps]; if (has_any_app_ended(app_just_ended)) { reset_with(app_just_ended); @@ -241,7 +284,7 @@ static void find_avg_iteration_time(double * save_avg_time) { static void find_max_iter_per_app(int * save_last_iter) { int last_iter_here[my_config.num_apps]; for (int i=0; i < my_config.num_apps; i++) { - last_iter_here[i] = -1; + last_iter_here[i] = INT_MIN; } for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; @@ -366,10 +409,12 @@ struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config arr_node_data = calloc(my_config.num_nodes_in_pe, sizeof(struct node_data)); arr_app_data = calloc(my_config.num_apps, sizeof(struct app_data)); for (int i=0; i < my_config.num_nodes_in_pe; i++) { - arr_node_data[i].app_id = -1; + struct node_data * node_data = &arr_node_data[i]; + node_data->app_id = -1; + node_data->last_iter = INT_MIN; } for (int i=0; i < my_config.num_apps; i++) { - arr_app_data[i].ending_iteration = -1; + arr_app_data[i].ending_iteration = INT_MIN; } return (struct app_iteration_predictor) { .model = { From ffea77be9bd27a5281bbaa44da5b1f3edc1ac95b Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 9 Jun 2025 12:20:25 -0400 Subject: [PATCH 060/110] Refactoring/renaming some fields to aid legibility --- .../app-iteration-predictor/average.c | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index f8105b2f..0ecfcc89 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -17,10 +17,10 @@ struct 
node_data { }; static struct node_data * arr_node_data = NULL; // array containing info for all nodes -enum ENDED_STATUS { - ENDED_STATUS_running = 0, - ENDED_STATUS_just_ended, // fully ended in this PE - ENDED_STATUS_ended_everywhere, // fully ended on all PEs +enum APP_STATUS { + APP_STATUS_running = 0, + APP_STATUS_just_completed, // fully ended in this PE + APP_STATUS_completed_everywhere, // fully ended on all PEs }; struct app_data { @@ -28,7 +28,7 @@ struct app_data { int nodes_with_enough_iters; int ending_iteration; // last iteration the simulation will run (aka, num of iterations) int nodes_that_have_ended; - enum ENDED_STATUS ended; // use ended to stop accumulating data + enum APP_STATUS status; // use ended to stop accumulating data // To be used when called by the model. Set by `prepare_fast_forward_jump` struct { int jump_at_iter; @@ -46,7 +46,7 @@ static inline int app_id_for(int nw_id_in_pe) { } -static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) { +static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * config) { assert(arr_node_data); if (my_config.num_nodes_in_pe <= nw_id_in_pe) { tw_error(TW_LOC, "Node id relative to PE (%d) is larger than the number of nodes %d", nw_id_in_pe, my_config.num_nodes_in_pe); @@ -68,7 +68,7 @@ static void init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_config * conf } -static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { +static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { (void) lp; assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); assert(app_id_for(nw_id_in_pe) != -1); @@ -76,7 +76,7 @@ static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter` return; } - if (arr_app_data[node_data->app_id].ended != ENDED_STATUS_running) { + if 
(arr_app_data[node_data->app_id].status != APP_STATUS_running) { tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured"); } node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time; @@ -90,17 +90,17 @@ static void feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { } -static void ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) { +static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) { assert(app_id_for(nw_id_in_pe) != -1); struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; app_data->nodes_that_have_ended++; if (app_data->nodes_that_have_ended == app_data->num_nodes) { - app_data->ended = ENDED_STATUS_just_ended; + app_data->status = APP_STATUS_just_completed; } } -static struct iteration_pred predict(tw_lp * lp, int nw_id_in_pe) { +static struct iteration_pred model_calls_predict(tw_lp * lp, int nw_id_in_pe) { assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); assert(app_id_for(nw_id_in_pe) != -1); struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; @@ -110,7 +110,7 @@ static struct iteration_pred predict(tw_lp * lp, int nw_id_in_pe) { }; } -static void predict_rc(tw_lp * lp, int nw_id_in_pe) {} +static void model_calls_predict_rc(tw_lp * lp, int nw_id_in_pe) {} static void reset_with(bool const * app_just_ended) { ready_to_skip = false; @@ -134,12 +134,12 @@ static void reset_with(bool const * app_just_ended) { // If an app just fully ended (ended on all PEs but hasn't been cleaned) then clean it for (int i = 0; i < my_config.num_apps; i++) { if (app_just_ended[i]) { - arr_app_data[i].ended = ENDED_STATUS_ended_everywhere; + arr_app_data[i].status = APP_STATUS_completed_everywhere; } } } -static bool have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) { +static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int 
iteration_id) { assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); int const app_id = app_id_for(nw_id_in_pe); if (ready_to_skip && iteration_id == arr_app_data[app_id].pred.jump_at_iter) { @@ -164,11 +164,11 @@ static inline void post_init_share_ending_iteration(void) { struct app_data * app_data = &arr_app_data[i]; if (app_data->ending_iteration == INT_MIN) { if (ending_iteration[i] == INT_MIN) { - app_data->ended = ENDED_STATUS_ended_everywhere; + app_data->status = APP_STATUS_completed_everywhere; master_printf("Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i); } else { // The application has "completed" in this PE already! - app_data->ended = ENDED_STATUS_just_ended; + app_data->status = APP_STATUS_just_completed; } app_data->ending_iteration = ending_iteration[i]; } else if (ending_iteration[i] != app_data->ending_iteration) { @@ -182,7 +182,7 @@ static inline bool has_any_app_ended(bool * save_app_just_ended) { bool app_just_ended_here[my_config.num_apps]; for (int i = 0; i < my_config.num_apps; i++) { struct app_data * app_data = &arr_app_data[i]; - app_just_ended_here[i] = app_data->ended == ENDED_STATUS_just_ended; + app_just_ended_here[i] = app_data->status == APP_STATUS_just_completed; } if(MPI_Allreduce(&app_just_ended_here, save_app_just_ended, my_config.num_apps, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) { tw_error(TW_LOC, "MPI_Allreduce call failed!"); @@ -198,7 +198,7 @@ static inline bool has_any_app_ended(bool * save_app_just_ended) { static inline bool all_apps_ended(void) { for (int i = 0; i < my_config.num_apps; i++) { struct app_data * app_data = &arr_app_data[i]; - if (app_data->ended != ENDED_STATUS_ended_everywhere) { + if (app_data->status != APP_STATUS_completed_everywhere) { return false; } } @@ -212,7 +212,7 @@ static inline bool has_everyone_accumulated_enough() { struct app_data * app_data = &arr_app_data[i]; // ignoring apps that have ended 
already bool const app_in_pe = app_data->num_nodes > 0; - bool const hasnt_ended = app_data->completed != ENDED_STATUS_ended_everywhere; + bool const hasnt_ended = app_data->status != APP_STATUS_completed_everywhere; if (app_in_pe && hasnt_ended) { everyone &= app_data->nodes_with_enough_iters == app_data->num_nodes; } @@ -220,7 +220,7 @@ static inline bool has_everyone_accumulated_enough() { return everyone; } -static bool is_predictor_ready(void) { +static bool director_calls_is_predictor_ready(void) { static bool post_init_done = false; if (!post_init_done) { post_init_share_ending_iteration(); @@ -246,7 +246,7 @@ static bool is_predictor_ready(void) { } -static void reset(void) { +static void director_calls_reset(void) { bool app_just_ended[my_config.num_apps]; has_any_app_ended(app_just_ended); reset_with(app_just_ended); @@ -327,11 +327,11 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * } } -static struct fast_forward_values prepare_fast_forward_jump(void) { +static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) { // 0. Check if app is still running bool is_running[my_config.num_apps]; for (int i=0; i < my_config.num_apps; i++) { - is_running[i] = arr_app_data[i].ended != ENDED_STATUS_ended_everywhere; + is_running[i] = arr_app_data[i].status != APP_STATUS_completed_everywhere; } // 1. Compute end time for each application given current data (pick smallest) // a. 
Find avg iteration per app @@ -418,17 +418,17 @@ struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config } return (struct app_iteration_predictor) { .model = { - .init = init, - .feed = feed, - .ended = ended, - .predict = predict, - .predict_rc = predict_rc, - .have_we_hit_switch = have_we_hit_switch, + .init = model_calls_init, + .feed = model_calls_feed, + .ended = model_calls_ended, + .predict = model_calls_predict, + .predict_rc = model_calls_predict_rc, + .have_we_hit_switch = model_calls_have_we_hit_switch, }, .director = { - .reset = reset, - .is_predictor_ready = is_predictor_ready, - .prepare_fast_forward_jump = prepare_fast_forward_jump, + .reset = director_calls_reset, + .is_predictor_ready = director_calls_is_predictor_ready, + .prepare_fast_forward_jump = director_calls_prepare_fast_forward_jump, } }; } From 763a71f2640026d09afd548c8d6e0199b2728e3f Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 9 Jun 2025 12:21:55 -0400 Subject: [PATCH 061/110] De-harcoding parameters passed down by model-net-mpi-replay at init --- codes/codes_mapping.h | 3 +++ src/network-workloads/model-net-mpi-replay.c | 5 +++-- src/util/codes_mapping.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/codes/codes_mapping.h b/codes/codes_mapping.h index 0e46447b..6d83098c 100644 --- a/codes/codes_mapping.h +++ b/codes/codes_mapping.h @@ -23,6 +23,9 @@ extern "C" { /* Returns number of LPs on the current PE */ int codes_mapping_get_lps_for_pe(void); +/* Returns the number of LPs of the given type name */ +tw_lpid codes_mapping_count_lps_of_type(char const lp_type_name[MAX_NAME_LENGTH]); + /* Takes the global LP ID and returns the rank (PE id) on which the LP is mapped.*/ tw_peid codes_mapping( tw_lpid gid); diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 2d0edd45..a75020e4 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ 
b/src/network-workloads/model-net-mpi-replay.c @@ -4454,8 +4454,9 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) assert(ret == 0 || !"lp_io_prepare failure"); } - // TODO: read from config whether to load iterator predictor - application_surrogate_configure(24, 2, &iter_predictor); + tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp"); + int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor); tw_run(); diff --git a/src/util/codes_mapping.c b/src/util/codes_mapping.c index 9a8554ba..359a6622 100644 --- a/src/util/codes_mapping.c +++ b/src/util/codes_mapping.c @@ -519,6 +519,21 @@ static void codes_mapping_init(void) return; } +tw_lpid codes_mapping_count_lps_of_type(char const lp_type_name[MAX_NAME_LENGTH]) +{ + tw_lpid count = 0; + for (tw_lpid lpid = 0; lpid < g_tw_nlp; lpid ++) { + tw_lpid ross_gid = g_tw_lp[lpid]->gid; + int grp_id, lpt_id, rep_id, offset; + char this_lp_type[MAX_NAME_LENGTH]; + codes_mapping_get_lp_info(ross_gid, NULL, &grp_id, this_lp_type, &lpt_id, NULL, &rep_id, &offset); // This lookup could be speed up, but making this call is far simpler rn + if (strncmp(lp_type_name, this_lp_type, MAX_NAME_LENGTH)) { + count++; + } + } + return count; +} + /* This function takes the global LP ID, maps it to the local LP ID and returns the LP * lps have global and local LP IDs * global LP IDs are unique across all PEs, local LP IDs are unique within a PE */ From 86c25cde28d81fdc38033e374393dfbef1dada43 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 9 Jun 2025 13:05:27 -0400 Subject: [PATCH 062/110] Configuring application surrogate through config file --- .../app-iteration-predictor/average.h | 2 +- src/network-workloads/model-net-mpi-replay.c | 25 ++++++++++++++++--- .../app-iteration-predictor/average.c | 2 +- src/surrogate/init.c | 8 ++++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git 
a/codes/surrogate/app-iteration-predictor/average.h b/codes/surrogate/app-iteration-predictor/average.h index 0ec22283..0d3ace0d 100644 --- a/codes/surrogate/app-iteration-predictor/average.h +++ b/codes/surrogate/app-iteration-predictor/average.h @@ -11,7 +11,7 @@ struct avg_app_config { int num_apps; int num_nodes_in_pe; - int num_of_iters_to_feed; + int num_iters_to_collect; }; struct app_iteration_predictor avg_app_iteration_predictor(struct avg_app_config *); diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index a75020e4..6c6b0c72 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -4103,6 +4103,27 @@ void modelnet_mpi_replay_read_config() } +void modelnet_mpi_replay_configure_app_surrogate() +{ + char app_surrogate_test[MAX_NAME_LENGTH]; + app_surrogate_test[0] = '\0'; + int app_surrogate_len = configuration_get_value(&config, "APPLICATION_SURROGATE", "enable", NULL, app_surrogate_test, MAX_NAME_LENGTH); + + // Only configure if APPLICATION_SURROGATE is present and enabled + if (app_surrogate_len == 0 || atoi(app_surrogate_test) == 0) { + return; + } + + tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp"); + int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); + application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor); + + if (g_tw_mynode == 0) { + printf("Application surrogacy configured with a total of %d jobs\n", num_jobs); + } +} + + int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) { int rank; @@ -4454,9 +4475,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) assert(ret == 0 || !"lp_io_prepare failure"); } - tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp"); - int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); - application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor); + 
modelnet_mpi_replay_configure_app_surrogate(); tw_run(); diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index 0ecfcc89..6fc254a6 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -84,7 +84,7 @@ static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double itera node_data->acc_iters++; node_data->last_iter = iter; // We've hit the required number of iterations to feed our predictor - if (node_data->acc_iters == my_config.num_of_iters_to_feed) { + if (node_data->acc_iters == my_config.num_iters_to_collect) { arr_app_data[node_data->app_id].nodes_with_enough_iters++; } } diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 0a7386a4..47ccc2db 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -158,11 +158,15 @@ void application_surrogate_configure( int num_apps, struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. Caller must free it ) { - // TODO: get configuration settings from common configuration file settings + char num_iters_str[MAX_NAME_LENGTH]; + num_iters_str[0] = '\0'; + int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", "num_iters_to_collect", NULL, num_iters_str, MAX_NAME_LENGTH); + int const num_of_iters_to_feed = (rc > 0) ? 
atoi(num_iters_str) : 5; // default to 5 if not specified + struct avg_app_config predictor_config = { .num_apps = num_apps, .num_nodes_in_pe = num_terminals_in_pe, - .num_of_iters_to_feed = 5, + .num_iters_to_collect = num_of_iters_to_feed, }; int every_n_gvt = 100; current_iter_predictor = avg_app_iteration_predictor(&predictor_config); From fa56d854bacfd9923bf9e4915863fb7f78ed718f Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 10 Jun 2025 10:50:34 -0400 Subject: [PATCH 063/110] Passing data from non-synthetic workloads to CODES through interface The extended interface asks the workload generator to set extra information in order to run in application-surrogate mode. If this information is not given, the surrogate will not run. --- codes/codes-workload.h | 4 ++ src/network-workloads/model-net-mpi-replay.c | 15 ++-- .../app-iteration-predictor/average.c | 4 +- src/workload/codes-workload.c | 7 ++ .../methods/codes-conc-online-comm-wrkld.C | 71 ++++++++++++++++++- .../methods/codes-online-comm-wrkld.C | 51 ++++++++++++- 6 files changed, 143 insertions(+), 9 deletions(-) diff --git a/codes/codes-workload.h b/codes/codes-workload.h index 4722b5a4..e83ef3d1 100644 --- a/codes/codes-workload.h +++ b/codes/codes-workload.h @@ -354,6 +354,9 @@ int codes_workload_get_time(const char *type, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes); +// Returns the final iteration (positive) after which the workload will stop. 
If the result is -1, then there is nothing to do +int codes_workload_get_final_iteration(int wkld_id, int app_id, int rank); + /* implementation structure */ struct codes_workload_method { @@ -368,6 +371,7 @@ struct codes_workload_method int (*codes_workload_finalize)(const char* params, int app_id, int rank); /* added for get all read or write time */ int (*codes_workload_get_time)(const char * params, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes); + int (*codes_workload_get_final_iteration)(int app_id, int rank); }; diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 6c6b0c72..0468663c 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -2781,11 +2781,16 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->switch_config_size = 0; } if (iter_predictor && !am_i_synthetic) { - struct app_iter_node_config conf = { - .app_id = s->app_id, - .app_ending_iter = s->app_id ? 19 : 20, - }; - iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank); + if (ending_iter == -1) { + tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. 
app id=%d", s->app_id); + } else { + struct app_iter_node_config conf = { + .app_id = s->app_id, + .app_ending_iter = ending_iter, + }; + iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + } } return; diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index 6fc254a6..5f04e846 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -71,7 +71,9 @@ static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_c static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { (void) lp; assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); - assert(app_id_for(nw_id_in_pe) != -1); + if (app_id_for(nw_id_in_pe) == -1) { + tw_error(TW_LOC, "Predictor for node was not initialized! Node ID (on PE) %d", nw_id_in_pe); + } struct node_data * node_data = &arr_node_data[nw_id_in_pe]; if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter` return; diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c index 45efc8c0..45657be3 100644 --- a/src/workload/codes-workload.c +++ b/src/workload/codes-workload.c @@ -368,6 +368,13 @@ int codes_workload_get_rank_cnt( return(-1); } +int codes_workload_get_final_iteration(int wkld_id, int app_id, int rank) { + if (method_array[wkld_id]->codes_workload_get_final_iteration) { + return method_array[wkld_id]->codes_workload_get_final_iteration(app_id, rank); + } + return -1; +} + void codes_workload_print_op( FILE *f, struct codes_workload_op *op, diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C index 7cb36466..a78f9abf 100644 --- a/src/workload/methods/codes-conc-online-comm-wrkld.C +++ b/src/workload/methods/codes-conc-online-comm-wrkld.C @@ -73,6 +73,10 @@ struct shared_context { bool isconc; ABT_thread producer; std::deque fifo; + struct 
{ + bool received; + int final_iteration; + } init_data_from_workload; }; struct rank_mpi_context { @@ -86,8 +90,26 @@ typedef struct rank_mpi_compare { int rank; } rank_mpi_compare; - /* Conceptual online workload implementations */ + +void UNION_Pass_app_data(struct union_app_data * app_data) { + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err; + + err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast<shared_context*>(arg); + + sctx->init_data_from_workload.received = true; + sctx->init_data_from_workload.final_iteration = app_data->final_iteration; + + ABT_thread_yield_to(global_prod_thread); +} + void UNION_MPI_Comm_size (UNION_Comm comm, int *size) { /* Retreive the shared context state */ @@ -1014,6 +1036,21 @@ void UNION_MPI_Alltoall(const void *sendbuf, //#ifdef USE_SWM +void SWM_Pass_app_data(struct swm_app_data *app_data) { + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast<shared_context*>(arg); + sctx->init_data_from_workload.received = true; + sctx->init_data_from_workload.final_iteration = app_data->final_iteration; + + ABT_thread_yield_to(global_prod_thread); +} + /* * peer: the receiving peer id * comm_id: the communicator id being used @@ -1814,6 +1851,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) my_ctx->sctx.num_ranks = nprocs; my_ctx->sctx.wait_id = 0; my_ctx->app_id = app_id; + my_ctx->sctx.init_data_from_workload.received = false; // printf("my_ctx nprocs %d\n", my_ctx->sctx.num_ranks); @@ -1943,6 +1981,12 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) &workload_caller, (void*)&(my_ctx->sctx), ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer)); + // 
Running thread that we just spawn until the producer adds an OP to FIFO or SWM_Mark_total_iterations is called. We use SWM_Mark_total_iterations in order to pass information into CODES from the SWM app. + while(my_ctx->sctx.fifo.empty() && !my_ctx->sctx.init_data_from_workload.received) + { + ABT_thread_yield_to(my_ctx->sctx.producer); + } + if(DBG_LINKING) { printf("\nRank %d create app thread? %d", rank, rcode); @@ -2049,6 +2093,25 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran } return 0; } + +static int comm_online_workload_get_final_iteration(int app_id, int rank) { + rank_mpi_compare cmp; + cmp.app_id = app_id; + cmp.rank = rank; + + struct qhash_head * hash_link = qhash_search(rank_tbl, &cmp); + if(!hash_link) + { + printf("Workload/job not found for rank id %d, and app_id %d\n", rank, app_id); + return -1; + } + rank_mpi_context * ctx = qhash_entry(hash_link, rank_mpi_context, hash_link); + if (ctx->sctx.init_data_from_workload.received) { + return ctx->sctx.init_data_from_workload.final_iteration; + } + return -1; +} + extern "C" { /* workload method name and function pointers for the CODES workload API */ struct codes_workload_method conc_online_comm_workload_method = @@ -2066,7 +2129,11 @@ struct codes_workload_method conc_online_comm_workload_method = // .codes_workload_get_rank_cnt comm_online_workload_get_rank_cnt, // .codes_workload_finalize = - comm_online_workload_finalize + comm_online_workload_finalize, + // .codes_workload_get_time = + NULL, + // .codes_workload_get_final_iteration = + comm_online_workload_get_final_iteration, }; } // closing brace for extern "C" diff --git a/src/workload/methods/codes-online-comm-wrkld.C b/src/workload/methods/codes-online-comm-wrkld.C index ca6978c6..8d783403 100644 --- a/src/workload/methods/codes-online-comm-wrkld.C +++ b/src/workload/methods/codes-online-comm-wrkld.C @@ -66,6 +66,10 @@ struct shared_context { void * swm_obj; ABT_thread producer; std::deque fifo; + struct 
{ + bool received; + int final_iteration; + } init_data_from_workload; }; struct rank_mpi_context { @@ -79,6 +83,21 @@ typedef struct rank_mpi_compare { int rank; } rank_mpi_compare; +void SWM_Pass_app_data(struct swm_app_data *app_data) { + /* Retreive the shared context state */ + ABT_thread prod; + void * arg; + int err = ABT_thread_self(&prod); + assert(err == ABT_SUCCESS); + err = ABT_thread_get_arg(prod, &arg); + assert(err == ABT_SUCCESS); + struct shared_context * sctx = static_cast(arg); + sctx->init_data_from_workload.received = true; + sctx->init_data_from_workload.final_iteration = app_data->final_iteration; + + ABT_thread_yield_to(global_prod_thread); +} + /* * peer: the receiving peer id * comm_id: the communicator id being used @@ -946,6 +965,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) my_ctx->sctx.num_ranks = nprocs; my_ctx->sctx.wait_id = 0; my_ctx->app_id = app_id; + my_ctx->sctx.init_data_from_workload.received = false; void** generic_ptrs; int array_len = 1; @@ -1038,6 +1058,12 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) &workload_caller, (void*)&(my_ctx->sctx), ABT_THREAD_ATTR_NULL, &(my_ctx->sctx.producer)); + // Running thread that we just spawn until the producer adds an OP to FIFO or SWM_Mark_total_iterations is called. We use SWM_Mark_total_iterations in order to pass information into CODES from the SWM app. 
+ while(my_ctx->sctx.fifo.empty() && !my_ctx->sctx.init_data_from_workload.received) + { + ABT_thread_yield_to(my_ctx->sctx.producer); + } + rank_mpi_compare cmp; cmp.app_id = app_id; cmp.rank = rank; @@ -1112,6 +1138,25 @@ static int comm_online_workload_finalize(const char* params, int app_id, int ran ABT_thread_free(&(temp_data->sctx.producer)); return 0; } + +static int comm_online_workload_get_final_iteration(int app_id, int rank) { + rank_mpi_compare cmp; + cmp.app_id = app_id; + cmp.rank = rank; + + struct qhash_head * hash_link = qhash_search(rank_tbl, &cmp); + if(!hash_link) + { + printf("Workload/job not found for rank id %d, and app_id %d\n", rank, app_id); + return -1; + } + rank_mpi_context * ctx = qhash_entry(hash_link, rank_mpi_context, hash_link); + if (ctx->sctx.init_data_from_workload.received) { + return ctx->sctx.init_data_from_workload.final_iteration; + } + return -1; +} + extern "C" { /* workload method name and function pointers for the CODES workload API */ struct codes_workload_method swm_online_comm_workload_method = @@ -1129,7 +1174,11 @@ struct codes_workload_method swm_online_comm_workload_method = // .codes_workload_get_rank_cnt comm_online_workload_get_rank_cnt, // .codes_workload_finalize = - comm_online_workload_finalize + comm_online_workload_finalize, + // .codes_workload_get_time = + NULL, + // .codes_workload_get_final_iteration + comm_online_workload_get_final_iteration, }; } // closing brace for extern "C" From 2433b8ba4dfe41427ccfe1377fcdd525e67b96a2 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 10 Jun 2025 15:41:04 -0400 Subject: [PATCH 064/110] Allowing surrogate to run in sequential mode --- codes/surrogate/application-surrogate.h | 17 +++- src/network-workloads/model-net-mpi-replay.c | 4 - src/surrogate/application-surrogate.c | 25 ++++- src/surrogate/init.c | 100 ++++++++++++++++++- 4 files changed, 134 insertions(+), 12 deletions(-) diff --git a/codes/surrogate/application-surrogate.h 
b/codes/surrogate/application-surrogate.h index 184a4f98..111df4ef 100644 --- a/codes/surrogate/application-surrogate.h +++ b/codes/surrogate/application-surrogate.h @@ -15,8 +15,23 @@ extern "C" { #endif +enum APP_DIRECTOR_OPTS { + APP_DIRECTOR_OPTS_every_n_gvt = 0, // Call director every `n` GVTs + APP_DIRECTOR_OPTS_call_every_ns, // Call director every X (virtual) nanoseconds +}; + +struct application_director_config { + enum APP_DIRECTOR_OPTS option; + union { + // To use when APP_DIRECTOR_OPTS_every_n_gvt + int every_n_gvt; + // To use when APP_DIRECTOR_OPTS_call_every_ns + double call_every_ns; + }; +}; + // Main function responsible for switching between high-fidelity and (application iteration) surrogate -void application_director_configure(int every_n_gvt, struct app_iteration_predictor *); +void application_director_configure(struct application_director_config *, struct app_iteration_predictor *); #ifdef __cplusplus } diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 0468663c..4398e50e 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -4122,10 +4122,6 @@ void modelnet_mpi_replay_configure_app_surrogate() tw_lpid const num_nw_lps_in_pe = codes_mapping_count_lps_of_type("nw-lp"); int const num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); application_surrogate_configure(num_nw_lps_in_pe, num_jobs, &iter_predictor); - - if (g_tw_mynode == 0) { - printf("Application surrogacy configured with a total of %d jobs\n", num_jobs); - } } diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c index 87a8dc74..48677cfd 100644 --- a/src/surrogate/application-surrogate.c +++ b/src/surrogate/application-surrogate.c @@ -2,7 +2,7 @@ #include static struct app_iteration_predictor * iter_predictor; -static int every_n_gvt = 1; +static struct application_director_config conf = {.option = APP_DIRECTOR_OPTS_call_every_ns, 
.every_n_gvt = 1000000}; static enum { PRE_JUMP = 0, POST_JUMP_switched, // Switched to surrogate-mode @@ -18,6 +18,11 @@ static enum { #define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); } static void application_director_pre_switch(tw_pe * pe) { + // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every + if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) { + tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns); + } + if (!iter_predictor->director.is_predictor_ready()) { return; } @@ -38,7 +43,13 @@ static void application_director_pre_switch(tw_pe * pe) { } static void application_director_post_switch(tw_pe * pe) { - tw_trigger_gvt_hook_every(every_n_gvt); + // Scheduling next GVT hook call + if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) { + tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns); + } else { + tw_trigger_gvt_hook_every(conf.every_n_gvt); + } + iter_predictor->director.reset(); if (director_state == POST_JUMP_switched) { @@ -65,10 +76,14 @@ void application_director(tw_pe * pe) { } } -void application_director_configure(int every_n_gvt_, struct app_iteration_predictor * iter_predictor_) { - every_n_gvt = every_n_gvt_; +void application_director_configure(struct application_director_config * conf_, struct app_iteration_predictor * iter_predictor_) { + conf = *conf_; iter_predictor = iter_predictor_; g_tw_gvt_hook = application_director; director_state = PRE_JUMP; - tw_trigger_gvt_hook_every(every_n_gvt); + if (conf.option == APP_DIRECTOR_OPTS_every_n_gvt) { + tw_trigger_gvt_hook_every(conf.every_n_gvt); + } else { + tw_trigger_gvt_hook_at(conf.call_every_ns); + } } diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 47ccc2db..c6a0a6aa 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -7,6 +7,8 @@ #include #endif +#define master_printf(...) 
if (g_tw_mynode == 0) { printf(__VA_ARGS__); } + bool freeze_network_on_switch = true; struct network_surrogate_config net_surr_config = {0}; bool is_network_surrogate_configured = false; @@ -153,6 +155,86 @@ void network_surrogate_configure( } } +static int load_and_validate_int_param(const char* param_name, int default_value) { + char param_str[MAX_NAME_LENGTH]; + param_str[0] = '\0'; + int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", param_name, NULL, param_str, MAX_NAME_LENGTH); + int value = (rc > 0) ? atoi(param_str) : default_value; + + if (value <= 0) { + tw_warning(TW_LOC, "%s must be a positive integer, got %d. Using default value %d.", param_name, value, default_value); + value = default_value; + } + + return value; +} + +static struct application_director_config load_director_config(void) { + int const default_gvt = 100; + int const default_ns = 1000000; // 1ms + + enum { + MODE_NOT_SET, + MODE_EVERY_N_GVT, + MODE_EVERY_N_NANOSECONDS, + MODE_UNKNOWN + } mode; + + char director_mode[MAX_NAME_LENGTH]; + director_mode[0] = '\0'; + int const rc_mode = configuration_get_value(&config, "APPLICATION_SURROGATE", "director_mode", NULL, director_mode, MAX_NAME_LENGTH); + + if (rc_mode == 0) { + mode = MODE_NOT_SET; + } else if (strcmp(director_mode, "every-n-gvt") == 0) { + mode = MODE_EVERY_N_GVT; + } else if (strcmp(director_mode, "every-n-nanoseconds") == 0) { + mode = MODE_EVERY_N_NANOSECONDS; + } else { + mode = MODE_UNKNOWN; + } + + int every_n_gvt = load_and_validate_int_param("director_num_gvt", default_gvt); + int every_n_ns = load_and_validate_int_param("director_num_ns", default_ns); + + bool const is_sequential = (g_tw_synchronization_protocol == SEQUENTIAL || + g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK); + + struct application_director_config config; + switch (mode) { + case MODE_EVERY_N_GVT: + if (is_sequential) { + tw_warning(TW_LOC, "Cannot use 'every-n-gvt' mode in sequential simulation. 
Forcing 'every-n-nanoseconds' mode."); + config.option = APP_DIRECTOR_OPTS_call_every_ns; + config.call_every_ns = every_n_ns; + } else { + config.option = APP_DIRECTOR_OPTS_every_n_gvt; + config.every_n_gvt = every_n_gvt; + } + break; + + case MODE_EVERY_N_NANOSECONDS: + config.option = APP_DIRECTOR_OPTS_call_every_ns; + config.call_every_ns = every_n_ns; + break; + + case MODE_UNKNOWN: + tw_warning(TW_LOC, "Unknown director_mode '%s'. Using default mode 'every-n-nanoseconds'.", director_mode); + config.option = APP_DIRECTOR_OPTS_call_every_ns; + config.call_every_ns = every_n_ns; + break; + + case MODE_NOT_SET: + default: + tw_warning(TW_LOC, "director_mode not set. Using default mode 'every-n-nanoseconds'."); + config.option = APP_DIRECTOR_OPTS_call_every_ns; + config.call_every_ns = every_n_ns; + break; + } + + return config; +} + void application_surrogate_configure( int num_terminals_in_pe, int num_apps, @@ -168,10 +250,24 @@ void application_surrogate_configure( .num_nodes_in_pe = num_terminals_in_pe, .num_iters_to_collect = num_of_iters_to_feed, }; - int every_n_gvt = 100; + + struct application_director_config app_dir_config = load_director_config(); + current_iter_predictor = avg_app_iteration_predictor(&predictor_config); - application_director_configure(every_n_gvt, ¤t_iter_predictor); + application_director_configure(&app_dir_config, ¤t_iter_predictor); *iter_pred = ¤t_iter_predictor; + + // Printing configuration summary + master_printf("\nApplication surrogate configuration:\n"); + master_printf(" Predictor - num_apps: %d, num_iters_to_collect: %d\n", + predictor_config.num_apps, predictor_config.num_iters_to_collect); + + if (app_dir_config.option == APP_DIRECTOR_OPTS_every_n_gvt) { + master_printf(" Director - mode: every-n-gvt, every_n_gvt: %d\n", app_dir_config.every_n_gvt); + } else { + master_printf(" Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns); + } + master_printf("\n"); } void 
free_application_surrogate(void) { From 26fa2acd5586c26ff62e6c262dad15ad74aa01ad Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 10 Jun 2025 15:48:59 -0400 Subject: [PATCH 065/110] Minor cosmetic change --- src/network-workloads/model-net-mpi-replay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 4398e50e..72b8f75f 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -748,7 +748,7 @@ void handle_other_finish( assert(ns->app_id == 0); //make sure that only the root workload is getting this notification assert(ns->local_rank == 0); //make sure that only the root rank is getting this notification - printf("App %d: Received finished workload notification",ns->app_id); + printf("App %d: Received finished workload notification\n", ns->app_id); // if(is_job_synthetic[ns->app_id]) // return; //nothing for synthetic (background) ranks to do here // printf(" And I am not synthetic\n"); From 1b0bdab330212e74ff661cf063daebbe0cebcebd Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 10 Jun 2025 16:17:42 -0400 Subject: [PATCH 066/110] Light refactoring of a large function in the application predictor --- .../app-iteration-predictor/average.c | 204 ++++++++++++------ 1 file changed, 136 insertions(+), 68 deletions(-) diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index 5f04e846..615594a5 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -41,6 +41,12 @@ static bool ready_to_skip = false; static void find_max_iter_per_app(int * save_last_iter); +static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count); +static inline void mpi_allreduce_int_sum(int const * local_data, int * result_data, int count); +static inline void 
mpi_allreduce_double_sum(double const * local_data, double * result_data, int count); +static inline void mpi_allreduce_bool_and(bool const * local_data, bool * result_data, int count); +static inline void init_int_array(int * array, int size, int value); +static inline void init_double_array(double * array, int size, double value); static inline int app_id_for(int nw_id_in_pe) { return arr_node_data[nw_id_in_pe].app_id; } @@ -157,9 +163,7 @@ static inline void post_init_share_ending_iteration(void) { ending_iteration_here[i] = arr_app_data[i].ending_iteration; } int ending_iteration[my_config.num_apps]; - if(MPI_Allreduce(ending_iteration_here, ending_iteration, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce call failed!"); - } + mpi_allreduce_int_max(ending_iteration_here, ending_iteration, my_config.num_apps); // Checking that total iterations are the same across nodes for (int i = 0; i < my_config.num_apps; i++) { @@ -186,9 +190,7 @@ static inline bool has_any_app_ended(bool * save_app_just_ended) { struct app_data * app_data = &arr_app_data[i]; app_just_ended_here[i] = app_data->status == APP_STATUS_just_completed; } - if(MPI_Allreduce(&app_just_ended_here, save_app_just_ended, my_config.num_apps, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce call failed!"); - } + mpi_allreduce_bool_and(app_just_ended_here, save_app_just_ended, my_config.num_apps); for (int i = 0; i < my_config.num_apps; i++) { if (save_app_just_ended[i]) { return true; @@ -241,9 +243,7 @@ static bool director_calls_is_predictor_ready(void) { // check that all applications have collected data for enough iterations to jump ahead bool const everyone_ready_here = has_everyone_accumulated_enough(); bool everyone_ready; - if(MPI_Allreduce(&everyone_ready_here, &everyone_ready, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce call failed!"); - } + 
mpi_allreduce_bool_and(&everyone_ready_here, &everyone_ready, 1); return everyone_ready; } @@ -257,10 +257,8 @@ static void director_calls_reset(void) { static void find_avg_iteration_time(double * save_avg_time) { double acc_iter_time_here[my_config.num_apps]; int acc_iters_here[my_config.num_apps]; - for (int i=0; i < my_config.num_apps; i++) { - acc_iter_time_here[i] = 0.0; - acc_iters_here[i] = 0; - } + init_double_array(acc_iter_time_here, my_config.num_apps, 0.0); + init_int_array(acc_iters_here, my_config.num_apps, 0); for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; int const app_id = node_data->app_id; @@ -268,13 +266,9 @@ static void find_avg_iteration_time(double * save_avg_time) { acc_iters_here[app_id] += node_data->acc_iters; } double acc_iter_time[my_config.num_apps]; - if(MPI_Allreduce(&acc_iter_time_here, &acc_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); - } + mpi_allreduce_double_sum(acc_iter_time_here, acc_iter_time, my_config.num_apps); int acc_iters[my_config.num_apps]; - if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); - } + mpi_allreduce_int_sum(acc_iters_here, acc_iters, my_config.num_apps); for (int i=0; i < my_config.num_apps; i++) { if (acc_iters[i]) { @@ -283,11 +277,46 @@ static void find_avg_iteration_time(double * save_avg_time) { } } +static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count) { + if(MPI_Allreduce(local_data, result_data, count, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! 
Couldn't compute maximum"); + } +} + +static inline void mpi_allreduce_int_sum(int const * local_data, int * result_data, int count) { + if(MPI_Allreduce(local_data, result_data, count, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); + } +} + +static inline void mpi_allreduce_double_sum(double const * local_data, double * result_data, int count) { + if(MPI_Allreduce(local_data, result_data, count, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); + } +} + +static inline void mpi_allreduce_bool_and(bool const * local_data, bool * result_data, int count) { + if(MPI_Allreduce(local_data, result_data, count, MPI_C_BOOL, MPI_LAND, MPI_COMM_CODES) != MPI_SUCCESS) { + tw_error(TW_LOC, "MPI_Allreduce call failed!"); + } +} + +static inline void init_int_array(int * array, int size, int value) { + for (int i = 0; i < size; i++) { + array[i] = value; + } +} + +static inline void init_double_array(double * array, int size, double value) { + for (int i = 0; i < size; i++) { + array[i] = value; + } +} + static void find_max_iter_per_app(int * save_last_iter) { int last_iter_here[my_config.num_apps]; - for (int i=0; i < my_config.num_apps; i++) { - last_iter_here[i] = INT_MIN; - } + init_int_array(last_iter_here, my_config.num_apps, INT_MIN); + for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; int const app_id = node_data->app_id; @@ -295,18 +324,14 @@ static void find_max_iter_per_app(int * save_last_iter) { last_iter_here[app_id] = node_data->last_iter; } } - if(MPI_Allreduce(&last_iter_here, save_last_iter, my_config.num_apps, MPI_INT, MPI_MAX, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce failed! 
Couldn't compute maximum"); - } + mpi_allreduce_int_max(last_iter_here, save_last_iter, my_config.num_apps); } static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * last_iter) { int acc_iters_here[my_config.num_apps]; double acc_last_iter_time[my_config.num_apps]; - for (int i=0; i < my_config.num_apps; i++) { - acc_iters_here[i] = 0; - acc_last_iter_time[i] = 0.0; - } + init_int_array(acc_iters_here, my_config.num_apps, 0); + init_double_array(acc_last_iter_time, my_config.num_apps, 0.0); for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; int const app_id = node_data->app_id; @@ -315,13 +340,9 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * acc_iters_here[app_id]++; } } - if(MPI_Allreduce(&acc_last_iter_time, save_last_iter_time, my_config.num_apps, MPI_DOUBLE, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); - } + mpi_allreduce_double_sum(acc_last_iter_time, save_last_iter_time, my_config.num_apps); int acc_iters[my_config.num_apps]; - if(MPI_Allreduce(&acc_iters_here, &acc_iters, my_config.num_apps, MPI_INT, MPI_SUM, MPI_COMM_CODES) != MPI_SUCCESS) { - tw_error(TW_LOC, "MPI_Allreduce failed! Couldn't add up"); - } + mpi_allreduce_int_sum(acc_iters_here, acc_iters, my_config.num_apps); for (int i=0; i < my_config.num_apps; i++) { if (acc_iters[i] > 0) { save_last_iter_time[i] /= acc_iters[i]; @@ -329,40 +350,44 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * } } -static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) { - // 0. Check if app is still running - bool is_running[my_config.num_apps]; - for (int i=0; i < my_config.num_apps; i++) { +static void get_running_apps(bool * is_running) { + for (int i = 0; i < my_config.num_apps; i++) { is_running[i] = arr_app_data[i].status != APP_STATUS_completed_everywhere; } - // 1. 
Compute end time for each application given current data (pick smallest) - // a. Find avg iteration per app - double avg_iter_time[my_config.num_apps]; - find_avg_iteration_time(avg_iter_time); - // b. Find iteration to start stwich after - int last_iter[my_config.num_apps]; - double last_iter_time[my_config.num_apps]; - find_max_iter_per_app(last_iter); - find_avg_time_for_max_iter(last_iter_time, last_iter); - // c. Compute avg end time for all apps (loop through every node, and add value to avg array) +} + +static double compute_earliest_end_time( + bool const * is_running, + double const * avg_iter_time, + int const * last_iter, + double const * last_iter_time) { + // Compute avg end time for all apps (loop through every node, and add value to avg array) double apps_end_time[my_config.num_apps]; - for (int i=0; i < my_config.num_apps; i++) { + for (int i = 0; i < my_config.num_apps; i++) { int const iterations_left = arr_app_data[i].ending_iteration - last_iter[i]; apps_end_time[i] = last_iter_time[i] + iterations_left * avg_iter_time[i]; } - // d. Pick smallest compute end time/time to skip + // Pick smallest compute end time/time to skip double switch_time = DBL_MAX; - for (int i=0; i < my_config.num_apps; i++) { + for (int i = 0; i < my_config.num_apps; i++) { if (is_running[i] && switch_time > apps_end_time[i]) { switch_time = apps_end_time[i]; } } - // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point - // a. 
Find iteration to skip to per node - double apps_restart_at_time[my_config.num_apps]; - int apps_restart_at_iter[my_config.num_apps]; + return switch_time; +} + +static bool compute_restart_params( + bool const * is_running, + double const * avg_iter_time, + int const * last_iter, + double const * last_iter_time, + double switch_time, + double * apps_restart_at_time, + int * apps_restart_at_iter) { + // Find iteration to skip to per node bool worth_switching = true; - for (int i=0; i < my_config.num_apps; i++) { + for (int i = 0; i < my_config.num_apps; i++) { if (!is_running[i]) { continue; } @@ -375,22 +400,27 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) worth_switching = false; } } - // b. Compute last application to restart (this is restarting_at) + return worth_switching; +} + +static double find_latest_restart_time(bool const * is_running, double const * apps_restart_at_time) { + // Compute last application to restart (this is restarting_at) double last_to_finish = 0; - for (int i=0; i < my_config.num_apps; i++) { + for (int i = 0; i < my_config.num_apps; i++) { if (is_running[i] && last_to_finish < apps_restart_at_time[i]) { last_to_finish = apps_restart_at_time[i]; } } - // c. If the number of iterations to skip is zero for any app, force reset of predictor tracking - if (!worth_switching) { - return (struct fast_forward_values) { - .status = FAST_FORWARD_restart, - .restarting_at = last_to_finish, - }; - } - // 4. 
Set values for iteration to restart at and iterations to jump for each application - for (int i=0; i < my_config.num_apps; i++) { + return last_to_finish; +} + +static void set_app_prediction_data( + bool const * is_running, + int const * last_iter, + int const * apps_restart_at_iter, + double const * apps_restart_at_time) { + // Set values for iteration to restart at and iterations to jump for each application + for (int i = 0; i < my_config.num_apps; i++) { if (!is_running[i]) { continue; } @@ -398,6 +428,44 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i]; arr_app_data[i].pred.restart_at = apps_restart_at_time[i]; } +} + +static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) { + // 0. Check if app is still running + bool is_running[my_config.num_apps]; + get_running_apps(is_running); + + // 1. Compute end time for each application given current data (pick smallest) + // a. Find avg iteration per app + double avg_iter_time[my_config.num_apps]; + find_avg_iteration_time(avg_iter_time); + // b. Find iteration to start switch after + int last_iter[my_config.num_apps]; + double last_iter_time[my_config.num_apps]; + find_max_iter_per_app(last_iter); + find_avg_time_for_max_iter(last_iter_time, last_iter); + // c. & d. Compute and pick smallest end time/time to skip + double switch_time = compute_earliest_end_time(is_running, avg_iter_time, last_iter, last_iter_time); + + // 2. Find number of iterations to skip per node given time to skip, then compute when each application is expected to reach this point + // a. Find iteration to skip to per node + double apps_restart_at_time[my_config.num_apps]; + int apps_restart_at_iter[my_config.num_apps]; + bool worth_switching = compute_restart_params(is_running, avg_iter_time, last_iter, last_iter_time, switch_time, apps_restart_at_time, apps_restart_at_iter); + + // b. 
Compute last application to restart (this is restarting_at) + double last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time); + + // c. If the number of iterations to skip is zero for any app, force reset of predictor tracking + if (!worth_switching) { + return (struct fast_forward_values) { + .status = FAST_FORWARD_restart, + .restarting_at = last_to_finish, + }; + } + + // 3. Set values for iteration to restart at and iterations to jump for each application + set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time); ready_to_skip = true; return (struct fast_forward_values) { From 553f4926836cf7afed55d88db5c63264242860e1 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 10 Jun 2025 17:02:20 -0400 Subject: [PATCH 067/110] Removing old (hardcoded) application surrogate --- src/network-workloads/model-net-mpi-replay.c | 227 ++----------------- 1 file changed, 13 insertions(+), 214 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 72b8f75f..0c3d9ed1 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -43,7 +43,6 @@ #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine #define OUTPUT_MARKS 0 #define LP_DEBUG 0 -#define HARD_CODED_AVG_ITER_PREDICTOR 0 static int msg_size_hash_compare( void *key, struct qhash_head *link); @@ -106,7 +105,6 @@ int period_count[MAX_JOBS]; double period_time[MAX_JOBS][MAX_PERIODS_PER_APP]; float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP]; char file_name_of_job[MAX_JOBS][8192]; -char skipping_iterations_file[8192]; tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0}; @@ -168,9 +166,6 @@ static int enable_debug = 0; // Surrogate variables struct app_iteration_predictor *iter_predictor = NULL; static int nw_id_counter = 0; -// We can skip multiple iterations using an average as 
our predicted iteration time. This will skip ahead to a future step in the simulation -static struct AvgSurrogateSwitchingTimesForApp *skip_iter_config; -static size_t skip_iter_config_size = 0; /* set group context */ struct codes_mctx mapping_context; @@ -389,10 +384,6 @@ struct nw_state char output_buf[512]; char col_stats[64]; struct ross_model_sample ross_sample; - - // Configuration to tell the node when to skip some iterations - struct AvgSurrogateSwitchingTimesForApp *switch_config; - size_t switch_config_size; }; /* data for handling reverse computation. @@ -488,11 +479,6 @@ struct nw_message struct { int64_t saved_num_bytes; } mpi_ack; - - // For SURR_SKIP_ITERATION - struct { - struct AvgSurrogateSwitchingTimesForApp * config_used; - } surr; } rc; }; @@ -1199,75 +1185,13 @@ void arrive_syn_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * lp) } } -// Surrogate switiching structure -struct AvgSurrogateSwitchingTimesForApp { - int app_id; - int skip_at_iter; - int resume_at_iter; - double time_per_iter; - bool done; // This is a flag to indicate whethe we already completed this skipping stage -}; - -static int comp_AvgSurrogateSwitchingTimesForApp( - struct AvgSurrogateSwitchingTimesForApp *left, - struct AvgSurrogateSwitchingTimesForApp *right -) { - if (left->app_id < right->app_id) { - return -1; - } - if (left->app_id > right->app_id) { - return 1; - } - // else: left->app_id == right->app_id - - if (left->skip_at_iter < right->skip_at_iter) { - return -1; - } - if (left->skip_at_iter > right->skip_at_iter) { - return 1; - } - - return 0; -} - -static int iters_skipped(struct AvgSurrogateSwitchingTimesForApp * avgSur) { - return avgSur->resume_at_iter - avgSur->skip_at_iter; -} - -static struct AvgSurrogateSwitchingTimesForApp * get_switch_config(struct nw_state * s) { - if (s->switch_config == NULL) { - return NULL; - } - for (int i=0; i < s->switch_config_size; i++) { - struct AvgSurrogateSwitchingTimesForApp * jump = &s->switch_config[i]; - 
assert(jump->app_id == s->app_id); - if (!jump->done) { - return jump; - } - } - return NULL; -} - -static void skip_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { - if (HARD_CODED_AVG_ITER_PREDICTOR) { - m->rc.surr.config_used->done = false; - } -} +// We never rollback all op messages properly. This is because we have not found any situation where we have to fully rollback a SURR_SKIP_ITERATION event. Any event that schedules a SURR_SKIP_ITERATION event will have been completed long before the SURR_SKIP_ITERATION event is processed. +static void skip_to_iteration_rc(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) {} static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * m) { struct codes_workload_op mpi_op; - int resume_at_iter; - - if (HARD_CODED_AVG_ITER_PREDICTOR) { - struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); - assert(switch_config != NULL); - resume_at_iter = switch_config->resume_at_iter; - m->rc.surr.config_used = switch_config; - switch_config->done = true; - } else { - resume_at_iter = m->fwd.resume_at_iter; - } + int resume_at_iter = m->fwd.resume_at_iter; // consuming all events until indicated iteration is reached bool reached_end = false; @@ -1297,20 +1221,6 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * tw_event_send(e); } -static bool have_we_hit_surrogate_switch(struct nw_state* s, struct codes_workload_op * mpi_op) { - struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); - if (switch_config != NULL) { - return mpi_op->u.send.tag == switch_config->skip_at_iter; - } - return false; -} - -static double time_to_skip_iterations(struct nw_state* s) { - struct AvgSurrogateSwitchingTimesForApp * switch_config = get_switch_config(s); - assert(switch_config != NULL); - return switch_config->time_per_iter * iters_skipped(switch_config); -} - /* Debugging functions, may generate unused function warning */ 
/*static void print_waiting_reqs(uint32_t * reqs, int count) { @@ -2756,30 +2666,6 @@ void nw_test_init(nw_state* s, tw_lp* lp) } } - if (skip_iter_config_size > 0) { - size_t size = 0; - // Finding number of times to skip for this job - for (size_t i = 0; i < skip_iter_config_size; i++) { - if (lid.job == skip_iter_config[i].app_id) { - size++; - } - } - // Constructing switch_config - s->switch_config_size = size; - if (size > 0) { - s->switch_config = malloc(size * sizeof(struct AvgSurrogateSwitchingTimesForApp)); - size_t j = 0; - for (size_t i = 0; i < skip_iter_config_size; i++) { - if (lid.job == skip_iter_config[i].app_id) { - s->switch_config[j] = skip_iter_config[i]; - j++; - } - } - } - } else { - s->switch_config = NULL; - s->switch_config_size = 0; - } if (iter_predictor && !am_i_synthetic) { int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank); if (ending_iter == -1) { @@ -3177,28 +3063,16 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l m->rc.mpi_next.mark.saved_marker_time = tw_now(lp); int iteration_i = mpi_op->u.send.tag; - if (HARD_CODED_AVG_ITER_PREDICTOR) { - // If we have reached the surrogate switch time, skip next iteration(s) - if (have_we_hit_surrogate_switch(s, mpi_op)) { - tw_event *e = tw_event_new(lp->gid, time_to_skip_iterations(s), lp); - nw_message* msg = (nw_message*) tw_event_data(e); - msg->msg_type = SURR_SKIP_ITERATION; - tw_event_send(e); - } else { - codes_issue_next_event(lp); - } + if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) { + bf->c13 = 1; + struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe); + tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp); + nw_message* msg = (nw_message*) tw_event_data(e); + msg->msg_type = SURR_SKIP_ITERATION; + msg->fwd.resume_at_iter = iter_pred.resume_at_iter; + tw_event_send(e); } else { - if 
(iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, iteration_i)) { - bf->c13 = 1; - struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe); - tw_event *e = tw_event_new(lp->gid, iter_pred.restart_at - tw_now(lp), lp); - nw_message* msg = (nw_message*) tw_event_data(e); - msg->msg_type = SURR_SKIP_ITERATION; - msg->fwd.resume_at_iter = iter_pred.resume_at_iter; - tw_event_send(e); - } else { - codes_issue_next_event(lp); - } + codes_issue_next_event(lp); } } break; @@ -3337,10 +3211,6 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) rc_stack_destroy(s->matched_reqs); rc_stack_destroy(s->processed_ops); rc_stack_destroy(s->processed_wait_op); - - if (s->switch_config != NULL) { - free(s->switch_config); - } } void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp) @@ -3416,7 +3286,7 @@ void nw_test_event_handler_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * l break; case SURR_SKIP_ITERATION: - skip_iteration_rc(s, lp, bf, m); + skip_to_iteration_rc(s, lp, bf, m); break; } } @@ -3669,9 +3539,6 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) { is_same &= (strcmp(before->output_buf, after->output_buf) == 0); is_same &= (strcmp(before->col_stats, after->col_stats) == 0); - // Compare switch configuration size - is_same &= (before->switch_config_size == after->switch_config_size); - // Complex elements is_same &= are_qlist_equal(&before->arrival_queue, &after->arrival_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues); is_same &= are_qlist_equal(&before->pending_recvs_queue, &after->pending_recvs_queue, QLIST_OFFSET(mpi_msgs_queue, ql), (bool (*) (void *, void *)) compare_mpi_msg_queues); @@ -3691,7 +3558,6 @@ static bool check_nw_lp_state(nw_state * before, nw_state const * after) { // - msg_sz_table // Pointers used in some data collection (IO) or outside of PDES loop // - mpi_wkld_samples - // - switch_config 
// There is no need to implement msg_sz_table as all values are already // accounted for in msg_sz_list. We can safely ignore all values in msg_sz_list @@ -3819,10 +3685,6 @@ static void print_nw_lp_state(FILE * out, char const * prefix, nw_state * state) fprintf(out, "%s | | comm_time = %g\n", prefix, state->ross_sample.comm_time); fprintf(out, "%s | | max_time = %g\n", prefix, state->ross_sample.max_time); fprintf(out, "%s | | avg_msg_time = %g\n", prefix, state->ross_sample.avg_msg_time); - - // Configuration - fprintf(out, "%s |* switch_config = %p\n", prefix, state->switch_config); - fprintf(out, "%s | switch_config_size = %zu\n", prefix, state->switch_config_size); } static char const * const MPI_NW_EVENTS_to_string(enum MPI_NW_EVENTS event_type) { @@ -3927,10 +3789,6 @@ static void print_nw_message(FILE * out, char const * prefix, nw_state* s, struc fprintf(out, "%s | | mpi_ack.saved_num_bytes = %ld\n", prefix, msg->rc.mpi_ack.saved_num_bytes); break; - case SURR_SKIP_ITERATION: - fprintf(out, "%s | | surr.config_used = %p\n", prefix, msg->rc.surr.config_used); - break; - default: break; } @@ -3971,7 +3829,6 @@ const tw_optdef app_opt [] = TWOPT_CHAR("cortex-class", cortex_class, "Python class implementing the CoRtEx translator"), TWOPT_CHAR("cortex-gen", cortex_gen, "Python function to pre-generate MPI events"), #endif - TWOPT_CHAR("skipping-iterations-file", skipping_iterations_file, "Configuration file name for which steps to skip"), TWOPT_END() }; @@ -4300,60 +4157,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) jobmap_ctx = codes_jobmap_configure(CODES_JOBMAP_IDENTITY, &jobmap_ident_p); } - - // Loading skipping iterations configuration - if(strlen(skipping_iterations_file) > 0) { - FILE *file = fopen(skipping_iterations_file, "r"); - if(!file) { - tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file); - } - - // Finding number of skipping iteration rows - int i = 0; - for(; !feof(file); i++) { - struct 
AvgSurrogateSwitchingTimesForApp skip_row; - - int ref = fscanf(file, "%d %d %d %lf", &skip_row.app_id, &skip_row.skip_at_iter, &skip_row.resume_at_iter, &skip_row.time_per_iter); - - if (ref != 4) { // We couldn't read all four values - fprintf(stderr, "Warning: Couldn't read a row of 'skipping-iterations-file'. Stopping after reading %d rows.\n", i); - break; - } - } - - skip_iter_config_size = i; - - skip_iter_config = malloc(skip_iter_config_size * sizeof(struct AvgSurrogateSwitchingTimesForApp)); - - // Loading in memory all times to skip iterations - fseek(file, 0, SEEK_SET); - for(i = 0; i < skip_iter_config_size; i++) { - struct AvgSurrogateSwitchingTimesForApp *skip_row = &skip_iter_config[i]; - - fscanf(file, "%d %d %d %lf", &skip_row->app_id, &skip_row->skip_at_iter, &skip_row->resume_at_iter, &skip_row->time_per_iter); - skip_row->done = false; - } - fclose(file); - - // Sorting. To skip iterations we asume that all skips for a specific job appear in increasing order - qsort( - skip_iter_config, - skip_iter_config_size, - sizeof(struct AvgSurrogateSwitchingTimesForApp), - (int (*)(const void *, const void *)) comp_AvgSurrogateSwitchingTimesForApp); - - // Printing configuration - if(!g_tw_mynode && skip_iter_config_size) { - printf("\n\nConfiguration for skipping selected iterations of one or more jobs has been loaded.\n"); - printf("| job_id skip_at_iter resume_at_iter time_per_iter\n"); - for (size_t i=0; iapp_id, skip_row->skip_at_iter, skip_row->resume_at_iter, skip_row->time_per_iter); - } - printf("\n"); - } - } - MPI_Comm_rank(MPI_COMM_CODES, &rank); MPI_Comm_size(MPI_COMM_CODES, &nprocs); @@ -4554,10 +4357,6 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) if(alloc_spec) codes_jobmap_destroy(jobmap_ctx); - if (skip_iter_config != NULL) { - free(skip_iter_config); - } - print_surrogate_stats(); free_application_surrogate(); From 650fd9ea2094e165f218d16a069ab558bbd2ca3c Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 11 Jun 2025 
10:48:22 -0400 Subject: [PATCH 068/110] Refactoring strategy to freeze network in network director --- codes/model-net-lp.h | 2 +- codes/surrogate/network-surrogate.h | 6 +- src/networks/model-net/core/model-net-lp.c | 4 +- src/networks/model-net/dragonfly-dally.C | 48 +++++------- src/surrogate/network-surrogate.c | 87 ++++------------------ 5 files changed, 39 insertions(+), 108 deletions(-) diff --git a/codes/model-net-lp.h b/codes/model-net-lp.h index a7585ce4..0a20a2f9 100644 --- a/codes/model-net-lp.h +++ b/codes/model-net-lp.h @@ -136,7 +136,7 @@ void model_net_method_switch_to_highdef(void); // It will call the function (pointer) on the internal structure/network model. // The lp parameter has to be a model-net lp. The function pointer has to coincide with the underlying subtype -void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp, tw_event **), tw_event **); +void model_net_method_call_inner(tw_lp * lp, void (*) (void * inner, tw_lp * lp, void * data), void * data); /// The following functions/data structures should not need to be used by /// model developers - they are just provided so other internal components can diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h index a6060ea1..9a9b2680 100644 --- a/codes/surrogate/network-surrogate.h +++ b/codes/surrogate/network-surrogate.h @@ -42,14 +42,16 @@ struct director_data { // Parameters: `data` corresponds to the lp sub-state, lp is the lp pointer, and the array of events in queue (to be processed) typedef void (*model_switch_f) (void * data, tw_lp * lp, tw_event **); typedef bool (*model_ask_if_freeze_f) (tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode +typedef void (*model_check_event_f) (void * state, tw_lp * lp, tw_event * event); // Determines whether the event should be "frozen" or should be allowed to run during surrogate-mode struct lp_types_switch { 
char lpname[MAX_NAME_LENGTH]; bool trigger_idle_modelnet; // Trigger idle events for model-net (prevents a model to be stuck in a schedule loop if it is to process packets during surrogate-mode). If this is true and the lpname does not start with 'modelnet_', the behaviour is undefined model_switch_f highdef_to_surrogate; model_switch_f surrogate_to_highdef; - model_ask_if_freeze_f should_event_be_frozen; // NULL means event from LP type shouldn't be frozen - model_ask_if_freeze_f should_event_be_deleted; // NULL means event from LP type shouldn't be deleted + model_ask_if_freeze_f should_event_be_frozen; // true means event from LP type shouldn't be frozen + model_ask_if_freeze_f should_event_be_deleted; // true means event from LP type shouldn't be deleted + model_check_event_f check_event_in_queue; }; struct switch_at_struct { diff --git a/src/networks/model-net/core/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c index b513675b..1595f480 100644 --- a/src/networks/model-net/core/model-net-lp.c +++ b/src/networks/model-net/core/model-net-lp.c @@ -1498,10 +1498,10 @@ void model_net_method_switch_to_highdef_lp(tw_lp * lp) { ns->in_sched_recv_loop |= ns->sched_recv_loop_pre_surrogate; } -void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp, tw_event **), tw_event ** lp_events) { +void model_net_method_call_inner(tw_lp * lp, void (*fun) (void * inner, tw_lp * lp, void * data), void * data) { model_net_base_state * const ns = (model_net_base_state*) lp->cur_state; - fun(ns->sub_state, lp, lp_events); + fun(ns->sub_state, lp, data); } int model_net_get_event_type_lp(model_net_wrap_msg * msg) { diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 0fec30f6..3e2ce29d 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -198,6 +198,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate(terminal_state * s, tw static void 
dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw_lp * lp, tw_event **); static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event); static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event); +static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( terminal_state * s, tw_lp * lp, tw_event * event); // // ==== END OF Parameters to tune surrogate mode ==== @@ -2449,6 +2450,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) .surrogate_to_highdef = (model_switch_f) dragonfly_dally_terminal_surrogate_to_highdef, .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen, .should_event_be_deleted = NULL, + .check_event_in_queue = (model_check_event_f) dragonfly_dally_terminal_pre_surrogate_switch_event_queue, }, {.lpname = "modelnet_dragonfly_dally_router", .trigger_idle_modelnet = false, @@ -2456,6 +2458,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) .surrogate_to_highdef = NULL, .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen, .should_event_be_deleted = NULL, + .check_event_in_queue = NULL, }, 0 } @@ -3017,37 +3020,27 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t pa } } -// Constructs a hashmap with all the T_NOTIFY events to be processed. -// The key of the list is the GID for the source terminal. 
The value of the -// hash is the end time -static map construct_map_of_NOTIFY_LATENCY_events( - tw_lp * lp, tw_event ** const terminal_events) { - // hash map to store T_NOTIFY events found (`packet_ID` and `travel_end_time`) - map notification_events_map; - - for (size_t i = 0; terminal_events && terminal_events[i] != NULL; i++) { - assert(terminal_events[i]->dest_lpid == lp->gid); - tw_event * event = terminal_events[i]; - int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event)); - // if event is T_NOTIFY, add event relevant data into hash map for T_NOTIFY event - if (event_type == MN_BASE_PASS) { - terminal_dally_message * msg = (terminal_dally_message *) - model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event)); - if (msg->type == T_NOTIFY) { - assert(msg->notify_type == NOTIFY_LATENCY); - notification_events_map[msg->packet_ID] = msg->travel_end_time; - } +// We check an event that is in the event queue, thus we do not process it yet +static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( + terminal_state * s, tw_lp * lp, tw_event * event) { + int const event_type = model_net_get_event_type_lp((model_net_wrap_msg *) tw_event_data(event)); + // if event is T_NOTIFY, add event relevant data into hash map for T_NOTIFY event + if (event_type == MN_BASE_PASS) { + terminal_dally_message * msg = (terminal_dally_message *) + model_net_method_msg_from_tw_event(lp, (model_net_wrap_msg *) tw_event_data(event)); + assert(msg != NULL); + if (msg->type == T_NOTIFY) { + assert(msg->notify_type == NOTIFY_LATENCY); + feed_packet_to_predictor(s, lp, msg->packet_ID, msg->travel_end_time); + s->sent_packets.erase(msg->packet_ID); } } - - return notification_events_map; } // This function never rollsback because it's called at GVT static void dragonfly_dally_terminal_highdef_to_surrogate( terminal_state * s, tw_lp * lp, tw_event ** terminal_events) { - - auto notification_events_map = 
construct_map_of_NOTIFY_LATENCY_events(lp, terminal_events); + (void) terminal_events; if (s->arrival_of_last_packet.packet_ID != -1) { assert(s->sent_packets.count(s->arrival_of_last_packet.packet_ID) == 1); // packet_ID is in s->sent_packets @@ -3069,13 +3062,8 @@ static void dragonfly_dally_terminal_highdef_to_surrogate( assert(packet_ID == sent.start.packet_ID); - // Finding out whether the packet-latency is on the list of messages to be processed - bool const in_events_to_process = notification_events_map.count(packet_ID) == 1; - if (in_events_to_process) { - feed_packet_to_predictor(s, lp, packet_ID, notification_events_map[sent.start.packet_ID]); - // The packet has not been delievered. Send directly to destination and notify of zombie event - } else if (freeze_network_on_switch) { + if (freeze_network_on_switch) { struct packet_end predicted_end = terminal_predictor->predict(s->predictor_data, lp, s->terminal_id, &sent.start); diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c index a8bc671c..8f12ec33 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -69,8 +69,18 @@ static void shift_events_to_future_pe(tw_pe * pe) { char const * lp_type_name; int rep_id, offset; // unused codes_mapping_get_lp_info2(next_event->dest_lpid, NULL, &lp_type_name, NULL, &rep_id, &offset); + bool const is_lp_modelnet = strncmp("modelnet_", lp_type_name, 9) == 0; struct lp_types_switch const * const lp_type_switch = get_type_switch(lp_type_name); + // "Processing" event + if (lp_type_switch && lp_type_switch->check_event_in_queue) { + if (is_lp_modelnet) { + model_net_method_call_inner(next_event->dest_lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->check_event_in_queue, next_event); + } else { + lp_type_switch->check_event_in_queue(next_event->dest_lp->cur_state, next_event->dest_lp, next_event); + } + } + // shifting time stamps to the future for events to freeze bool deleted = false; if 
(lp_type_switch && lp_type_switch->should_event_be_frozen @@ -126,71 +136,6 @@ static void shift_events_to_future_pe(tw_pe * pe) { } -// Returns an array of size `g_tw_nlp`, where each element is a null-terminated -// array containing all the events that each LP has for processing -static tw_event *** order_events_per_lps(tw_pe * pe) { - // 0. Create array for linked list of size g_tw_nlp to store events per lp - tw_event ** lp_queue_events = (tw_event **) calloc(g_tw_nlp, sizeof(tw_event *)); - // 0b. Create simple array (size g_tw_lp) to store number of events per lp - size_t * num_lp_queue_events = (size_t *) calloc(g_tw_nlp, sizeof(size_t)); - - // 1. loop extracting events from queue - // a. check from which local lp does the event belong - // b. add event to reversed linked-list of given lp and increase lp counter - tw_event * next_event = tw_pq_dequeue(pe->pq); - size_t events_dequeued = 0; - while (next_event) { - // Filtering events to freeze - assert(next_event->prev == NULL); - - // finding out lp type - assert(tw_getlocal_lp(next_event->dest_lpid) == next_event->dest_lp); - tw_lpid const lpid = next_event->dest_lp->id; - - // store event in lp_queue_events - next_event->prev = lp_queue_events[lpid]; - lp_queue_events[lpid] = next_event; - num_lp_queue_events[lpid]++; - events_dequeued++; - - next_event = tw_pq_dequeue(pe->pq); - } - - // 2. create array (triple pointer type, **) of size `g_tw_nlp + total events` - // to store events per lp, null-terminated - tw_event *** lps_events = (tw_event ** *) calloc(g_tw_nlp, sizeof(tw_event **)); - tw_event ** all_events_mem = (tw_event * *) calloc(g_tw_nlp + events_dequeued, sizeof(tw_event *)); - - // 3. 
loop through each linked-list insert each event back into the - // queue and store address copy into lp array - size_t event_i = 0; - for (size_t lpid = 0; lpid < g_tw_nlp; lpid++) { - lps_events[lpid] = &all_events_mem[event_i]; - - tw_event * dequed_events = lp_queue_events[lpid]; - while (dequed_events) { - // event address copy - all_events_mem[event_i] = dequed_events; - - // placing back into queue - tw_event * const prev_event = dequed_events; - dequed_events = dequed_events->prev; - prev_event->prev = NULL; - tw_pq_enqueue(pe->pq, prev_event); - - event_i++; - } - event_i++; - } - assert(event_i == g_tw_nlp + events_dequeued); - - assert(g_tw_nlp > 0 && lps_events[0] == all_events_mem); - free(lp_queue_events); - free(num_lp_queue_events); - return lps_events; -} - - // Switching from a (vanilla) high-def simulation to surrogate mode // consists of: // - Cancel all events that have to be cancelled and clean everything @@ -210,7 +155,6 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size); shift_events_to_future_pe(pe); printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size); - tw_event *** lps_events = order_events_per_lps(pe); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -241,11 +185,11 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { assert(is_lp_modelnet); model_net_method_switch_to_surrogate_lp(lp); } - if (lp_type_switch->surrogate_to_highdef) { + if (lp_type_switch->highdef_to_surrogate) { if (is_lp_modelnet) { - model_net_method_call_inner(lp, lp_type_switch->highdef_to_surrogate, lps_events[local_lpid]); + model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->highdef_to_surrogate, NULL); } else { - lp_type_switch->highdef_to_surrogate(lp->cur_state, lp, 
lps_events[local_lpid]); + lp_type_switch->highdef_to_surrogate(lp->cur_state, lp, NULL); } } } @@ -256,9 +200,6 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { tw_scheduler_rollback_and_cancel_events_pe(pe); } - assert(lps_events[0] != NULL); - free(lps_events[0]); - free(lps_events); } @@ -302,7 +243,7 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { } if (lp_type_switch->surrogate_to_highdef) { if (is_lp_modelnet) { - model_net_method_call_inner(lp, lp_type_switch->surrogate_to_highdef, NULL); + model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->surrogate_to_highdef, NULL); } else { lp_type_switch->surrogate_to_highdef(lp->cur_state, lp, NULL); } From 7db95be1c612948f517ff1ade84660ee7937cad0 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 11 Jun 2025 13:23:08 -0400 Subject: [PATCH 069/110] Refactor network director to use separate queue for frozen events instead of timestamp manipulation --- codes/surrogate/network-surrogate.h | 3 + src/surrogate/network-surrogate.c | 135 +++++++++++++++++++--------- 2 files changed, 98 insertions(+), 40 deletions(-) diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h index 9a9b2680..2f86ac21 100644 --- a/codes/surrogate/network-surrogate.h +++ b/codes/surrogate/network-surrogate.h @@ -65,6 +65,9 @@ extern struct switch_at_struct switch_network_at; // Main function responsible for switching between high-fidelity and (network) surrogate void network_director(tw_pe * pe); +// Function for application director to use network freezing machinery +void surrogate_switch_network_model(tw_pe * pe); + #ifdef __cplusplus } #endif diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c index 8f12ec33..13fccb37 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -8,6 +8,10 @@ double surrogate_switching_time = 0.0; double time_in_surrogate = 0.0; static double surrogate_time_last 
= 0.0; +// === Frozen events system for separate queue approach +static tw_event *frozen_events_head = NULL; // Head of frozen events linked list +static double frozen_events_switch_time = 0.0; // Time when we switched to surrogate mode + // === Director functionality // @@ -22,13 +26,17 @@ static struct lp_types_switch const * get_type_switch(char const * const name) { } -static void shift_events_to_future_pe(tw_pe * pe) { +static void freeze_events_to_separate_queue_pe(tw_pe * pe) { #ifdef USE_RAND_TIEBREAKER tw_event_sig gvt_sig = pe->GVT_sig; tw_stime gvt = gvt_sig.recv_ts; #else tw_stime gvt = pe->GVT; #endif + + // Store the time when we switch to surrogate mode + frozen_events_switch_time = gvt; + tw_event * next_event = tw_pq_dequeue(pe->pq); // If there aren't any events left to process, then this PE has nothing to do @@ -36,24 +44,16 @@ static void shift_events_to_future_pe(tw_pe * pe) { return; } - // We have to put the events back into the queue after we switch back, but if we never - // switch back they will never get to be processed and thus we can clean them - double switch_offset = g_tw_ts_end; - if (switch_network_at.current_i < switch_network_at.total) { - double const next_switch = switch_network_at.time_stampts[switch_network_at.current_i + 1]; - double const pre_switch_time = gvt; - switch_offset = next_switch - pre_switch_time; - assert(pre_switch_time < next_switch); - //printf("gvt=%f next_switch=%f switch_offset=%f\n", pre_switch_time, next_switch, switch_offset); - } - assert(0 <= switch_network_at.current_i && switch_network_at.current_i < switch_network_at.total); - double const current_switch_time = switch_network_at.time_stampts[switch_network_at.current_i]; - assert(current_switch_time <= gvt); + tw_event * dequed_events = NULL; // Linked list of non-frozen events, to be placed back in the queue + int events_processed = 0; // Total events processed from queue + int events_enqueued = 0; // Events put back in queue + int events_frozen = 
0; // Events moved to frozen queue + int events_deleted = 0; // Events deleted - tw_event * dequed_events = NULL; // Linked list of workload events, to be placed again in the queue - int events_dequeued = 0; // for stats on code correctness // Traversing all events stored in the queue while (next_event) { + events_processed++; + // Filtering events to freeze assert(next_event->prev == NULL); #ifdef USE_RAND_TIEBREAKER @@ -81,38 +81,35 @@ static void shift_events_to_future_pe(tw_pe * pe) { } } - // shifting time stamps to the future for events to freeze bool deleted = false; + bool frozen = false; + + // Check if event should be frozen (moved to separate queue) if (lp_type_switch && lp_type_switch->should_event_be_frozen && lp_type_switch->should_event_be_frozen(next_event->dest_lp, next_event)) { -#ifdef USE_RAND_TIEBREAKER - assert(next_event->recv_ts == next_event->sig.recv_ts); - next_event->recv_ts += switch_offset; - next_event->sig.recv_ts = next_event->recv_ts; -#else - next_event->recv_ts += switch_offset; -#endif - assert(next_event->recv_ts >= current_switch_time); + // Add to frozen events linked list (no timestamp manipulation here) + next_event->prev = frozen_events_head; + frozen_events_head = next_event; + frozen = true; + events_frozen++; // deleting event if we need to } else if (lp_type_switch && lp_type_switch->should_event_be_deleted && lp_type_switch->should_event_be_deleted(next_event->dest_lp, next_event)) { tw_event_free(pe, next_event); deleted = true; + events_deleted++; } - // store event in deque_events to inject immediately back to the queue - if (!deleted) { + // store event in dequed_events to inject immediately back to the queue + if (!deleted && !frozen) { next_event->prev = dequed_events; dequed_events = next_event; - events_dequeued++; - assert(next_event->recv_ts >= current_switch_time); } next_event = tw_pq_dequeue(pe->pq); } - int events_enqueued = 0; - // Reinjecting events into simulation + // Reinjecting non-frozen events 
into simulation while (dequed_events) { tw_event * const prev_event = dequed_events; dequed_events = dequed_events->prev; @@ -126,13 +123,60 @@ static void shift_events_to_future_pe(tw_pe * pe) { events_enqueued++; } - if (DEBUG_DIRECTOR > 0 && events_dequeued != events_enqueued) { - printf("PE %lu: Discrepancy on number of events processed %d (%d dequeued and %d enqueued)\n", - g_tw_mynode, events_dequeued - events_enqueued, events_dequeued, events_enqueued); + if (DEBUG_DIRECTOR > 0) { + printf("PE %lu: Processed %d events (%d enqueued, %d frozen, %d deleted)\n", + g_tw_mynode, events_processed, events_enqueued, events_frozen, events_deleted); } - // shifting time stamps of events in causality list (one list per KP) - // offset_future_events_in_causality_list(switch_offset, gvt); + // Sanity check: processed = enqueued + frozen + deleted + assert(events_processed == events_enqueued + events_frozen + events_deleted); +} + +static void unfreeze_events_from_separate_queue_pe(tw_pe * pe) { +#ifdef USE_RAND_TIEBREAKER + tw_stime current_gvt = pe->GVT_sig.recv_ts; +#else + tw_stime current_gvt = pe->GVT; +#endif + + // Calculate offset to adjust timestamps: current_gvt - switch_time + double time_offset = current_gvt - frozen_events_switch_time; + + int events_restored = 0; + + // Traverse the frozen events linked list and restore them to the main queue + while (frozen_events_head) { + tw_event * event_to_restore = frozen_events_head; + frozen_events_head = frozen_events_head->prev; + event_to_restore->prev = NULL; + + // Adjust timestamp: original_time + time_spent_in_surrogate +#ifdef USE_RAND_TIEBREAKER + assert(event_to_restore->recv_ts == event_to_restore->sig.recv_ts); + event_to_restore->recv_ts += time_offset; + event_to_restore->sig.recv_ts = event_to_restore->recv_ts; +#else + event_to_restore->recv_ts += time_offset; +#endif + + // Re-enqueue the event + tw_pq_enqueue(pe->pq, event_to_restore); + + // Re-add to hash table if it was a remote event + if 
(event_to_restore->event_id && event_to_restore->state.remote) { + tw_hash_insert(pe->hash_t, event_to_restore, event_to_restore->send_pe); + } + + events_restored++; + } + + if (DEBUG_DIRECTOR > 0 && events_restored > 0) { + printf("PE %lu: Restored %d frozen events with time offset %.6f\n", + g_tw_mynode, events_restored, time_offset); + } + + // Reset frozen events state + frozen_events_switch_time = 0.0; } @@ -152,9 +196,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode"); } - printf("PE %lu - AVL size %d (before shifting events)\n", g_tw_mynode, pe->avl_tree_size); - shift_events_to_future_pe(pe); - printf("PE %lu - AVL size %d (after shifting events to future)\n", g_tw_mynode, pe->avl_tree_size); + printf("PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size); + freeze_events_to_separate_queue_pe(pe); + printf("PE %lu - AVL size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -210,6 +254,11 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { tw_stime gvt = pe->GVT; #endif + // Restore frozen events back to the main queue with timestamp adjustment + printf("PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size); + unfreeze_events_from_separate_queue_pe(pe); + printf("PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size); + // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { tw_lp * const lp = g_tw_lp[local_lpid]; @@ -259,7 +308,7 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { } -void switch_model(tw_pe * pe) { +static void switch_model(tw_pe * pe) 
{ // Rollback if in optimistic mode if (g_tw_synchronization_protocol == OPTIMISTIC) { tw_scheduler_rollback_and_cancel_events_pe(pe); @@ -359,6 +408,12 @@ void network_director(tw_pe * pe) { } } } + +// === Function for application director to use network freezing machinery +void surrogate_switch_network_model(tw_pe * pe) { + // Simply expose the existing switch_model function for use by application director + switch_model(pe); +} // // === END OF Director functionality // vim: set tabstop=4 shiftwidth=4 expandtab : From c16965fc4591ceb74e614bdf4529dcaee68e9958 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 11 Jun 2025 17:07:28 -0400 Subject: [PATCH 070/110] Network surrogate should be enabled through a custom parameter --- src/networks/model-net/dragonfly-dally.C | 12 ++++++++---- src/surrogate/application-surrogate.c | 17 ++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 3e2ce29d..9f5a91c4 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -2434,11 +2434,15 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) } // START Surrogate configuration - char director_mode[MAX_NAME_LENGTH]; - director_mode[0] = '\0'; - int director_mode_len = configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH); + char enable_str[MAX_NAME_LENGTH]; + enable_str[0] = '\0'; + int const rc_enable = configuration_get_value(&config, "NETWORK_SURROGATE", "enable", anno, enable_str, MAX_NAME_LENGTH); + bool enable_network_surrogate = false; + if (rc_enable > 0) { + enable_network_surrogate = (strcmp(enable_str, "1") == 0 || strcmp(enable_str, "true") == 0); + } // if surrogate mode has been set up - if (director_mode_len > 0) { + if (enable_network_surrogate) { struct network_surrogate_config surr_conf = { .director = {.switch_surrogate = 
switch_surrogate, .is_surrogate_on = is_surrogate_on_fun}, .total_terminals = p->total_terminals, diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c index 48677cfd..a95e5e19 100644 --- a/src/surrogate/application-surrogate.c +++ b/src/surrogate/application-surrogate.c @@ -15,7 +15,7 @@ static enum { #define gvt_for(pe) (pe->GVT) #endif -#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); } +#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); } static void application_director_pre_switch(tw_pe * pe) { // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every @@ -32,6 +32,14 @@ static void application_director_pre_switch(tw_pe * pe) { case FAST_FORWARD_switching: tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); + + // TODO: Fix network surrogate (it's buggy) and enable this code + // Freeze network events if configured + //if (freeze_network_on_app_switch) { + // master_printf("Freezing network events for application surrogate mode\n"); + // surrogate_switch_network_model(pe); + //} + director_state = POST_JUMP_switched; break; @@ -54,6 +62,13 @@ static void application_director_post_switch(tw_pe * pe) { if (director_state == POST_JUMP_switched) { master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); + + // Unfreeze network events if they were frozen + //if (freeze_network_on_app_switch) { + // master_printf("Unfreezing network events after application surrogate mode\n"); + // surrogate_switch_network_model(pe); + // // TODO: reset network predictors + //} } else { master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); } From 9835040e66ad720cb5882e260041387fce69c100 
Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 12 Jun 2025 17:03:52 -0400 Subject: [PATCH 071/110] Bug fix - tw_now has been moved out of commit time Silly (naive and common) bug has been squashed. This bug is very common and very easy to make when doing anything in ROSS. --- codes/net/dragonfly-dally.h | 2 ++ src/networks/model-net/dragonfly-dally.C | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/codes/net/dragonfly-dally.h b/codes/net/dragonfly-dally.h index 504446b0..929f6952 100644 --- a/codes/net/dragonfly-dally.h +++ b/codes/net/dragonfly-dally.h @@ -134,6 +134,8 @@ struct terminal_dally_message //Xin: for busy time recording tw_stime last_bufupdate_time; + + tw_stime saved_processing_time; }; void print_terminal_dally_message(FILE * out, char const * prefix, void * s, struct terminal_dally_message * msg); diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 9f5a91c4..cc0a4c85 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -3333,7 +3333,7 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term sent.start.packet_ID = msg->packet_ID; sent.start.dest_terminal_lpid = msg->dest_terminal_lpid; sent.start.dfdally_dest_terminal_id = msg->dfdally_dest_terminal_id; - sent.start.travel_start_time = tw_now(lp); + sent.start.travel_start_time = msg->saved_processing_time; sent.start.workload_injection_time = msg->msg_start_time; sent.start.processing_packet_delay = processing_packet_delay; sent.start.packet_size = msg->packet_size; @@ -3458,6 +3458,8 @@ static void terminal_dally_commit(terminal_state * s, uint64_t packet_ID = msg->packet_ID; if (s->sent_packets.count(packet_ID) == 1) { // packet_ID is in s->sent_packets + auto sent = s->sent_packets[packet_ID]; + assert(msg->travel_end_time > sent.start.travel_start_time); if (packet_ID == s->last_packet_sent_id) { // packet_ID is last, we cannot compute the 
next_packet_delay assert(s->arrival_of_last_packet.packet_ID == -1); s->arrival_of_last_packet.packet_ID = packet_ID; @@ -4212,6 +4214,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa s->packet_gen++; s->total_gen_size += msg->packet_size; + msg->saved_processing_time = tw_now(lp); tw_stime ts, injection_ts, nic_ts; From bfdfba969209d06eababe4d7c54a3d0337b68aab Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 12 Jun 2025 17:08:10 -0400 Subject: [PATCH 072/110] Hooking network surrogate to application surrogate --- codes/surrogate/application-surrogate.h | 1 + src/surrogate/application-surrogate.c | 25 +++++++++++++++---------- src/surrogate/init.c | 7 ++++++- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h index 111df4ef..cfda05bf 100644 --- a/codes/surrogate/application-surrogate.h +++ b/codes/surrogate/application-surrogate.h @@ -28,6 +28,7 @@ struct application_director_config { // To use when APP_DIRECTOR_OPTS_call_every_ns double call_every_ns; }; + bool use_network_surrogate; }; // Main function responsible for switching between high-fidelity and (application iteration) surrogate diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c index a95e5e19..4e848e23 100644 --- a/src/surrogate/application-surrogate.c +++ b/src/surrogate/application-surrogate.c @@ -1,8 +1,13 @@ #include "surrogate/application-surrogate.h" #include +#include "surrogate/network-surrogate.h" static struct app_iteration_predictor * iter_predictor; -static struct application_director_config conf = {.option = APP_DIRECTOR_OPTS_call_every_ns, .every_n_gvt = 1000000}; +static struct application_director_config conf = { + .option = APP_DIRECTOR_OPTS_call_every_ns, + .every_n_gvt = 1000000, + .use_network_surrogate = false +}; static enum { PRE_JUMP = 0, POST_JUMP_switched, // Switched to surrogate-mode @@ -35,10 +40,10 @@ static 
void application_director_pre_switch(tw_pe * pe) { // TODO: Fix network surrogate (it's buggy) and enable this code // Freeze network events if configured - //if (freeze_network_on_app_switch) { - // master_printf("Freezing network events for application surrogate mode\n"); - // surrogate_switch_network_model(pe); - //} + if (conf.use_network_surrogate) { + master_printf("Switching on network surrogate\n"); + surrogate_switch_network_model(pe); + } director_state = POST_JUMP_switched; break; @@ -64,11 +69,11 @@ static void application_director_post_switch(tw_pe * pe) { master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); // Unfreeze network events if they were frozen - //if (freeze_network_on_app_switch) { - // master_printf("Unfreezing network events after application surrogate mode\n"); - // surrogate_switch_network_model(pe); - // // TODO: reset network predictors - //} + if (conf.use_network_surrogate) { + master_printf("Switching off network surrogate\n"); + surrogate_switch_network_model(pe); + // TODO: reset network predictors and ask not to gather any data for 1 ms + } } else { master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); } diff --git a/src/surrogate/init.c b/src/surrogate/init.c index c6a0a6aa..67ab25ee 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -232,13 +232,15 @@ static struct application_director_config load_director_config(void) { break; } + config.use_network_surrogate = is_network_surrogate_configured; + return config; } void application_surrogate_configure( int num_terminals_in_pe, int num_apps, - struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. 
Caller must free it + struct app_iteration_predictor ** iter_pred ) { char num_iters_str[MAX_NAME_LENGTH]; num_iters_str[0] = '\0'; @@ -267,6 +269,9 @@ void application_surrogate_configure( } else { master_printf(" Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns); } + if (is_network_surrogate_configured) { + master_printf(" The network director has been replaced by the application director. The application director will trigger the network surrogate on and off.\n"); + } master_printf("\n"); } From 8e9521bc8961e1eeb58cce57137dd685ddaae4ca Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 12 Jun 2025 17:10:29 -0400 Subject: [PATCH 073/110] Wrap dummy event logic with compile-time flag for simulation reproducibility and determinism --- src/networks/model-net/dragonfly-dally.C | 27 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index cc0a4c85..c4bc8331 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -76,6 +76,9 @@ #define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_DALLY_ROUTER]) #define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_DALLY_ROUTER]) +// If we have configured the network surrogate, then we will collect packet delay data, which is done via the scheduling of an event. This additional event will shift the random generator and thus the same model will behave differently from the start when compared with the one where the surrogate is not setup. 
If one wants to test both scenarios (with and without the surrogate) and maintain determinism in high-fidelity, one has to enable this option +#define ALWAYS_DETERMINISTIC_NETWORK 0 + /* handles terminal and router events like packet generate/send/receive/buffer */ typedef struct terminal_state terminal_state; typedef struct router_state router_state; @@ -368,7 +371,9 @@ enum event_t R_SNAPSHOT, //used for timed statistic outputs T_NOTIFY, // used to notify a source or destination terminal about packets status (useful for informing about latency, zombie packet or delete a zombie packet) T_ARRIVE_PREDICTED, // this event is generated by a latency predictor instead of traversing the network +#if ALWAYS_DETERMINISTIC_NETWORK T_VACUOUS_EVENT, // nothing happens with this event, it's just ment to be a dummy event that allows us to keep the number of events produced in a simulation the same regardless of whether packet latency is activated (can be safely removed) +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ }; // Types of notifications between terminals @@ -3472,8 +3477,10 @@ static void terminal_dally_commit(terminal_state * s, } break; +#if ALWAYS_DETERMINISTIC_NETWORK case T_VACUOUS_EVENT: break; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ default: printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); @@ -4904,6 +4911,7 @@ static void process_terminal_notification_event(terminal_state * s, tw_bf * bf, } } +#if ALWAYS_DETERMINISTIC_NETWORK // This function triggers an event that is completely ignored when processed later. The number of events produced by a terminal/router DOES alter the simulation results. (The number of events processed by an LP shouldn't be a parameter to the simulation itself, but it is weirdly). 
static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * msg, tw_lp * lp) { @@ -4917,6 +4925,7 @@ static void vacuous_msg_to_itself(terminal_state * s, terminal_dally_message * m new_msg->magic = terminal_magic_num; tw_event_send(e); } +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ //used by packet_arrive() static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size) @@ -5495,12 +5504,14 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message //printf("Good day sir, not a zombie! LPID=%d packet_ID = %d dfdally_src_terminal_id = %d\n", lp->gid, msg->packet_ID, msg->dfdally_src_terminal_id); if (packet_latency_f || dally_surrogate_configured) { notify_src_lp_on_total_latency(lp, msg); - //} else { - // // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact - // // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce - // // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S - // vacuous_msg_to_itself(s, msg, lp); +#if ALWAYS_DETERMINISTIC_NETWORK + } else { + // This vacuous msg is necessary just to keep simulations with and without the latency notification the same. Notifying the latency does not impact + // the simulation (unless the data is fed to a predictor, later to be used). 
If the latency notification is deactivated, the simulation will produce + // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S + vacuous_msg_to_itself(s, msg, lp); } +#endif /* if ALWAYS_DETERMINISTIC_NETWORK */ } // if the message is complete (ie, this `msg` is the last piece of the message) @@ -6893,8 +6904,10 @@ terminal_dally_event( terminal_state * s, process_terminal_notification_event(s, bf, msg, lp); break; +#if ALWAYS_DETERMINISTIC_NETWORK case T_VACUOUS_EVENT: break; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ default: printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); @@ -7004,8 +7017,10 @@ static void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, term process_terminal_notification_event_rc(s, bf, msg, lp); break; +#if ALWAYS_DETERMINISTIC_NETWORK case T_VACUOUS_EVENT: break; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ default: tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type); @@ -8120,7 +8135,9 @@ char const * const string_event_t(enum event_t type) { case R_SNAPSHOT: return "R_SNAPSHOT"; case T_NOTIFY: return "T_NOTIFY"; case T_ARRIVE_PREDICTED: return "T_ARRIVE_PREDICTED"; +#if ALWAYS_DETERMINISTIC_NETWORK case T_VACUOUS_EVENT: return "T_VACUOUS_EVENT"; +#endif /* ALWAYS_DETERMINISTIC_NETWORK */ default: return "UNKNOWN TYPE!!"; } } From 10edcecd1e9247488b9f4df0f5e1fdbdcc1b662e Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 12 Jun 2025 17:26:11 -0400 Subject: [PATCH 074/110] Modifying tests. They all pass now! 
--- doc/example/tutorial-ping-pong-surrogate.conf.in | 2 ++ tests/CMakeLists.txt | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/example/tutorial-ping-pong-surrogate.conf.in b/doc/example/tutorial-ping-pong-surrogate.conf.in index 04d2c94f..fd53f4d1 100644 --- a/doc/example/tutorial-ping-pong-surrogate.conf.in +++ b/doc/example/tutorial-ping-pong-surrogate.conf.in @@ -59,6 +59,8 @@ PARAMS router_buffer_snapshots=( ${BUFFER_SNAPSHOTS} ); } NETWORK_SURROGATE { + enable="1"; # Options: 0 or 1 + # determines the director switching from surrogate to high-def simulation strategy director_mode="at-fixed-virtual-times"; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 238d988f..886dcf59 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -91,9 +91,8 @@ set(test-shell-files example-ping-pong-surrogate-2.sh example-ping-pong-surrogate-3.sh example-ping-pong-no-logging.sh - # These are aspirational unit tests. The switching mechanism is not fully deterministic - #example-ping-pong-surrogate-determinism-1.sh # bug: not all processed events are commited before the switch happens, this might alter the behaviour of the predictor, thus the simulation diverges at switch (no longer deterministic) - #example-ping-pong-surrogate-determinism-2.sh # bug: incoming packets (`T_ARRIVE_PREDICTED` events scheduled by `dragonfly_dally_terminal_highdef_to_surrogate`) might tie some times, the tie is not resolved deterministically + example-ping-pong-surrogate-determinism-1.sh + example-ping-pong-surrogate-determinism-2.sh ) foreach(testname ${test-shell-files}) From 9126863eecc6ced2bd63a24d4b46e591b232f2de Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 13 Jun 2025 09:25:22 -0400 Subject: [PATCH 075/110] Adding missing garbage collection and print statement --- src/networks/model-net/dragonfly-dally.C | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C 
index c4bc8331..d9345c8f 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -5711,6 +5711,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s, qhash_finalize(s->rank_tbl); rc_stack_destroy(s->st); + rc_stack_destroy(s->cc_st); //TODO FREE THESE CORRECTLY for(int i = 0; i < s->params->num_rails; i++) { @@ -5778,6 +5779,7 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){ fclose(dragonfly_rtr_bw_log); rc_stack_destroy(s->st); + rc_stack_destroy(s->cc_st); const dragonfly_param *p = s->params; int written = 0; @@ -6868,6 +6870,7 @@ terminal_dally_event( terminal_state * s, } } else { rc_stack_gc(lp, s->st); + rc_stack_gc(lp, s->cc_st); } switch(msg->type) { @@ -6924,6 +6927,7 @@ static void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_mess s->fwd_events++; s->ross_rsample.fwd_events++; rc_stack_gc(lp, s->st); + rc_stack_gc(lp, s->cc_st); msg->last_received_time = s->last_time; s->last_time = tw_now(lp); @@ -8270,6 +8274,7 @@ void print_terminal_dally_message(FILE * out, char const * prefix, void * s, str fprintf(out, "%s | saved_fin_chunks_ross = %g\n", prefix, msg->saved_fin_chunks_ross); fprintf(out, "%s | saved_last_in_queue_time = %g\n", prefix, msg->saved_last_in_queue_time); fprintf(out, "%s | saved_next_packet_delay = %g\n", prefix, msg->saved_next_packet_delay); + fprintf(out, "%s | saved_processing_time = %g\n", prefix, msg->saved_processing_time); fprintf(out, "%s | msg_new_mn_event = %g\n", prefix, msg->msg_new_mn_event); fprintf(out, "%s | last_received_time = %g\n", prefix, msg->last_received_time); fprintf(out, "%s | last_sent_time = %g\n", prefix, msg->last_sent_time); From cd766b14813f487301f83c70cbbae40da00cec0b Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 13 Jun 2025 11:20:46 -0400 Subject: [PATCH 076/110] Refactoring some of the common values between surrogates --- codes/surrogate/application-surrogate.h | 2 + codes/surrogate/init.h | 24 +++--- 
codes/surrogate/network-surrogate.h | 25 +++---- doc/example/tutorial-synthetic-ping-pong.c | 1 + src/network-workloads/model-net-mpi-replay.c | 2 +- src/networks/model-net/dragonfly-dally.C | 5 +- src/surrogate/application-surrogate.c | 20 +++-- src/surrogate/init.c | 73 ++++++++++++------- src/surrogate/network-surrogate.c | 66 +++++++++++------ .../packet-latency-predictor/average.c | 15 ++-- 10 files changed, 136 insertions(+), 97 deletions(-) diff --git a/codes/surrogate/application-surrogate.h b/codes/surrogate/application-surrogate.h index cfda05bf..9fded3db 100644 --- a/codes/surrogate/application-surrogate.h +++ b/codes/surrogate/application-surrogate.h @@ -34,6 +34,8 @@ struct application_director_config { // Main function responsible for switching between high-fidelity and (application iteration) surrogate void application_director_configure(struct application_director_config *, struct app_iteration_predictor *); +void application_director_finalize(void); + #ifdef __cplusplus } #endif diff --git a/codes/surrogate/init.h b/codes/surrogate/init.h index 28e90a8d..f095e29c 100644 --- a/codes/surrogate/init.h +++ b/codes/surrogate/init.h @@ -11,9 +11,6 @@ #include "codes/surrogate/app-iteration-predictor/common.h" #include "codes/surrogate/network-surrogate.h" -// A simple macro to clarify code a bit -#define PRINTF_ONCE(...) if (g_tw_mynode == 0) { fprintf(stderr, __VA_ARGS__); } - // Basic level of debugging is 1. It should be always turned on // because it tells us when a switch to or from surrogate-mode happened. 
// It can be deactivated (set to 0) if it ends up being too obnoxious @@ -31,31 +28,28 @@ extern "C" { * Variable definitions */ -void print_surrogate_stats(void); +// Time spent switching from high-fidelity to surrogate and viceversa +extern double surrogate_switching_time; +// Total time spent in surrogate mode (between switches) +extern double time_in_surrogate; +// Time at which we transitioned into surrogate (zero means that we are in high-fidelity) +extern double surrogate_time_last; -struct network_surrogate_config { - struct director_data director; //!< functionality needed by the director to switch back and forth from model-level surrogate-mode to (vanilla) high-definition simulation - int total_terminals; //!< total number of terminals - size_t n_lp_types; - struct lp_types_switch lp_types[MAX_LP_TYPES]; -}; +void print_surrogate_stats(void); /** Loads surrogate configuration, including packet latency predictor. */ -void network_surrogate_configure( +bool network_surrogate_configure( char const * const annotation, struct network_surrogate_config * const config, struct packet_latency_predictor ** pl_pred //!< pointer to save packet latency predictor. Caller does not need to free pointer ); -extern struct network_surrogate_config net_surr_config; -extern bool is_network_surrogate_configured; - void application_surrogate_configure( int num_terminals_on_pe, int num_apps, struct app_iteration_predictor ** iter_pred //!< pointer to save application iteration predictor. 
No need to free pointer ); -void free_application_surrogate(void); +void surrogates_finalize(void); #ifdef __cplusplus } diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h index 2f86ac21..a550464d 100644 --- a/codes/surrogate/network-surrogate.h +++ b/codes/surrogate/network-surrogate.h @@ -16,22 +16,11 @@ extern "C" { #endif -// Time spent switching from high-fidelity to surrogate and viceversa -extern double surrogate_switching_time; -// Total time spent in surrogate mode (between switches) -extern double time_in_surrogate; - -// When true (below), the network state will be frozen at switch time (from -// high-def to surrogate) and later reanimated on the switch back (from -// surrogate to high-def). If not, all events will be kept in the network while -// on surrogate mode, which means that the network will vacate completely -extern bool freeze_network_on_switch; - // Functions that director should have access to typedef void (*switch_surrogate_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C) typedef bool (*is_surrogate_on_f) (void); // Switches back and forth from surrogate mode as defined by network model (e.g, by dragonfly-dally.C) -struct director_data { +struct network_model_surrogate { switch_surrogate_f switch_surrogate; // this function switches the model to and from surrogate-mode on a PE basis. 
It has to be called on all PEs to switch the entire simulation to its surrogate version is_surrogate_on_f is_surrogate_on; // determines if the model has switched or not }; @@ -60,14 +49,20 @@ struct switch_at_struct { double * time_stampts; // list of precise timestamps at which to switch }; -extern struct switch_at_struct switch_network_at; +struct network_surrogate_config { + struct network_model_surrogate model; //!< functionality needed by the director to switch the model back and forth from high-fidelity to surrogate + int total_terminals; //!< total number of terminals + size_t n_lp_types; + struct lp_types_switch lp_types[MAX_LP_TYPES]; +}; -// Main function responsible for switching between high-fidelity and (network) surrogate -void network_director(tw_pe * pe); +void network_director_configure(struct network_surrogate_config *, struct switch_at_struct * switch_network_at, bool freeze_network_on_switch); // Function for application director to use network freezing machinery void surrogate_switch_network_model(tw_pe * pe); +void network_director_finalize(void); + #ifdef __cplusplus } #endif diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c index 1aaf0528..e25ce7bc 100644 --- a/doc/example/tutorial-synthetic-ping-pong.c +++ b/doc/example/tutorial-synthetic-ping-pong.c @@ -356,6 +356,7 @@ int main(int argc, char **argv) model_net_report_stats(net_id); // Printing some stats + surrogates_finalize(); print_surrogate_stats(); tw_end(); diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 0c3d9ed1..fd28775f 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -4357,8 +4357,8 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) if(alloc_spec) codes_jobmap_destroy(jobmap_ctx); + surrogates_finalize(); print_surrogate_stats(); - free_application_surrogate(); #ifdef USE_RDAMARIS } // 
end if(g_st_ross_rank) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index d9345c8f..102a2de7 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -194,6 +194,7 @@ static void setup_packet_latency_path(char const * const dir_to_save); // static bool dally_surrogate_configured = false; static bool is_dally_surrogate_on = false; +static bool freeze_network_on_switch = false; static struct packet_latency_predictor * terminal_predictor = NULL; static void switch_surrogate(void); static bool is_surrogate_on_fun(void); @@ -2449,7 +2450,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) // if surrogate mode has been set up if (enable_network_surrogate) { struct network_surrogate_config surr_conf = { - .director = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun}, + .model = {.switch_surrogate = switch_surrogate, .is_surrogate_on = is_surrogate_on_fun}, .total_terminals = p->total_terminals, .n_lp_types = 2, .lp_types = { @@ -2472,7 +2473,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) 0 } }; - network_surrogate_configure(anno, &surr_conf, &terminal_predictor); + freeze_network_on_switch = network_surrogate_configure(anno, &surr_conf, &terminal_predictor); if (terminal_predictor) { dally_surrogate_configured = true; } else { diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c index 4e848e23..870794b6 100644 --- a/src/surrogate/application-surrogate.c +++ b/src/surrogate/application-surrogate.c @@ -1,6 +1,7 @@ #include "surrogate/application-surrogate.h" #include #include "surrogate/network-surrogate.h" +#include "surrogate/init.h" static struct app_iteration_predictor * iter_predictor; static struct application_director_config conf = { @@ -38,13 +39,12 @@ static void application_director_pre_switch(tw_pe * pe) { 
tw_trigger_gvt_hook_at(restarting_at + 1); // + 1 to force director to run right after we have fully fast-forward master_printf("Triggering switch to application iteration surrogate mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); - // TODO: Fix network surrogate (it's buggy) and enable this code - // Freeze network events if configured if (conf.use_network_surrogate) { - master_printf("Switching on network surrogate\n"); + master_printf("Switching network surrogate on\n"); surrogate_switch_network_model(pe); } + surrogate_time_last = tw_clock_read(); director_state = POST_JUMP_switched; break; @@ -63,24 +63,29 @@ static void application_director_post_switch(tw_pe * pe) { tw_trigger_gvt_hook_every(conf.every_n_gvt); } + double const start = tw_clock_read(); iter_predictor->director.reset(); + double const end = tw_clock_read(); + surrogate_switching_time += end - start; if (director_state == POST_JUMP_switched) { master_printf("Back to full high-fidelity application iteration mode at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); - // Unfreeze network events if they were frozen if (conf.use_network_surrogate) { - master_printf("Switching off network surrogate\n"); + master_printf("Switching network surrogate off\n"); surrogate_switch_network_model(pe); // TODO: reset network predictors and ask not to gather any data for 1 ms } + + time_in_surrogate += start - surrogate_time_last; + surrogate_time_last = 0.0; } else { master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); } director_state = PRE_JUMP; } -void application_director(tw_pe * pe) { +static void application_director(tw_pe * pe) { // Director is not called if the simulation has ended if (gvt_for(pe) >= g_tw_ts_end) { return; @@ -107,3 +112,6 @@ void application_director_configure(struct application_director_config * conf_, tw_trigger_gvt_hook_at(conf.call_every_ns); } } + +void application_director_finalize(void) { +} diff --git a/src/surrogate/init.c 
b/src/surrogate/init.c index 67ab25ee..2e93ed75 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #ifdef USE_TORCH @@ -9,16 +10,21 @@ #define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); } -bool freeze_network_on_switch = true; -struct network_surrogate_config net_surr_config = {0}; -bool is_network_surrogate_configured = false; -struct switch_at_struct switch_network_at; +// Shared times across network and application surrogates +double surrogate_switching_time = 0.0; +double time_in_surrogate = 0.0; +double surrogate_time_last = 0.0; + +static bool network_director_enabled = false; +static bool is_network_surrogate_configured = false; +static bool is_app_surrogate_configured = false; static struct packet_latency_predictor current_net_predictor = {0}; static struct app_iteration_predictor current_iter_predictor = {0}; // === Stats! void print_surrogate_stats(void) { + // Computing the time in surrogate only makes sense if we can switch the whole simulation all at once (like the network simulation does), and it doesn't work with the application surrogate as this doesn't switch the state of the simulation all at once if(is_network_surrogate_configured && g_tw_mynode == 0) { printf("\nTotal time spent on surrogate-mode: %.4f\n", (double) time_in_surrogate / g_tw_clock_rate); printf("Total time spent on switching from and to surrogate-mode: %.4f\n", (double) surrogate_switching_time / g_tw_clock_rate); @@ -28,7 +34,7 @@ void print_surrogate_stats(void) { // === All things Surrogate Configuration -void network_surrogate_configure( +bool network_surrogate_configure( char const * const anno, struct network_surrogate_config * const sc, struct packet_latency_predictor ** pl_pred @@ -37,21 +43,21 @@ void network_surrogate_configure( assert(0 < sc->n_lp_types && sc->n_lp_types <= MAX_LP_TYPES); is_network_surrogate_configured = true; - // This is the only place where the director data 
should be loaded and set up - net_surr_config = *sc; + struct switch_at_struct switch_network_at; // Determining which director mode to set up char director_mode[MAX_NAME_LENGTH]; director_mode[0] = '\0'; configuration_get_value(&config, "NETWORK_SURROGATE", "director_mode", anno, director_mode, MAX_NAME_LENGTH); if (strcmp(director_mode, "at-fixed-virtual-times") == 0) { - PRINTF_ONCE("\nNetwork surrogate activated switching at fixed virtual times: "); + master_printf("\nNetwork surrogate activated switching at fixed virtual times: "); // Loading timestamps char **timestamps; size_t len; configuration_get_multivalue(&config, "NETWORK_SURROGATE", "fixed_switch_timestamps", anno, ×tamps, &len); + network_director_enabled = true; switch_network_at.current_i = 0; switch_network_at.total = len; switch_network_at.time_stampts = malloc(len * sizeof(double)); @@ -63,20 +69,17 @@ void network_surrogate_configure( tw_error(TW_LOC, "Sequence `%s' could not be succesfully interpreted as a _double_.", timestamps[i]); } - PRINTF_ONCE("%g%s", switch_network_at.time_stampts[i], i == len-1 ? "" : ", "); + master_printf("%g%s", switch_network_at.time_stampts[i], i == len-1 ? "" : ", "); } - PRINTF_ONCE("\n"); - - // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT - g_tw_gvt_hook = network_director; - - tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]); + master_printf("\n"); // freeing timestamps before it dissapears for (size_t i = 0; i < len; i++) { free(timestamps[i]); } free(timestamps); + } else if (strcmp(director_mode, "delegate-to-app-director") == 0) { + master_printf("\nNetwork surrogate enabled but director won't run. 
Network surrogate will be triggered by app director if present\n"); } else { tw_error(TW_LOC, "Unknown director mode `%s`", director_mode); } @@ -87,7 +90,7 @@ void network_surrogate_configure( configuration_get_value(&config, "NETWORK_SURROGATE", "packet_latency_predictor", anno, latency_pred_name, MAX_NAME_LENGTH); if (*latency_pred_name) { if (strcmp(latency_pred_name, "average") == 0) { - current_net_predictor = average_latency_predictor(net_surr_config.total_terminals); + current_net_predictor = average_latency_predictor(sc->total_terminals); *pl_pred = ¤t_net_predictor; #ifdef USE_TORCH @@ -116,20 +119,21 @@ void network_surrogate_configure( ")", latency_pred_name); } } else { - current_net_predictor = average_latency_predictor(net_surr_config.total_terminals); + current_net_predictor = average_latency_predictor(sc->total_terminals); *pl_pred = ¤t_net_predictor; - PRINTF_ONCE("Enabling average packet latency predictor (default behaviour)\n"); + master_printf("Enabling average packet latency predictor (default behaviour)\n"); } // Finding out whether to ignore some packet latencies int rc = configuration_get_value_double(&config, "NETWORK_SURROGATE", "ignore_until", anno, &ignore_until); if (rc) { ignore_until = -1; // any negative number disables ignore_until, all packet latencies will be considered - PRINTF_ONCE("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n"); + master_printf("`ignore_until` disabled (all packet latencies will be used in training the predictor)\n"); } else { - PRINTF_ONCE("ignore_until=%g a packet delievered before this time stamp will not be used in training any predictor\n", ignore_until); + master_printf("ignore_until=%g a packet delievered before this time stamp will not be used in training any predictor\n", ignore_until); } + bool freeze_network_on_switch = true; // Determining which predictor to set up and return char network_treatment_name[MAX_NAME_LENGTH]; network_treatment_name[0] = '\0'; 
@@ -137,22 +141,26 @@ void network_surrogate_configure( if (*network_treatment_name) { if (strcmp(network_treatment_name, "freeze") == 0) { freeze_network_on_switch = true; - PRINTF_ONCE("The network will be frozen on switch to surrogate\n"); + master_printf("The network will be frozen on switch to surrogate\n"); } else if (strcmp(network_treatment_name, "nothing") == 0) { freeze_network_on_switch = false; - PRINTF_ONCE("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n"); + master_printf("The network will be left alone on switch to surrogate (it will run on the background until it empties by itself)\n"); } else { tw_error(TW_LOC, "Unknown network treatment `%s` (possibilities include: frezee or nothing)", network_treatment_name); } } else { freeze_network_on_switch = true; - PRINTF_ONCE("The network will be frozen on switch to surrogate (default behaviour)\n"); + master_printf("The network will be frozen on switch to surrogate (default behaviour)\n"); } + network_director_configure(sc, network_director_enabled ? &switch_network_at: NULL, freeze_network_on_switch); + //surr_config.director.switch_surrogate(); if (DEBUG_DIRECTOR && g_tw_mynode == 0) { - fprintf(stderr, "Simulation starting on %s mode\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); + fprintf(stderr, "Simulation starting on network %s mode\n", sc->model.is_surrogate_on() ? 
"surrogate" : "high-fidelity"); } + + return freeze_network_on_switch; } static int load_and_validate_int_param(const char* param_name, int default_value) { @@ -258,6 +266,7 @@ void application_surrogate_configure( current_iter_predictor = avg_app_iteration_predictor(&predictor_config); application_director_configure(&app_dir_config, ¤t_iter_predictor); *iter_pred = ¤t_iter_predictor; + is_app_surrogate_configured = true; // Printing configuration summary master_printf("\nApplication surrogate configuration:\n"); @@ -269,13 +278,23 @@ void application_surrogate_configure( } else { master_printf(" Director - mode: every-n-nanoseconds, call_every_ns: %e\n", app_dir_config.call_every_ns); } - if (is_network_surrogate_configured) { + if (network_director_enabled) { master_printf(" The network director has been replaced by the application director. The application director will trigger the network surrogate on and off.\n"); } master_printf("\n"); } -void free_application_surrogate(void) { - free_avg_app_iteration_predictor(); +void surrogates_finalize(void) { + // TODO (helq): check that we are in fact still in surrogate (either network or application) + if (surrogate_time_last > 0) { // we likely didn't transitioned back from surrogate mode + time_in_surrogate += tw_clock_read() - surrogate_time_last; + } + if (is_network_surrogate_configured) { + network_director_finalize(); + } + if (is_app_surrogate_configured) { + application_director_finalize(); + free_avg_app_iteration_predictor(); + } } // === END OF All things Surrogate Configuration diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c index 13fccb37..230c6ade 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -4,9 +4,11 @@ #include #include -double surrogate_switching_time = 0.0; -double time_in_surrogate = 0.0; -static double surrogate_time_last = 0.0; +static bool is_network_surrogate_configured = false; +static struct switch_at_struct 
switch_network_at = {0}; +static struct network_surrogate_config net_surr_config = {0}; +static bool freeze_network_on_switch = false; +static bool network_director_enabled = false; // === Frozen events system for separate queue approach static tw_event *frozen_events_head = NULL; // Head of frozen events linked list @@ -313,14 +315,14 @@ static void switch_model(tw_pe * pe) { if (g_tw_synchronization_protocol == OPTIMISTIC) { tw_scheduler_rollback_and_cancel_events_pe(pe); } - net_surr_config.director.switch_surrogate(); + net_surr_config.model.switch_surrogate(); if (DEBUG_DIRECTOR && g_tw_mynode == 0) { - printf("Switching to %s\n", net_surr_config.director.is_surrogate_on() ? "surrogate" : "high-fidelity"); + printf("Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "surrogate" : "high-fidelity"); } // "Freezing" network events and activating LP's switch functions if (freeze_network_on_switch) { - if (net_surr_config.director.is_surrogate_on()) { + if (net_surr_config.model.is_surrogate_on()) { model_net_method_switch_to_surrogate(); events_high_def_to_surrogate_switch(pe); } else { @@ -333,6 +335,7 @@ static void switch_model(tw_pe * pe) { void network_director(tw_pe * pe) { assert(is_network_surrogate_configured); + assert(network_director_enabled); #ifdef USE_RAND_TIEBREAKER tw_stime gvt = pe->GVT_sig.recv_ts; @@ -348,7 +351,7 @@ void network_director(tw_pe * pe) { } if (DEBUG_DIRECTOR == 3) { printf("GVT %d at %f in %s\n", i++, gvt, - net_surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition"); + net_surr_config.model.is_surrogate_on() ? 
"surrogate-mode" : "high-definition"); } } @@ -361,22 +364,18 @@ void network_director(tw_pe * pe) { // Do not process if the simulation ended if (gvt >= g_tw_ts_end) { - // If the simulation ended and the surrogate is still on, stop timer checking surrogate time - if (net_surr_config.director.is_surrogate_on()) { - time_in_surrogate += tw_clock_read() - surrogate_time_last; - } return; } // ---- Past this means that we are in fact switching ---- - bool const pre_switch_status = net_surr_config.director.is_surrogate_on(); + bool const pre_switch_status = net_surr_config.model.is_surrogate_on(); // Asking the director/model to switch if (DEBUG_DIRECTOR && g_tw_mynode == 0) { if (DEBUG_DIRECTOR == 2) { printf("\n"); } - printf("Switching at %f\n", gvt); + printf("Switching network at %f\n", gvt); } double const start = tw_clock_read(); @@ -391,28 +390,49 @@ void network_director(tw_pe * pe) { } if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) { - printf("Switch completed!\n"); + printf("Network switch completed!\n"); } if (DEBUG_DIRECTOR > 1) { printf("PE %lu: Switch completed!\n", g_tw_mynode); } // Determining time in surrogate - if (pre_switch_status != net_surr_config.director.is_surrogate_on()) { - if (net_surr_config.director.is_surrogate_on()) { - // Start tracking time spent in surrogate mode - surrogate_time_last = end; - } else { - // We are done tracking time spent in surrogate mode - time_in_surrogate += start - surrogate_time_last; - } + if (net_surr_config.model.is_surrogate_on()) { + // Start tracking time spent in surrogate mode + surrogate_time_last = end; + } else { + // We are done tracking time spent in surrogate mode + time_in_surrogate += start - surrogate_time_last; + surrogate_time_last = 0.0; + } +} + +void network_director_configure(struct network_surrogate_config * sc, struct switch_at_struct * switch_network_at_, bool fnos) { + is_network_surrogate_configured = true; + // Injecting into ROSS the function to be called at GVT + if 
(switch_network_at_) { + network_director_enabled = true; + g_tw_gvt_hook = network_director; + switch_network_at = *switch_network_at_; + tw_trigger_gvt_hook_at(switch_network_at.time_stampts[0]); } + net_surr_config = *sc; + freeze_network_on_switch = fnos; } -// === Function for application director to use network freezing machinery +void network_director_finalize(void) { + if (network_director_enabled) { + free(switch_network_at.time_stampts); + } +} + +// === Function for application director to use switch to surrogate machinery void surrogate_switch_network_model(tw_pe * pe) { // Simply expose the existing switch_model function for use by application director + double const start = tw_clock_read(); switch_model(pe); + double const end = tw_clock_read(); + surrogate_switching_time += end - start; } // // === END OF Director functionality diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c index 2b8af6ea..c6553563 100644 --- a/src/surrogate/packet-latency-predictor/average.c +++ b/src/surrogate/packet-latency-predictor/average.c @@ -1,7 +1,8 @@ -#include #include +#include double ignore_until = 0; +static int num_terminals = 0; // === Average packet latency functionality @@ -14,22 +15,18 @@ struct aggregated_latency_one_terminal { struct latency_surrogate { struct aggregated_latency_one_terminal aggregated_next_packet_delay; struct aggregated_latency_one_terminal aggregated_latency_for_all; - unsigned int num_terminals; struct aggregated_latency_one_terminal aggregated_latency[]; }; static void init_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal) { (void) lp; (void) src_terminal; - assert(data->num_terminals == 0); assert(data->aggregated_latency_for_all.sum_latency == 0); assert(data->aggregated_latency_for_all.total_msgs == 0); assert(data->aggregated_latency[0].sum_latency == 0); assert(data->aggregated_latency[0].total_msgs == 0); 
assert(data->aggregated_next_packet_delay.total_msgs == 0); assert(data->aggregated_next_packet_delay.sum_latency == 0); - - data->num_terminals = net_surr_config.total_terminals; } static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int src_terminal, struct packet_start const * start, struct packet_end const * end) { @@ -42,7 +39,7 @@ static void feed_pred(struct latency_surrogate * data, tw_lp * lp, unsigned int unsigned int const dest_terminal = start->dfdally_dest_terminal_id; double const latency = end->travel_end_time - start->travel_start_time; - assert(dest_terminal < data->num_terminals); + assert(dest_terminal < num_terminals); assert(end->travel_end_time > start->travel_start_time); // For average latency per terminal @@ -64,7 +61,7 @@ static struct packet_end predict_latency(struct latency_surrogate * data, tw_lp (void) lp; unsigned int const dest_terminal = packet_dest->dfdally_dest_terminal_id; - assert(dest_terminal < data->num_terminals); + assert(dest_terminal < num_terminals); unsigned int const total_total_datapoints = data->aggregated_latency_for_all.total_msgs; if (total_total_datapoints == 0) { @@ -102,7 +99,9 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) { } -struct packet_latency_predictor average_latency_predictor(int num_terminals) { +struct packet_latency_predictor average_latency_predictor(int num_terminals_) { + assert(num_terminals_ >= 0); + num_terminals = num_terminals_; return (struct packet_latency_predictor) { .init = (init_pred_lat_f) init_pred, .feed = (feed_pred_lat_f) feed_pred, From 4969737b762b82987c6ec36edfd4af19b57d28dc Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 13 Jun 2025 11:26:15 -0400 Subject: [PATCH 077/110] Fixing position of bracket --- src/networks/model-net/dragonfly-dally.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 102a2de7..99b292ee 100644 
--- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -5511,8 +5511,8 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message // the simulation (unless the data is fed to a predictor, later to be used). If the latency notification is deactivated, the simulation will produce // the same number of events (a bit wasteful), a parameter that model-net or dragonfly-dally for some reason use :S vacuous_msg_to_itself(s, msg, lp); - } #endif /* if ALWAYS_DETERMINISTIC_NETWORK */ + } } // if the message is complete (ie, this `msg` is the last piece of the message) From b4b6362ec2495b8659dc745cfd2eb7bc3583e47d Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 13 Jun 2025 12:39:25 -0400 Subject: [PATCH 078/110] Updating tests --- tests/example-ping-pong-surrogate-1.sh | 2 +- tests/example-ping-pong-surrogate-2.sh | 2 +- tests/example-ping-pong-surrogate-3.sh | 2 +- tests/example-ping-pong-surrogate-determinism-1.sh | 2 +- tests/example-ping-pong-surrogate-determinism-2.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/example-ping-pong-surrogate-1.sh b/tests/example-ping-pong-surrogate-1.sh index 7f3a5f6d..6b9e4b8b 100755 --- a/tests/example-ping-pong-surrogate-1.sh +++ b/tests/example-ping-pong-surrogate-1.sh @@ -42,7 +42,7 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking that the surrogate switched properly -grep 'Switch completed' model-output-2.txt +grep 'Network switch completed' model-output-2.txt err=$? [[ $err -ne 0 ]] && exit $err diff --git a/tests/example-ping-pong-surrogate-2.sh b/tests/example-ping-pong-surrogate-2.sh index f987bedf..a37d309a 100755 --- a/tests/example-ping-pong-surrogate-2.sh +++ b/tests/example-ping-pong-surrogate-2.sh @@ -42,7 +42,7 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking that the surrogate switched properly -grep 'Switch completed' model-output-2.txt +grep 'Network switch completed' model-output-2.txt err=$? 
[[ $err -ne 0 ]] && exit $err diff --git a/tests/example-ping-pong-surrogate-3.sh b/tests/example-ping-pong-surrogate-3.sh index 19212e9e..ca04c245 100755 --- a/tests/example-ping-pong-surrogate-3.sh +++ b/tests/example-ping-pong-surrogate-3.sh @@ -43,7 +43,7 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking that the surrogate switched properly -grep 'Switch completed' model-output-1.txt +grep 'Network switch completed' model-output-1.txt err=$? [[ $err -ne 0 ]] && exit $err diff --git a/tests/example-ping-pong-surrogate-determinism-1.sh b/tests/example-ping-pong-surrogate-determinism-1.sh index cd219272..5726aa0b 100755 --- a/tests/example-ping-pong-surrogate-determinism-1.sh +++ b/tests/example-ping-pong-surrogate-determinism-1.sh @@ -40,7 +40,7 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking that the surrogate switched properly -grep 'Switch completed' model-output-1.txt +grep 'Network switch completed' model-output-1.txt err=$? [[ $err -ne 0 ]] && exit $err diff --git a/tests/example-ping-pong-surrogate-determinism-2.sh b/tests/example-ping-pong-surrogate-determinism-2.sh index b86f0dfd..f7908a27 100755 --- a/tests/example-ping-pong-surrogate-determinism-2.sh +++ b/tests/example-ping-pong-surrogate-determinism-2.sh @@ -40,7 +40,7 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking that the surrogate switched properly -grep 'Switch completed' model-output-1.txt +grep 'Network switch completed' model-output-1.txt err=$? 
[[ $err -ne 0 ]] && exit $err From 36bc31780342db216a636d592e0cde25f68fe416 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 13 Jun 2025 16:02:27 -0400 Subject: [PATCH 079/110] Adding tests for UNION --- tests/CMakeLists.txt | 9 + .../conceptual.json | 6 +- .../dfdally-72-par.conf.in | 40 +- .../jacobi_MILC.workload.conf | 0 .../milc_skeleton.json | 2 +- .../rand_node0-1d-72-jacobi_MILC.alloc.conf | 0 tests/run-test.sh.in | 8 + .../union-workload-test-surrogate-parallel.sh | 101 +++++ tests/union-workload-test-surrogate.sh | 101 +++++ tmptest/README.md | 107 ------ tmptest/conf/dfdally-72-inter | Bin 576 -> 0 bytes tmptest/conf/dfdally-72-intra | Bin 144 -> 0 bytes tmptest/expected/iteration-logs | 360 ------------------ .../avg-all-reduce-time | 0 .../dragonfly-cn-stats | 73 ---- .../dragonfly-link-stats | 326 ---------------- .../model-net-category-all | 72 ---- .../model-net-category-high | 72 ---- .../mpi-replay-stats | 73 ---- tmptest/expected/tmptest-jacobiS_MILC.output | 240 ------------ 20 files changed, 260 insertions(+), 1330 deletions(-) rename {tmptest/conf => tests/conf/union-milc-jacobi-workload}/conceptual.json (94%) rename tmptest/conf/dfdally-72-par.conf => tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in (51%) rename tmptest/conf/jacobi_MILC.conf => tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf (100%) rename {tmptest/conf => tests/conf/union-milc-jacobi-workload}/milc_skeleton.json (92%) rename tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf => tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf (100%) create mode 100644 tests/union-workload-test-surrogate-parallel.sh create mode 100644 tests/union-workload-test-surrogate.sh delete mode 100644 tmptest/README.md delete mode 100644 tmptest/conf/dfdally-72-inter delete mode 100644 tmptest/conf/dfdally-72-intra delete mode 100644 tmptest/expected/iteration-logs delete mode 100644 
tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats delete mode 100644 tmptest/expected/tmptest-jacobiS_MILC.output diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 886dcf59..a78e7210 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -95,6 +95,15 @@ set(test-shell-files example-ping-pong-surrogate-determinism-2.sh ) +configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @ONLY) + +if(USE_UNION) + list(APPEND test-shell-files + union-workload-test-surrogate.sh + union-workload-test-surrogate-parallel.sh + ) +endif() + foreach(testname ${test-shell-files}) add_test(NAME ${testname} COMMAND "${CMAKE_CURRENT_BINARY_DIR}/run-test.sh" "${CMAKE_CURRENT_SOURCE_DIR}/${testname}" diff --git a/tmptest/conf/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json similarity index 94% rename from tmptest/conf/conceptual.json rename to tests/conf/union-milc-jacobi-workload/conceptual.json index ad786bce..557c0bce 100644 --- a/tmptest/conf/conceptual.json +++ b/tests/conf/union-milc-jacobi-workload/conceptual.json @@ -40,9 +40,9 @@ "100", "100", "100", - "125000", - "5", - "2000000", + "50000", + "39", + "200000", "barrier" ] }, diff --git a/tmptest/conf/dfdally-72-par.conf b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in similarity index 51% rename from tmptest/conf/dfdally-72-par.conf rename to tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in index 
7f4679b6..3b72d00a 100644 --- a/tmptest/conf/dfdally-72-par.conf +++ b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @@ -38,16 +38,16 @@ PARAMS # bandwidth in GiB/s for compute node-router channels cn_bandwidth="5.25"; # ROSS message size - message_size="792"; + message_size="840"; # number of compute nodes connected to router, dictated by dragonfly config # file num_cns_per_router="2"; # number of global channels per router num_global_channels="2"; # network config file for intra-group connections - intra-group-connections="/home/helq/Research/HPC/code/kronos/kronos-merge/codes/tmptest/conf/dfdally-72-intra"; + intra-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; # network config file for inter-group connections - inter-group-connections="/home/helq/Research/HPC/code/kronos/kronos-merge/codes/tmptest/conf/dfdally-72-inter"; + inter-group-connections="@CMAKE_SOURCE_DIR@/src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; # routing protocol to be used routing="prog-adaptive"; minimal-bias="1"; @@ -62,3 +62,37 @@ PARAMS #offset for app_id: model-net-mpi-replay is 88, synthetic-dfly-plus is 24 offset="144"; } +NETWORK_SURROGATE { + enable="${NETWORK_SURR_ON}"; # either "0" or "1" +# determines the director switching from surrogate to high-def simulation strategy + director_mode="delegate-to-app-director"; + #director_mode="at-fixed-virtual-times"; + +# director configuration for: director_mode == "at-fixed-virtual-times" +# timestamps at which to switch to surrogate-mode and back + fixed_switch_timestamps=( "25.0e6", "400.0e6" ); + +# latency predictor to use. Options: average, torch-jit + packet_latency_predictor="average"; +# some workload models need some time to stabilize, a point where the network behaviour stabilizes. 
The predictor will ignore all packet latencies that arrive during this period + ignore_until="10.0e6"; + +# parameters for torch-jit latency predictor + torch_jit_mode="single-static-model-for-all-terminals"; + torch_jit_model_path=""; + +# selecting network treatment on switching to surrogate. Options: freeze, nothing + network_treatment_on_switch="nothing"; # freeze is buggy sadly. It freezes more events than it should +} +APPLICATION_SURROGATE { + enable="${APP_SURR_ON}"; # either 0 or 1 + + # Configuring director + director_mode="${APP_DIRECTOR_MODE}"; # Opts: "every-n-gvt", "every-n-nanoseconds" + director_num_gvt="${EVERY_N_GVT}"; + director_num_ns="${EVERY_NSECS}"; # 1^6 means 1ms + + # Configuring predictor + # Minimum number of iterations to collect data from before skipping ahead in the simulation + num_iters_to_collect="2"; +} diff --git a/tmptest/conf/jacobi_MILC.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf similarity index 100% rename from tmptest/conf/jacobi_MILC.conf rename to tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf diff --git a/tmptest/conf/milc_skeleton.json b/tests/conf/union-milc-jacobi-workload/milc_skeleton.json similarity index 92% rename from tmptest/conf/milc_skeleton.json rename to tests/conf/union-milc-jacobi-workload/milc_skeleton.json index b3a42297..c43f648f 100644 --- a/tmptest/conf/milc_skeleton.json +++ b/tests/conf/union-milc-jacobi-workload/milc_skeleton.json @@ -4,7 +4,7 @@ "size": 36, "cfg": { "app": "milc", - "iteration_cnt": 5, + "iteration_cnt": 120, "compute_delay": 100, "dimension_cnt": 4, "dimension_sizes": [2,2,3,3], diff --git a/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf similarity index 100% rename from tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf rename to tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf diff --git a/tests/run-test.sh.in 
b/tests/run-test.sh.in index 81259559..be706268 100755 --- a/tests/run-test.sh.in +++ b/tests/run-test.sh.in @@ -11,6 +11,14 @@ export srcdir="${CMAKE_SOURCE_DIR}" export bindir="${CMAKE_BINARY_DIR}" export GENERATED_USING_CMAKE=1 +# Set Union and SWM install paths if available +if [ -n "${SWM_DATAROOTDIR}" ]; then + export SWM_DATAROOTDIR="${SWM_DATAROOTDIR}" +fi +if [ -n "${UNION_DATAROOTDIR}" ]; then + export UNION_DATAROOTDIR="${UNION_DATAROOTDIR}" +fi + # Creating temporal folder in order to save output without colliding with # some other process that would like to generate the same output mkdir -p testing-output diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh new file mode 100644 index 00000000..c16deb8f --- /dev/null +++ b/tests/union-workload-test-surrogate-parallel.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! 
+tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=0 +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output.txt 2> model-output-error.txt + +err=$? + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +# Checking both milc and jacobi ran +grep 'MILC: Iteration 119/120' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +grep 'Jacobi3D: Completed 40 iterations' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +grep 'App 0: All non-synthetic workloads have completed' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# it transitioned into surrogacy +grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# it transitioned back to high-fidelity +grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +exit 0 diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh new file mode 100644 index 00000000..59c0c067 --- /dev/null +++ b/tests/union-workload-test-surrogate.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=1 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! 
+tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=0 +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=1 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output.txt 2> model-output-error.txt + +err=$? + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +# Checking both milc and jacobi ran +grep 'MILC: Iteration 119/120' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +grep 'Jacobi3D: Completed 40 iterations' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +grep 'App 0: All non-synthetic workloads have completed' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# it transitioned into surrogacy +grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# it transitioned back to high-fidelity +grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +exit 0 diff --git a/tmptest/README.md b/tmptest/README.md deleted file mode 100644 index 22a7de5d..00000000 --- a/tmptest/README.md +++ /dev/null @@ -1,107 +0,0 @@ -# Union -Workload Manager for Integration of Conceptual as an Online Workload for CODES - - -# Installation - -### Installing Conceptual (mandatory) - -Download Conceptual at https://ccsweb.lanl.gov/~pakin/software/conceptual/download.html (version 1.5.1 or greater) - -```bash -tar xvf conceptual-1.5.1.tar.gz -cd conceptual-1.5.1 -PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm -make -make install -``` - -### Installing Boost-Python (currently mandatory, we may remove this soon) - -Download boost at http://www.boost.org/users/download/ (version 1.68 or greater) - -```bash -tar xvf boost_1_68_0.tar.gz -cd boost_1_68_0 -./bootstrap.sh --prefix=/path/to/boost/install --with-libraries=python -./b2 install -``` - -### Installing Union -```bash -git clone https://github.com/SPEAR-UIC/Union -cd union -./prepare.sh -./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx -make -make install -``` - -# Workload Simulation with CODES - -### Installing ROSS - -```bash -git clone https://github.com/carothersc/ROSS.git -mkdir 
build-ross -cd build-ross -cmake -DCMAKE_INSTALL_PREFIX:path=path/to/ross/install -DCMAKE_C_COMPILER=$(which mpicc) -DCMAKE_CXX_COMPILER=$(which mpicxx) ../ROSS -make install -``` - -### Installing Argobots - -```bash -git clone https://github.com/pmodels/argobots.git -./autogen.sh -./configure --prefix=/path/to/argobots/install -make -make install -``` - -### Installing SWM workloads - -```bash -git clone https://github.com/codes-org/SWM-workloads.git -cd swm -./prepare.sh -./configure --with-boost=/path/to/boost/install --prefix=/path/to/swm/install CC=mpicc CXX=mpicxx -make -make install -``` - -### Installing CODES (kronos-union branch) - -```bash -git clone https://github.com/codes-org/codes.git -cd codes -./prepare.sh -mkdir build -cd build -../configure \ - --disable-shared \ - --with-online=true \ - --with-boost=/path/to/boost/install \ - PKG_CONFIG_PATH=/home/development/kronos/kronos-merge/argobots/build/bin/lib/pkgconfig:/home/development/kronos/kronos-merge/ross/build/bin/lib/pkgconfig:/home/development/kronos/kronos-merge/Union/install/lib/pkgconfig:/home/development/kronos/kronos-merge/swm-workloads/swm/build/bin/lib/pkgconfig \ - --with-union=true \ - --prefix=/path/to/codes/install \ - CC=mpicc CXX=mpicxx - -make -make install -``` - -### Run Test Simulations -The tmptest directory includes all necessary configuration files to run the test simulation. 
- -Copy milc_skeleton.json to /path/to/swm/install/share/ -Copy conceptual.json to /path/to/union/install/share/ -Change the path for "intra-group-connections" and "intra-group-connections" in dfdally-72-par.conf -Run the following command: - -```bash -/home/helq/Research/HPC/code/kronos/kronos-merge/codes/build/src/model-net-mpi-replay --sync=1 --workload_type=conc-online --lp-io-use-suffix=1 --workload_conf_file="$(realpath ../conf/jacobi_MILC.conf)" --alloc_file="$(realpath ../conf/rand_node0-1d-72-jacobi_MILC.conf)" --lp-io-dir=tmptest-jacobiS_MILC -- "$(realpath ../conf/dfdally-72-par.conf)" > tmptest-jacobiS_MILC.output -``` - - - diff --git a/tmptest/conf/dfdally-72-inter b/tmptest/conf/dfdally-72-inter deleted file mode 100644 index f95b989c64812d8936d00541ad6808c590c2a0dd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 576 zcmXxhSxy5%6hqO107)Q_nR#f){`ZKF)Nbj@{bTvrHO3gr;Cl#H;c3=G^g5hpeTy!_ z=d4roCh*a1xD9;tIot(4`W0kNvqr`$@X=#f2R`~9?gJnF4YGXA+~l8+zJ$lXN56wE zeC;Hwx60fFKKc>%fsg(LU2M`$vU;n`Q{baN!B%|TOqXtzmlBTiZ;RZlRN`-YRnn-jsS9TXk!j>C!E9lGR&f{sV<~3E2Pu diff --git a/tmptest/conf/dfdally-72-intra b/tmptest/conf/dfdally-72-intra deleted file mode 100644 index 37ea2848b53d14494cf91262078ab9fe67ac4d9d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 144 xcmYj|fe8RG2*bMh|Icf{zyxW~G)Yd^xk7W6|39n)>v*FA+ktgV99Rdo{{RVK03`qb diff --git a/tmptest/expected/iteration-logs b/tmptest/expected/iteration-logs deleted file mode 100644 index 83179b2f..00000000 --- a/tmptest/expected/iteration-logs +++ /dev/null @@ -1,360 +0,0 @@ -ITERATION 0 node 53 job 1 rank 34 time 4512686.215045 -ITERATION 0 node 54 job 1 rank 18 time 4513749.276363 -ITERATION 0 node 4 job 1 rank 10 time 4517871.443080 -ITERATION 0 node 56 job 1 rank 25 time 4538165.950297 -ITERATION 0 node 51 job 1 rank 29 time 4540989.136935 -ITERATION 0 node 29 job 1 rank 33 time 4542208.517079 -ITERATION 0 node 64 job 1 rank 9 time 
4543495.002481 -ITERATION 0 node 34 job 1 rank 31 time 4545035.368923 -ITERATION 0 node 58 job 1 rank 13 time 4545114.472426 -ITERATION 0 node 59 job 1 rank 15 time 4548339.211403 -ITERATION 0 node 35 job 1 rank 21 time 4554419.528244 -ITERATION 0 node 10 job 1 rank 3 time 4561111.532285 -ITERATION 0 node 60 job 1 rank 35 time 4562346.412949 -ITERATION 0 node 30 job 1 rank 27 time 4564867.979829 -ITERATION 0 node 16 job 1 rank 23 time 4567439.860843 -ITERATION 0 node 31 job 1 rank 2 time 4573504.960624 -ITERATION 0 node 22 job 1 rank 11 time 4574627.826180 -ITERATION 0 node 37 job 1 rank 14 time 4579930.283097 -ITERATION 0 node 27 job 1 rank 30 time 4587175.167871 -ITERATION 0 node 20 job 1 rank 5 time 4589753.028610 -ITERATION 0 node 57 job 1 rank 4 time 4593050.336040 -ITERATION 0 node 17 job 1 rank 32 time 4604738.524840 -ITERATION 0 node 19 job 1 rank 28 time 4607122.579619 -ITERATION 0 node 63 job 1 rank 24 time 4616051.899458 -ITERATION 0 node 44 job 1 rank 22 time 4629534.947693 -ITERATION 0 node 33 job 1 rank 12 time 4630801.551756 -ITERATION 0 node 14 job 1 rank 20 time 4634810.847629 -ITERATION 0 node 43 job 1 rank 17 time 4637426.227568 -ITERATION 0 node 50 job 1 rank 19 time 4645719.045367 -ITERATION 0 node 42 job 1 rank 1 time 4657866.214120 -ITERATION 0 node 0 job 1 rank 0 time 4662904.772657 -ITERATION 0 node 36 job 1 rank 26 time 4669557.114654 -ITERATION 0 node 47 job 1 rank 16 time 4682491.551200 -ITERATION 0 node 40 job 1 rank 7 time 4725442.291027 -ITERATION 0 node 41 job 1 rank 6 time 4726998.347349 -ITERATION 0 node 66 job 1 rank 8 time 4766523.879336 -ITERATION 0 node 67 job 0 rank 11 time 5565984.178337 -ITERATION 0 node 5 job 0 rank 23 time 5575875.812174 -ITERATION 0 node 9 job 0 rank 27 time 5579506.018872 -ITERATION 0 node 62 job 0 rank 31 time 5584465.243288 -ITERATION 0 node 45 job 0 rank 15 time 5618600.555287 -ITERATION 0 node 1 job 0 rank 19 time 5717281.741377 -ITERATION 0 node 70 job 0 rank 7 time 5721216.605169 -ITERATION 0 node 
3 job 0 rank 6 time 5727197.597463 -ITERATION 0 node 68 job 0 rank 21 time 5728040.094749 -ITERATION 0 node 26 job 0 rank 29 time 5732165.171144 -ITERATION 0 node 46 job 0 rank 25 time 5736556.257391 -ITERATION 0 node 21 job 0 rank 13 time 5736620.081453 -ITERATION 0 node 2 job 0 rank 3 time 5742325.644168 -ITERATION 0 node 55 job 0 rank 17 time 5749700.425050 -ITERATION 0 node 25 job 0 rank 35 time 5767346.740335 -ITERATION 0 node 38 job 0 rank 2 time 5788871.660345 -ITERATION 0 node 28 job 0 rank 33 time 5811545.306529 -ITERATION 0 node 32 job 0 rank 22 time 5813306.845833 -ITERATION 0 node 49 job 0 rank 20 time 5816982.805562 -ITERATION 0 node 61 job 0 rank 24 time 5820868.818799 -ITERATION 0 node 52 job 0 rank 32 time 5824994.468838 -ITERATION 0 node 7 job 0 rank 28 time 5829521.253890 -ITERATION 0 node 23 job 0 rank 26 time 5830679.689756 -ITERATION 0 node 71 job 0 rank 16 time 5830833.289263 -ITERATION 0 node 18 job 0 rank 5 time 5831517.990821 -ITERATION 0 node 13 job 0 rank 34 time 5836011.317489 -ITERATION 0 node 69 job 0 rank 10 time 5848873.775015 -ITERATION 0 node 39 job 0 rank 8 time 5852519.517052 -ITERATION 0 node 65 job 0 rank 4 time 5853828.568648 -ITERATION 0 node 24 job 0 rank 1 time 5854716.580830 -ITERATION 0 node 12 job 0 rank 0 time 5859659.053022 -ITERATION 0 node 6 job 0 rank 18 time 5863641.542198 -ITERATION 0 node 15 job 0 rank 30 time 5891375.888033 -ITERATION 0 node 8 job 0 rank 14 time 5894381.087555 -ITERATION 0 node 48 job 0 rank 12 time 5895134.332795 -ITERATION 0 node 11 job 0 rank 9 time 5942306.311091 -ITERATION 1 node 66 job 1 rank 8 time 8297180.376270 -ITERATION 1 node 14 job 1 rank 20 time 8300140.917016 -ITERATION 1 node 64 job 1 rank 9 time 8300849.048957 -ITERATION 1 node 19 job 1 rank 28 time 8301305.476039 -ITERATION 1 node 63 job 1 rank 24 time 8301305.711761 -ITERATION 1 node 33 job 1 rank 12 time 8301733.793207 -ITERATION 1 node 10 job 1 rank 3 time 8302129.790711 -ITERATION 1 node 42 job 1 rank 1 time 8302470.511443 
-ITERATION 1 node 47 job 1 rank 16 time 8303046.409280 -ITERATION 1 node 35 job 1 rank 21 time 8303817.637378 -ITERATION 1 node 37 job 1 rank 14 time 8303944.501174 -ITERATION 1 node 4 job 1 rank 10 time 8303944.806055 -ITERATION 1 node 43 job 1 rank 17 time 8304146.840707 -ITERATION 1 node 56 job 1 rank 25 time 8304344.924724 -ITERATION 1 node 59 job 1 rank 15 time 8305202.188843 -ITERATION 1 node 20 job 1 rank 5 time 8305346.120632 -ITERATION 1 node 31 job 1 rank 2 time 8305427.098141 -ITERATION 1 node 22 job 1 rank 11 time 8305464.857400 -ITERATION 1 node 16 job 1 rank 23 time 8305465.061184 -ITERATION 1 node 58 job 1 rank 13 time 8305909.329333 -ITERATION 1 node 44 job 1 rank 22 time 8306179.488712 -ITERATION 1 node 40 job 1 rank 7 time 8306815.016916 -ITERATION 1 node 36 job 1 rank 26 time 8307242.113385 -ITERATION 1 node 54 job 1 rank 18 time 8307242.230485 -ITERATION 1 node 0 job 1 rank 0 time 8307509.069980 -ITERATION 1 node 27 job 1 rank 30 time 8308069.836665 -ITERATION 1 node 41 job 1 rank 6 time 8308371.073239 -ITERATION 1 node 51 job 1 rank 29 time 8308510.790046 -ITERATION 1 node 57 job 1 rank 4 time 8308643.428061 -ITERATION 1 node 30 job 1 rank 27 time 8308762.164730 -ITERATION 1 node 17 job 1 rank 32 time 8308912.995878 -ITERATION 1 node 53 job 1 rank 34 time 8309964.008052 -ITERATION 1 node 34 job 1 rank 31 time 8311068.775441 -ITERATION 1 node 50 job 1 rank 19 time 8311331.100457 -ITERATION 1 node 29 job 1 rank 33 time 8311707.229825 -ITERATION 1 node 60 job 1 rank 35 time 8317686.769451 -ITERATION 1 node 8 job 0 rank 14 time 11235088.810240 -ITERATION 1 node 18 job 0 rank 5 time 11235317.411859 -ITERATION 1 node 15 job 0 rank 30 time 11237472.894623 -ITERATION 1 node 24 job 0 rank 1 time 11245995.227219 -ITERATION 1 node 61 job 0 rank 24 time 11248511.493893 -ITERATION 1 node 71 job 0 rank 16 time 11249063.598574 -ITERATION 1 node 2 job 0 rank 3 time 11249988.785889 -ITERATION 1 node 11 job 0 rank 9 time 11250588.161742 -ITERATION 1 node 21 job 
0 rank 13 time 11251495.763839 -ITERATION 1 node 49 job 0 rank 20 time 11252879.124696 -ITERATION 1 node 38 job 0 rank 2 time 11253481.695522 -ITERATION 1 node 52 job 0 rank 32 time 11254930.185172 -ITERATION 1 node 26 job 0 rank 29 time 11259777.124063 -ITERATION 1 node 39 job 0 rank 8 time 11266061.281928 -ITERATION 1 node 32 job 0 rank 22 time 11270470.549169 -ITERATION 1 node 68 job 0 rank 21 time 11286125.385978 -ITERATION 1 node 48 job 0 rank 12 time 11294811.908230 -ITERATION 1 node 46 job 0 rank 25 time 11300192.747257 -ITERATION 1 node 12 job 0 rank 0 time 11317107.807555 -ITERATION 1 node 7 job 0 rank 28 time 11322189.096862 -ITERATION 1 node 67 job 0 rank 11 time 11333239.622073 -ITERATION 1 node 70 job 0 rank 7 time 11334381.123429 -ITERATION 1 node 45 job 0 rank 15 time 11339259.471444 -ITERATION 1 node 1 job 0 rank 19 time 11340523.986731 -ITERATION 1 node 62 job 0 rank 31 time 11345350.350138 -ITERATION 1 node 69 job 0 rank 10 time 11351306.613121 -ITERATION 1 node 23 job 0 rank 26 time 11361640.366497 -ITERATION 1 node 25 job 0 rank 35 time 11363595.692666 -ITERATION 1 node 3 job 0 rank 6 time 11372326.537226 -ITERATION 1 node 9 job 0 rank 27 time 11388501.387550 -ITERATION 1 node 6 job 0 rank 18 time 11391110.891033 -ITERATION 1 node 5 job 0 rank 23 time 11392483.556208 -ITERATION 1 node 65 job 0 rank 4 time 11392558.633627 -ITERATION 1 node 13 job 0 rank 34 time 11396149.449470 -ITERATION 1 node 55 job 0 rank 17 time 11417714.983024 -ITERATION 1 node 28 job 0 rank 33 time 11421840.318515 -ITERATION 2 node 58 job 1 rank 13 time 12316899.292981 -ITERATION 2 node 35 job 1 rank 21 time 12316899.576287 -ITERATION 2 node 64 job 1 rank 9 time 12316899.606397 -ITERATION 2 node 10 job 1 rank 3 time 12317127.807697 -ITERATION 2 node 33 job 1 rank 12 time 12320196.737845 -ITERATION 2 node 56 job 1 rank 25 time 12320196.913727 -ITERATION 2 node 59 job 1 rank 15 time 12320197.104361 -ITERATION 2 node 43 job 1 rank 17 time 12320197.230276 -ITERATION 2 node 66 
job 1 rank 8 time 12320197.233370 -ITERATION 2 node 40 job 1 rank 7 time 12320425.112677 -ITERATION 2 node 31 job 1 rank 2 time 12320425.115126 -ITERATION 2 node 42 job 1 rank 1 time 12320426.025358 -ITERATION 2 node 51 job 1 rank 29 time 12321024.628471 -ITERATION 2 node 16 job 1 rank 23 time 12321024.717490 -ITERATION 2 node 22 job 1 rank 11 time 12321024.945440 -ITERATION 2 node 14 job 1 rank 20 time 12321025.218551 -ITERATION 2 node 41 job 1 rank 6 time 12321981.169000 -ITERATION 2 node 34 job 1 rank 31 time 12323494.411691 -ITERATION 2 node 0 job 1 rank 0 time 12323723.332787 -ITERATION 2 node 19 job 1 rank 28 time 12324322.073336 -ITERATION 2 node 30 job 1 rank 27 time 12324322.252770 -ITERATION 2 node 47 job 1 rank 16 time 12324322.491049 -ITERATION 2 node 29 job 1 rank 33 time 12324322.565767 -ITERATION 2 node 63 job 1 rank 24 time 12324322.568861 -ITERATION 2 node 50 job 1 rank 19 time 12325150.326534 -ITERATION 2 node 44 job 1 rank 22 time 12326063.258270 -ITERATION 2 node 20 job 1 rank 5 time 12326291.816256 -ITERATION 2 node 4 job 1 rank 10 time 12326891.771792 -ITERATION 2 node 37 job 1 rank 14 time 12326892.126731 -ITERATION 2 node 17 job 1 rank 32 time 12328447.826540 -ITERATION 2 node 60 job 1 rank 35 time 12329275.662024 -ITERATION 2 node 57 job 1 rank 4 time 12329589.123686 -ITERATION 2 node 54 job 1 rank 18 time 12330188.984627 -ITERATION 2 node 36 job 1 rank 26 time 12330189.079122 -ITERATION 2 node 27 job 1 rank 30 time 12331017.462222 -ITERATION 2 node 53 job 1 rank 34 time 12332573.069010 -ITERATION 3 node 64 job 1 rank 9 time 16846408.366519 -ITERATION 3 node 35 job 1 rank 21 time 16846408.565540 -ITERATION 3 node 58 job 1 rank 13 time 16848977.637086 -ITERATION 3 node 66 job 1 rank 8 time 16849705.670921 -ITERATION 3 node 56 job 1 rank 25 time 16849705.673849 -ITERATION 3 node 59 job 1 rank 15 time 16849705.765384 -ITERATION 3 node 43 job 1 rank 17 time 16849705.775639 -ITERATION 3 node 42 job 1 rank 1 time 16849934.209991 -ITERATION 3 node 
40 job 1 rank 7 time 16849935.003134 -ITERATION 3 node 22 job 1 rank 11 time 16850534.153202 -ITERATION 3 node 16 job 1 rank 23 time 16850535.146788 -ITERATION 3 node 41 job 1 rank 6 time 16851491.059456 -ITERATION 3 node 34 job 1 rank 31 time 16853003.072714 -ITERATION 3 node 51 job 1 rank 29 time 16853102.972576 -ITERATION 3 node 0 job 1 rank 0 time 16853231.517421 -ITERATION 3 node 33 job 1 rank 12 time 16853284.355810 -ITERATION 3 node 47 job 1 rank 16 time 16853830.997593 -ITERATION 3 node 63 job 1 rank 24 time 16853831.006412 -ITERATION 3 node 29 job 1 rank 33 time 16853831.111130 -ITERATION 3 node 19 job 1 rank 28 time 16853831.239840 -ITERATION 3 node 20 job 1 rank 5 time 16854059.856564 -ITERATION 3 node 4 job 1 rank 10 time 16854659.040052 -ITERATION 3 node 44 job 1 rank 22 time 16856400.617505 -ITERATION 3 node 50 job 1 rank 19 time 16856401.051372 -ITERATION 3 node 57 job 1 rank 4 time 16857357.163993 -ITERATION 3 node 37 job 1 rank 14 time 16857724.883755 -ITERATION 3 node 54 job 1 rank 18 time 16857956.265580 -ITERATION 3 node 36 job 1 rank 26 time 16857956.347382 -ITERATION 3 node 17 job 1 rank 32 time 16859697.584190 -ITERATION 3 node 53 job 1 rank 34 time 16860340.349963 -ITERATION 3 node 60 job 1 rank 35 time 16860526.386863 -ITERATION 3 node 30 job 1 rank 27 time 16862439.401431 -ITERATION 3 node 27 job 1 rank 30 time 16862631.359863 -ITERATION 3 node 10 job 1 rank 3 time 16874057.983505 -ITERATION 3 node 31 job 1 rank 2 time 16877355.290935 -ITERATION 3 node 14 job 1 rank 20 time 16894510.197562 -ITERATION 2 node 5 job 0 rank 23 time 17110727.735133 -ITERATION 2 node 32 job 0 rank 22 time 17114852.209292 -ITERATION 2 node 62 job 0 rank 31 time 17114852.599159 -ITERATION 2 node 9 job 0 rank 27 time 17114852.774364 -ITERATION 2 node 68 job 0 rank 21 time 17114853.074428 -ITERATION 2 node 49 job 0 rank 20 time 17118149.308069 -ITERATION 2 node 45 job 0 rank 15 time 17118149.906489 -ITERATION 2 node 46 job 0 rank 25 time 17118150.389777 -ITERATION 2 
node 25 job 0 rank 35 time 17118150.407391 -ITERATION 2 node 70 job 0 rank 7 time 17118379.051555 -ITERATION 2 node 15 job 0 rank 30 time 17118977.516634 -ITERATION 2 node 23 job 0 rank 26 time 17118977.597908 -ITERATION 2 node 67 job 0 rank 11 time 17118978.109855 -ITERATION 2 node 26 job 0 rank 29 time 17118978.321161 -ITERATION 2 node 8 job 0 rank 14 time 17121361.601018 -ITERATION 2 node 61 job 0 rank 24 time 17121446.508622 -ITERATION 2 node 11 job 0 rank 9 time 17121447.697106 -ITERATION 2 node 28 job 0 rank 33 time 17121448.334170 -ITERATION 2 node 7 job 0 rank 28 time 17122274.470849 -ITERATION 2 node 13 job 0 rank 34 time 17122274.835607 -ITERATION 2 node 1 job 0 rank 19 time 17122275.742882 -ITERATION 2 node 18 job 0 rank 5 time 17122503.338803 -ITERATION 2 node 2 job 0 rank 3 time 17122504.165793 -ITERATION 2 node 3 job 0 rank 6 time 17122504.387146 -ITERATION 2 node 69 job 0 rank 10 time 17123102.933399 -ITERATION 2 node 21 job 0 rank 13 time 17123103.656652 -ITERATION 2 node 39 job 0 rank 8 time 17124743.815952 -ITERATION 2 node 52 job 0 rank 32 time 17125571.717580 -ITERATION 2 node 55 job 0 rank 17 time 17125573.669661 -ITERATION 2 node 24 job 0 rank 1 time 17125800.744680 -ITERATION 2 node 38 job 0 rank 2 time 17125801.473223 -ITERATION 2 node 48 job 0 rank 12 time 17126399.806339 -ITERATION 2 node 6 job 0 rank 18 time 17126400.171098 -ITERATION 2 node 65 job 0 rank 4 time 17126628.674394 -ITERATION 2 node 71 job 0 rank 16 time 17128869.024909 -ITERATION 2 node 12 job 0 rank 0 time 17129926.080271 -ITERATION 4 node 4 job 1 rank 10 time 21068851.547471 -ITERATION 4 node 44 job 1 rank 22 time 21071037.824821 -ITERATION 4 node 54 job 1 rank 18 time 21073270.434306 -ITERATION 4 node 37 job 1 rank 14 time 21097612.783438 -ITERATION 4 node 27 job 1 rank 30 time 21102424.565486 -ITERATION 4 node 20 job 1 rank 5 time 21108249.546302 -ITERATION 4 node 33 job 1 rank 12 time 21110027.342829 -ITERATION 4 node 42 job 1 rank 1 time 21110256.073660 -ITERATION 4 
node 57 job 1 rank 4 time 21111546.853731 -ITERATION 4 node 66 job 1 rank 8 time 21113501.882887 -ITERATION 4 node 14 job 1 rank 20 time 21113973.869283 -ITERATION 4 node 19 job 1 rank 28 time 21114152.678320 -ITERATION 4 node 58 job 1 rank 13 time 21115387.315874 -ITERATION 4 node 35 job 1 rank 21 time 21115387.760180 -ITERATION 4 node 10 job 1 rank 3 time 21115616.245472 -ITERATION 4 node 47 job 1 rank 16 time 21118843.796707 -ITERATION 4 node 63 job 1 rank 24 time 21119441.870921 -ITERATION 4 node 0 job 1 rank 0 time 21119597.570694 -ITERATION 4 node 17 job 1 rank 32 time 21127435.068013 -ITERATION 4 node 51 job 1 rank 29 time 21129060.332514 -ITERATION 4 node 36 job 1 rank 26 time 21143479.925267 -ITERATION 4 node 53 job 1 rank 34 time 21154862.434499 -ITERATION 4 node 31 job 1 rank 2 time 21156014.514993 -ITERATION 4 node 59 job 1 rank 15 time 21161740.594529 -ITERATION 4 node 40 job 1 rank 7 time 21161969.568395 -ITERATION 4 node 41 job 1 rank 6 time 21163525.624717 -ITERATION 4 node 16 job 1 rank 23 time 21173654.388369 -ITERATION 4 node 64 job 1 rank 9 time 21185533.014478 -ITERATION 4 node 43 job 1 rank 17 time 21189278.967485 -ITERATION 4 node 56 job 1 rank 25 time 21189811.925489 -ITERATION 4 node 34 job 1 rank 31 time 21192311.784227 -ITERATION 4 node 29 job 1 rank 33 time 21238412.751629 -ITERATION 4 node 22 job 1 rank 11 time 21253653.349842 -ITERATION 4 node 30 job 1 rank 27 time 21256950.657171 -ITERATION 4 node 50 job 1 rank 19 time 21257778.307985 -ITERATION 4 node 60 job 1 rank 35 time 21356838.612638 -ITERATION 3 node 49 job 0 rank 20 time 22283924.781893 -ITERATION 3 node 32 job 0 rank 22 time 22287221.893064 -ITERATION 3 node 61 job 0 rank 24 time 22287222.081563 -ITERATION 3 node 68 job 0 rank 21 time 22287222.429961 -ITERATION 3 node 7 job 0 rank 28 time 22288050.215038 -ITERATION 3 node 39 job 0 rank 8 time 22290519.388893 -ITERATION 3 node 15 job 0 rank 30 time 22291346.949060 -ITERATION 3 node 23 job 0 rank 26 time 22291347.443031 
-ITERATION 3 node 26 job 0 rank 29 time 22291347.569684 -ITERATION 3 node 52 job 0 rank 32 time 22291347.632683 -ITERATION 3 node 24 job 0 rank 1 time 22291576.365970 -ITERATION 3 node 48 job 0 rank 12 time 22292175.550529 -ITERATION 3 node 5 job 0 rank 23 time 22292260.691330 -ITERATION 3 node 46 job 0 rank 25 time 22292261.166656 -ITERATION 3 node 8 job 0 rank 14 time 22293731.033444 -ITERATION 3 node 71 job 0 rank 16 time 22294644.940013 -ITERATION 3 node 13 job 0 rank 34 time 22294645.243202 -ITERATION 3 node 18 job 0 rank 5 time 22294873.423611 -ITERATION 3 node 2 job 0 rank 3 time 22294873.612687 -ITERATION 3 node 69 job 0 rank 10 time 22295472.778521 -ITERATION 3 node 21 job 0 rank 13 time 22295472.905174 -ITERATION 3 node 28 job 0 rank 33 time 22295558.271868 -ITERATION 3 node 11 job 0 rank 9 time 22295558.473985 -ITERATION 3 node 12 job 0 rank 0 time 22295701.701561 -ITERATION 3 node 9 job 0 rank 27 time 22296386.567242 -ITERATION 3 node 62 job 0 rank 31 time 22296386.592726 -ITERATION 3 node 38 job 0 rank 2 time 22298170.920117 -ITERATION 3 node 6 job 0 rank 18 time 22298770.578693 -ITERATION 3 node 65 job 0 rank 4 time 22298998.759202 -ITERATION 3 node 55 job 0 rank 17 time 22299683.607359 -ITERATION 3 node 45 job 0 rank 15 time 22299683.900055 -ITERATION 3 node 25 job 0 rank 35 time 22299683.915598 -ITERATION 3 node 70 job 0 rank 7 time 22299911.908515 -ITERATION 3 node 67 job 0 rank 11 time 22300511.902733 -ITERATION 3 node 1 job 0 rank 19 time 22303809.251089 -ITERATION 3 node 3 job 0 rank 6 time 22304037.244106 -ITERATION 4 node 46 job 0 rank 25 time 27793249.825077 -ITERATION 4 node 68 job 0 rank 21 time 27795614.122037 -ITERATION 4 node 11 job 0 rank 9 time 27795614.960123 -ITERATION 4 node 61 job 0 rank 24 time 27796442.932740 -ITERATION 4 node 9 job 0 rank 27 time 27796462.525210 -ITERATION 4 node 28 job 0 rank 33 time 27798203.258478 -ITERATION 4 node 49 job 0 rank 20 time 27799739.567011 -ITERATION 4 node 26 job 0 rank 29 time 27799739.840442 
-ITERATION 4 node 39 job 0 rank 8 time 27799740.240069 -ITERATION 4 node 55 job 0 rank 17 time 27799740.572089 -ITERATION 4 node 67 job 0 rank 11 time 27800568.269665 -ITERATION 4 node 5 job 0 rank 23 time 27800568.290579 -ITERATION 4 node 52 job 0 rank 32 time 27800568.291383 -ITERATION 4 node 23 job 0 rank 26 time 27800568.764657 -ITERATION 4 node 25 job 0 rank 35 time 27800587.792281 -ITERATION 4 node 2 job 0 rank 3 time 27803265.147279 -ITERATION 4 node 21 job 0 rank 13 time 27803865.175933 -ITERATION 4 node 7 job 0 rank 28 time 27803865.393650 -ITERATION 4 node 71 job 0 rank 16 time 27803865.598713 -ITERATION 4 node 13 job 0 rank 34 time 27803865.779107 -ITERATION 4 node 1 job 0 rank 19 time 27803865.949695 -ITERATION 4 node 62 job 0 rank 31 time 27804693.839746 -ITERATION 4 node 69 job 0 rank 10 time 27804694.100148 -ITERATION 4 node 32 job 0 rank 22 time 27804694.147620 -ITERATION 4 node 38 job 0 rank 2 time 27806562.454709 -ITERATION 4 node 24 job 0 rank 1 time 27807391.000446 -ITERATION 4 node 48 job 0 rank 12 time 27807990.729141 -ITERATION 4 node 6 job 0 rank 18 time 27807991.114598 -ITERATION 4 node 45 job 0 rank 15 time 27807991.147076 -ITERATION 4 node 70 job 0 rank 7 time 27808219.847694 -ITERATION 4 node 15 job 0 rank 30 time 27808819.476032 -ITERATION 4 node 8 job 0 rank 14 time 27811203.560416 -ITERATION 4 node 12 job 0 rank 0 time 27811516.336037 -ITERATION 4 node 3 job 0 rank 6 time 27812345.183284 -ITERATION 4 node 18 job 0 rank 5 time 27812345.583060 -ITERATION 4 node 65 job 0 rank 4 time 27816470.918651 diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time deleted file mode 100644 index e69de29b..00000000 diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats deleted file mode 100644 index b06aa026..00000000 --- 
a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats +++ /dev/null @@ -1,73 +0,0 @@ -# Format <# Packets finished> -2 0 39813200 39813200 36107.138499 461417.952062 3297.307430 9770 4.376561 17587784.331581 -3 1 30000200 30000200 39284.494061 434701.820268 2379.827009 7387 3.682144 18356179.307331 -7 2 30000240 30000240 62318.782725 640866.354244 2379.827009 7392 4.470373 19113473.070391 -8 3 30000040 30000040 40925.980015 515505.949898 3291.464725 7367 3.945975 17940780.437224 -12 4 39813520 39813520 34994.103721 469254.313408 3297.307430 9810 4.381040 16536392.029545 -13 5 30000200 30000200 34635.404728 544989.362677 2379.827009 7387 4.055909 14938820.108085 -17 6 30000200 30000200 44767.801473 576653.794469 2384.084484 7387 3.656288 18814126.770707 -18 7 30000200 30000200 51387.382471 546505.844848 3291.464725 7387 4.206444 22205724.648773 -22 8 30000200 30000200 56515.671426 456563.833012 2379.827009 7387 4.338568 15243839.322545 -23 9 30000200 30000200 62747.573596 732304.908878 2464.855722 7387 4.501963 22518624.337887 -27 10 39813600 39813600 38206.513802 595234.314223 3297.307430 9820 4.071996 15912055.432753 -28 11 30000200 30000200 57688.459239 460987.598043 3291.464725 7387 4.624882 21982147.565134 -32 12 30000040 30000040 48120.746995 460788.732751 1553.218006 7367 4.426904 21978848.089413 -33 13 30000200 30000200 48728.737706 528097.477557 2379.827009 7387 4.224042 17300716.492549 -37 14 39813520 39813520 42482.617734 504077.082789 3297.307430 9810 4.500408 16254727.926633 -38 15 30000200 30000200 55858.592637 544988.314605 2379.827009 7387 3.945986 10437047.462660 -42 16 39813520 39813520 42849.647075 408832.282689 2469.279268 9810 4.165036 15447231.594722 -43 17 39813520 39813520 38338.085471 341206.976962 2384.084484 9810 4.015087 16773305.735538 -47 18 30000240 30000240 42373.430620 629220.891776 3291.464725 7392 4.535173 15995328.015708 -48 19 39813520 39813520 40423.608697 315897.493880 2384.084484 9810 4.176860 
14328352.862862 -52 20 39813600 39813600 43520.623476 541086.894327 3297.307430 9820 4.555601 13780387.971014 -53 21 30000200 30000200 44476.828958 349439.122320 2379.827009 7387 4.475430 18712483.166522 -57 22 39813520 39813520 46535.756921 308890.720074 3297.307430 9810 4.331906 12926592.020511 -58 23 30000200 30000200 33511.399102 322937.974241 2469.279268 7387 3.786517 18523348.301746 -62 24 30000240 30000240 36774.377417 461518.474685 1553.218006 7392 4.146239 19158269.234675 -63 25 30000200 30000200 41059.457855 278051.087009 2379.827009 7387 4.351157 19270573.497551 -67 26 30000200 30000200 40614.046110 381768.605366 2379.827009 7387 4.125220 19112482.493963 -68 27 39813520 39813520 41995.882873 371667.257947 2469.279268 9810 4.376962 14646355.081486 -72 28 30000200 30000200 31321.605985 365266.392194 2379.827009 7387 4.039123 19400082.478809 -73 29 39813520 39813520 32959.171255 451587.798712 3297.307430 9810 4.179613 16095202.695306 -77 30 39813520 39813520 37427.649193 351365.395737 2469.279268 9810 4.407339 15586679.540493 -78 31 39813200 39813200 41862.918330 387855.048741 3297.307430 9770 4.525077 16586112.004836 -82 32 30000200 30000200 45060.956994 427249.367222 3291.464725 7387 4.514146 19189374.516511 -83 33 39813520 39813520 49395.064664 469382.320131 2384.084484 9810 4.012232 17850768.194652 -87 34 39813520 39813520 44079.438999 342285.995523 2469.279268 9810 4.520387 15144572.054951 -88 35 39813520 39813520 38971.920099 300516.633956 3297.307430 9810 4.351682 16091038.039457 -92 36 39813520 39813520 39426.347282 309758.764907 2643.131510 9810 4.074822 15678132.783902 -93 37 39813520 39813520 34995.061880 346958.053999 2384.084484 9810 3.428746 16876971.125963 -97 38 30000040 30000040 35905.285204 495198.570009 2379.827009 7367 3.970273 22512609.412966 -98 39 30000200 30000200 42447.183602 377731.969067 2469.279268 7387 4.671179 22714730.942416 -102 40 39813600 39813600 34687.355890 335707.416147 1556.056322 9820 3.577800 14259163.848822 -103 41 
39813200 39813200 38175.362430 382249.538761 1556.056322 9770 3.537769 13573738.064970 -107 42 39813600 39813600 44532.014760 369370.892239 3297.307430 9820 4.442770 15313358.299072 -108 43 39813520 39813520 38517.783131 536152.927723 2384.084484 9810 3.930887 16854266.067963 -112 44 39813520 39813520 48072.153707 551643.077958 3297.307430 9810 4.406932 13784797.518310 -113 45 30000200 30000200 38361.576186 337612.463862 3291.464725 7387 4.412481 22709598.612240 -117 46 30000200 30000200 47063.934511 323522.751165 3291.464725 7387 4.315690 15178598.555845 -118 47 39813520 39813520 32468.101294 470956.241085 2384.084484 9810 3.637717 16103495.317657 -122 48 30000200 30000200 34763.145246 355505.175562 2379.827009 7387 3.586165 14099343.817718 -123 49 30000200 30000200 32178.416645 415926.699100 1553.218006 7387 3.282523 13642975.324074 -127 50 39813520 39813520 36042.175157 333399.692314 2384.084484 9810 3.894903 15924037.373174 -128 51 39813520 39813520 47426.976684 384894.236551 3297.307430 9810 4.524771 14800606.559240 -132 52 30000200 30000200 50648.202686 683306.228547 3291.464725 7387 4.051712 18051629.450587 -133 53 39813520 39813520 43688.807523 397577.483030 2384.084484 9810 4.367992 15294739.661280 -137 54 39813520 39813520 41274.721778 410620.945162 2384.084484 9810 3.614985 15348607.758912 -138 55 30000200 30000200 47460.273307 393172.959339 3291.464725 7387 4.455665 22327681.850808 -142 56 39813520 39813520 30353.003214 423224.921981 2384.084484 9810 3.627217 14875339.838529 -143 57 39813200 39813200 41978.905805 362220.406331 3297.307430 9770 4.289662 13439725.026830 -147 58 39813520 39813520 32600.221245 394656.517832 1556.056322 9810 3.366769 17388431.077442 -148 59 39813520 39813520 34844.964516 437962.421017 1556.056322 9810 3.642712 15275599.714230 -152 60 39813520 39813520 38828.762813 380963.287056 2469.279268 9810 4.269521 14057550.559304 -153 61 30000200 30000200 38425.563290 437052.170191 3291.464725 7387 4.298227 18690662.547655 -157 62 
30000200 30000200 37163.192817 486901.376592 3291.464725 7387 4.302423 22529024.421436 -158 63 39813520 39813520 32177.129431 313859.556601 2384.084484 9810 4.102345 16088278.649605 -162 64 39813520 39813520 36307.283720 473982.706414 2384.084484 9810 3.756575 13805423.148064 -163 65 30000040 30000040 39266.258111 541660.942240 3291.464725 7367 3.542012 22415755.472424 -167 66 39813520 39813520 35784.342668 486343.405857 2384.084484 9810 3.758104 17791245.347997 -168 67 30000200 30000200 35113.840119 558234.396436 2379.827009 7387 3.712739 15175940.132678 -172 68 30000200 30000200 46259.577398 529697.867518 3291.464725 7387 4.381752 11926841.048741 -173 69 30000200 30000200 41583.798735 474531.461375 2384.084484 7387 4.093001 18609998.026356 -177 70 30000240 30000240 30915.717460 551227.348696 2379.827009 7392 3.543561 18582548.588579 -178 71 30000200 30000200 36220.251632 387764.380366 2464.855722 7387 3.623392 14445475.496982 diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats deleted file mode 100644 index 7864d91b..00000000 --- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats +++ /dev/null @@ -1,326 +0,0 @@ -# Format < dest_type> - -0 T 0 R CN 40017920 17587784.331581 8837 -1 T 0 R CN 30257152 18356179.307331 6359 -0 R 1 R L 43550704 4874052.951440 4664 -0 R 2 R L 45137320 4738075.959451 5558 -0 R 3 R L 47388976 5040625.986632 6123 -0 R 7 R G 47388976 5040625.986632 6123 -0 R 11 R G 41983848 4420584.424223 4472 -0 R 0 T CN 39813200 1311579.280498 3696 -0 R 1 T CN 30049352 627821.545675 1868 -2 T 1 R CN 30277632 19113473.070391 6755 -3 T 1 R CN 30175232 17940780.437224 6736 -1 R 0 R L 45287672 4986264.334545 5638 -1 R 2 R L 41987648 4053798.568995 5472 -1 R 3 R L 44837432 4310280.411427 4833 -1 R 14 R G 44837432 4310280.411427 4833 -1 R 18 R G 40681472 5840547.653511 5126 -1 R 2 T CN 30049392 1001716.940524 3112 -1 R 3 T 
CN 30049192 766538.802992 2606 -4 T 2 R CN 40181760 16536392.029545 8204 -5 T 2 R CN 30257152 14938820.108085 6439 -2 R 0 R L 45136984 3987872.001758 3633 -2 R 1 R L 33825320 5132507.548055 3802 -2 R 3 R L 48316064 5229299.015873 6004 -2 R 21 R G 48316064 5229299.015873 6004 -2 R 25 R G 38686264 4817680.782145 4226 -2 R 4 T CN 39813520 1501521.379594 4519 -2 R 5 T CN 30049352 556600.125602 1835 -6 T 3 R CN 30257152 18814126.770707 6964 -7 T 3 R CN 30257152 22205724.648773 6984 -3 R 0 R L 45473184 4549998.796036 5237 -3 R 1 R L 38859592 4033811.151346 4587 -3 R 2 R L 36569560 4293046.016019 3998 -3 R 28 R G 47455304 5109010.661862 5571 -3 R 32 R G 48449992 6234288.585361 7036 -3 R 6 T CN 30049352 821409.664124 2409 -3 R 7 T CN 30049352 969930.916156 3276 -8 T 4 R CN 30257152 15243839.322545 6972 -9 T 4 R CN 30257152 22518624.337887 6513 -4 R 5 R L 36860984 4721092.963003 4346 -4 R 6 R L 32135288 3583080.888814 3568 -4 R 7 R L 58009616 6102408.455236 8020 -4 R 11 R G 58009616 6102408.455236 8020 -4 R 15 R G 52375248 5544874.795086 7249 -4 R 8 T CN 30049352 1128444.125886 3975 -4 R 9 T CN 30049352 932709.088273 3051 -10 T 5 R CN 40222720 15912055.432753 8427 -11 T 5 R CN 30257152 21982147.565134 6710 -5 R 4 R L 53993696 6136982.915861 7661 -5 R 6 R L 42998872 5104126.663520 4877 -5 R 7 R L 46448488 5646843.629053 6270 -5 R 18 R G 46448488 5646843.629053 6270 -5 R 22 R G 51448536 5797029.042031 6137 -5 R 10 T CN 39813600 1349237.793212 4270 -5 R 11 T CN 30049352 1248376.533293 3652 -12 T 6 R CN 30175232 21978848.089413 7147 -13 T 6 R CN 30257152 17300716.492549 6711 -6 R 4 R L 40487440 5812805.144307 5433 -6 R 5 R L 36769744 5264234.631795 4172 -6 R 7 R L 36385608 4752852.854935 3298 -6 R 25 R G 36385608 4752852.854935 3298 -6 R 29 R G 40053384 5005853.637480 4483 -6 R 12 T CN 30049192 903354.038109 3055 -6 R 13 T CN 30049352 712938.420808 2057 -14 T 7 R CN 40181760 16254727.926633 8254 -15 T 7 R CN 30257152 10437047.462660 6653 -7 R 4 R L 34675992 4691030.184385 3995 
-7 R 5 R L 43513016 4301021.299787 4870 -7 R 6 R L 35973648 3934225.668027 3041 -7 R 0 R G 42749416 4505780.268095 5345 -7 R 32 R G 38144576 4584423.547847 5166 -7 R 14 T CN 39813520 1387873.491301 4062 -7 R 15 T CN 30049352 1037738.118919 3344 -16 T 8 R CN 40181760 15447231.594722 8402 -17 T 8 R CN 40181760 16773305.735538 9077 -8 R 9 R L 37801352 5059668.991064 4600 -8 R 10 R L 37946560 6152744.148682 4069 -8 R 11 R L 55297048 5864020.845953 7606 -8 R 15 R G 55297048 5864020.845953 7606 -8 R 19 R G 45039696 4606939.448035 4488 -8 R 16 T CN 39813520 1003951.885126 3174 -8 R 17 T CN 39813520 1200737.527034 3904 -18 T 9 R CN 30277632 15995328.015708 7099 -19 T 9 R CN 40181760 14328352.862862 8584 -9 R 8 R L 49385696 6808787.447933 6557 -9 R 10 R L 33253400 5479264.726710 3535 -9 R 11 R L 52364520 6489474.843136 7766 -9 R 22 R G 52364520 6489474.843136 7766 -9 R 26 R G 49267624 6524810.669739 6733 -9 R 18 T CN 30049392 972448.000381 3110 -9 R 19 T CN 39813520 963301.128882 3233 -20 T 10 R CN 40222720 13780387.971014 8866 -21 T 10 R CN 30257152 18712483.166522 6914 -10 R 8 R L 38678080 5240244.555583 4041 -10 R 9 R L 49890448 5799177.766866 6788 -10 R 11 R L 48753776 5150370.905425 6152 -10 R 29 R G 48753776 5150370.905425 6152 -10 R 33 R G 41425648 6389805.183540 5013 -10 R 20 T CN 39813600 1377361.309928 4429 -10 R 21 T CN 30049352 1175058.415210 3308 -22 T 11 R CN 40181760 12926592.020511 8104 -23 T 11 R CN 30257152 18523348.301746 6773 -11 R 8 R L 40560208 4846836.691540 3797 -11 R 9 R L 34124224 4763859.453014 3292 -11 R 10 R L 38605016 5277021.621061 4322 -11 R 0 R G 40478280 5086777.914766 4111 -11 R 4 R G 49943824 5905101.806403 6563 -11 R 22 T CN 39813520 1424222.517660 4265 -11 R 23 T CN 30049352 1028261.496375 3530 -24 T 12 R CN 30277632 19158269.234675 7090 -25 T 12 R CN 30257152 19270573.497551 7116 -12 R 13 R L 32047456 3750265.764293 2849 -12 R 14 R L 50178184 7309130.184921 7564 -12 R 15 R L 58034400 5843698.462804 8436 -12 R 19 R G 58034400 
5843698.462804 8436 -12 R 23 R G 44575160 6092245.688980 5725 -12 R 24 T CN 30049392 806566.838442 2811 -12 R 25 T CN 30049352 1213202.131881 3583 -26 T 13 R CN 30257152 19112482.493963 7014 -27 T 13 R CN 40181760 14646355.081486 8988 -13 R 12 R L 44527192 6423030.861185 6482 -13 R 14 R L 46636984 5807093.011567 6138 -13 R 15 R L 46974000 6554687.676421 5454 -13 R 26 R G 46974000 6554687.676421 5454 -13 R 30 R G 52447448 4836595.765817 6980 -13 R 26 T CN 30049352 1125329.914129 3288 -13 R 27 T CN 39813520 1512104.949816 4844 -28 T 14 R CN 30257152 19400082.478809 7092 -29 T 14 R CN 40181760 16095202.695306 9170 -14 R 12 R L 34606080 5950705.752400 4000 -14 R 13 R L 43123232 5680350.634564 5580 -14 R 15 R L 38261328 4981625.686206 3350 -14 R 1 R G 45992208 4930271.366009 5573 -14 R 33 R G 38261328 4981625.686206 3350 -14 R 28 T CN 30049352 637814.555154 2221 -14 R 29 T CN 39813520 1249106.103622 3833 -30 T 15 R CN 40181760 15586679.540493 9411 -31 T 15 R CN 40017920 16586112.004836 9223 -15 R 12 R L 51077496 6550042.514575 7713 -15 R 13 R L 47073208 4832118.412440 5895 -15 R 14 R L 45116952 4837185.355505 4833 -15 R 4 R G 52746688 6709131.321456 8350 -15 R 8 R G 51692976 6953577.442845 7000 -15 R 30 T CN 39813520 1283130.365744 4192 -15 R 31 T CN 39813200 1457038.449892 4765 -32 T 16 R CN 30257152 19189374.516511 7113 -33 T 16 R CN 40181760 17850768.194652 7596 -16 R 17 R L 54025952 5905546.258479 7114 -16 R 18 R L 34160632 4064251.783900 3198 -16 R 19 R L 45741616 6374696.422823 6186 -16 R 23 R G 45741616 6374696.422823 6186 -16 R 27 R G 41321480 5702481.030882 5627 -16 R 32 T CN 30049352 1532449.420789 4360 -16 R 33 T CN 39813520 1199169.584479 3474 -34 T 17 R CN 40181760 15144572.054951 9081 -35 T 17 R CN 40181760 16091038.039457 9000 -17 R 16 R L 46232920 5971728.548924 6388 -17 R 18 R L 44728608 6438892.891589 6745 -17 R 19 R L 51983616 5427979.947518 6365 -17 R 30 R G 51983616 5427979.947518 6365 -17 R 34 R G 44317208 5188154.376478 5087 -17 R 34 T CN 39813520 
1485890.868749 4665 -17 R 35 T CN 39813520 1130833.183301 3732 -36 T 18 R CN 40181760 15678132.783902 8490 -37 T 18 R CN 40181760 16876971.125963 8122 -18 R 16 R L 46143600 5514466.847122 6984 -18 R 17 R L 42394152 3865669.619331 4840 -18 R 19 R L 45794064 5340218.948968 5313 -18 R 1 R G 45794064 5340218.948968 5313 -18 R 5 R G 49523608 5463627.902051 6131 -18 R 36 T CN 39813520 852504.013955 2683 -18 R 37 T CN 39813520 805790.300623 2486 -38 T 19 R CN 30175232 22512609.412966 6847 -39 T 19 R CN 30257152 22714730.942416 7121 -19 R 16 R L 34957960 5526707.603444 4004 -19 R 17 R L 47349408 6034758.983252 6333 -19 R 18 R L 46919416 5608445.885587 6075 -19 R 8 R G 44365616 5296334.999713 4604 -19 R 12 R G 56495544 6082973.442639 8366 -19 R 38 T CN 30049192 1092387.377044 3270 -19 R 39 T CN 30049352 1068280.894725 3140 -40 T 20 R CN 40222720 14259163.848822 8729 -41 T 20 R CN 40017920 13573738.064970 8591 -20 R 21 R L 39728944 4697512.855911 5353 -20 R 22 R L 41903320 5045588.986343 5348 -20 R 23 R L 52146088 7359369.800037 7983 -20 R 27 R G 52146088 7359369.800037 7983 -20 R 31 R G 47184128 5131759.262767 5973 -20 R 40 T CN 39813600 883030.699000 2892 -20 R 41 T CN 39813200 784350.806184 2677 -42 T 21 R CN 40222720 15313358.299072 9214 -43 T 21 R CN 40181760 16854266.067963 8718 -21 R 20 R L 39164632 5944643.875686 5577 -21 R 22 R L 38028176 4427084.605373 4537 -21 R 23 R L 34169112 4269694.137535 2856 -21 R 2 R G 48270464 5223801.490295 5763 -21 R 34 R G 34169112 4269694.137535 2856 -21 R 42 T CN 39813600 1364767.638329 4459 -21 R 43 T CN 39813520 857971.397901 2662 -44 T 22 R CN 40181760 13784797.518310 8666 -45 T 22 R CN 30257152 22709598.612240 6328 -22 R 20 R L 41212736 5760616.725519 4330 -22 R 21 R L 44899648 5571184.216750 5490 -22 R 23 R L 48191408 6286261.674063 6674 -22 R 5 R G 48191408 6286261.674063 6674 -22 R 9 R G 51169760 5237963.175649 7226 -22 R 44 T CN 39813520 1542534.243019 4936 -22 R 45 T CN 30049352 1265047.999267 3871 -46 T 23 R CN 30257152 
15178598.555845 6169 -47 T 23 R CN 40181760 16103495.317657 8422 -23 R 20 R L 40333504 6603454.297476 5211 -23 R 21 R L 36006888 4843963.251430 3295 -23 R 22 R L 40413664 5142267.725370 4994 -23 R 12 R G 45082392 5304702.183129 5115 -23 R 16 R G 50801776 5131874.483025 7138 -23 R 46 T CN 30049352 1382693.916307 4027 -23 R 47 T CN 39813520 1038551.205006 3128 -48 T 24 R CN 30257152 14099343.817718 6106 -49 T 24 R CN 30257152 13642975.324074 6045 -24 R 25 R L 39016768 3857944.974915 4288 -24 R 26 R L 34965464 5003889.506839 4262 -24 R 27 R L 47190024 5454167.775527 6871 -24 R 31 R G 47190024 5454167.775527 6871 -24 R 35 R G 41646064 5992850.984984 5408 -24 R 48 T CN 30049352 667188.968666 2499 -24 R 49 T CN 30049352 881431.111138 2993 -50 T 25 R CN 40181760 15924037.373174 9323 -51 T 25 R CN 40181760 14800606.559240 8762 -25 R 24 R L 35274088 5828284.233935 3900 -25 R 26 R L 51652152 5817462.082088 7529 -25 R 27 R L 39214576 4326894.031401 4324 -25 R 2 R G 39214576 4326894.031401 4324 -25 R 6 R G 35298432 4524630.058634 3493 -25 R 50 T CN 39813520 966485.561719 2871 -25 R 51 T CN 39813520 1404936.336504 4156 -52 T 26 R CN 30257152 18051629.450587 6976 -53 T 26 R CN 40181760 15294739.661280 9009 -26 R 24 R L 38439736 6450489.640662 5018 -26 R 25 R L 43021968 4912180.431768 4539 -26 R 27 R L 49255576 5506754.761772 6905 -26 R 9 R G 49255576 5506754.761772 6905 -26 R 13 R G 46517152 4931759.758126 5515 -26 R 52 T CN 30049352 1089113.603708 3137 -26 R 53 T CN 39813520 1068108.929478 3544 -54 T 27 R CN 40181760 15348607.758912 8357 -55 T 27 R CN 30257152 22327681.850808 6890 -27 R 24 R L 31797088 5550718.157644 3272 -27 R 25 R L 52961704 5033719.493260 5860 -27 R 26 R L 40313048 4772700.110949 4746 -27 R 16 R G 46071704 4936679.422089 6250 -27 R 20 R G 51076808 5847799.836050 6668 -27 R 54 T CN 39813520 1055478.134521 3643 -27 R 55 T CN 30049352 1389171.083011 3761 -56 T 28 R CN 40181760 14875339.838529 8758 -57 T 28 R CN 40017920 13439725.026830 8197 -28 R 29 R L 
36407792 4562925.391117 4531 -28 R 30 R L 54929192 5839356.894003 8374 -28 R 31 R L 47104064 5457596.779440 5298 -28 R 3 R G 44037240 3666217.594051 4157 -28 R 35 R G 47104064 5457596.779440 5298 -28 R 56 T CN 39813520 589240.204071 1810 -28 R 57 T CN 39813200 1253920.506312 4022 -58 T 29 R CN 40181760 17388431.077442 8594 -59 T 29 R CN 40181760 15275599.714230 8708 -29 R 28 R L 36743304 4747217.925107 3404 -29 R 30 R L 39226976 4588611.200791 4500 -29 R 31 R L 43193824 5690009.346772 4949 -29 R 6 R G 43193824 5690009.346772 4949 -29 R 10 R G 46395064 5499095.428036 5989 -29 R 58 T CN 39813520 813051.646961 2322 -29 R 59 T CN 39813520 916003.412645 3024 -60 T 30 R CN 40181760 14057550.559304 8373 -61 T 30 R CN 30257152 18690662.547655 6874 -30 R 28 R L 37206752 5500248.346157 3199 -30 R 29 R L 46057680 5746289.642082 5828 -30 R 31 R L 51234608 5090602.876008 6564 -30 R 13 R G 51234608 5090602.876008 6564 -30 R 17 R G 50215328 5459724.442375 7315 -30 R 60 T CN 39813520 1135713.228615 3800 -30 R 61 T CN 30049352 1610036.399930 4445 -62 T 31 R CN 30257152 22529024.421436 6870 -63 T 31 R CN 40181760 16088278.649605 8550 -31 R 28 R L 54027816 4799689.111254 6056 -31 R 29 R L 38985584 5655226.937628 4649 -31 R 30 R L 47371424 4792407.927307 5839 -31 R 20 R G 46661320 5892242.729819 5976 -31 R 24 R G 49969336 6454067.586342 6779 -31 R 62 T CN 30049352 1152784.313173 3566 -31 R 63 T CN 39813520 803243.110496 2380 -64 T 32 R CN 40181760 13805423.148064 6702 -65 T 32 R CN 30175232 22415755.472424 6932 -32 R 33 R L 39495072 4060223.754083 4027 -32 R 34 R L 38318680 3832552.194477 3756 -32 R 35 R L 51263104 5519808.640647 7544 -32 R 3 R G 51263104 5519808.640647 7544 -32 R 7 R G 39328112 5219095.840084 5052 -32 R 64 T CN 39813520 1210251.667998 3458 -32 R 65 T CN 30049192 630422.140586 2292 -66 T 33 R CN 40181760 17791245.347997 6309 -67 T 33 R CN 30257152 15175940.132678 6274 -33 R 32 R L 55929688 5194979.684028 5947 -33 R 34 R L 32785920 4097967.225898 2747 -33 R 35 R L 
39697960 5994401.587308 5430 -33 R 10 R G 39697960 5994401.587308 5430 -33 R 14 R G 38884784 4379182.216837 4004 -33 R 66 T CN 39813520 1107798.113758 3239 -33 R 67 T CN 30049352 811674.087607 2529 -68 T 34 R CN 30257152 11926841.048741 6879 -69 T 34 R CN 30257152 18609998.026356 6837 -34 R 32 R L 39777456 5046916.308178 5315 -34 R 33 R L 34270784 5365204.443651 4172 -34 R 35 R L 42249888 4560827.376285 5055 -34 R 17 R G 42249888 4560827.376285 5055 -34 R 21 R G 36831104 4824175.442723 3634 -34 R 68 T CN 30049352 1078687.231229 3249 -34 R 69 T CN 30049352 739703.645466 2337 -70 T 35 R CN 30277632 18582548.588579 6780 -71 T 35 R CN 30257152 14445475.496982 6863 -35 R 32 R L 39508896 5032306.409923 6202 -35 R 33 R L 34469632 5572253.621625 4066 -35 R 34 R L 33126928 4824327.757457 3462 -35 R 24 R G 41519936 4246448.961771 4535 -35 R 28 R G 43743104 4729620.821349 5151 -35 R 70 T CN 30049392 447139.490875 1640 -35 R 71 T CN 30049352 1020380.529942 3401 \ No newline at end of file diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all deleted file mode 100644 index b2df970f..00000000 --- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all +++ /dev/null @@ -1,72 +0,0 @@ -lp:2 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:352766743.133932 max_event_size:792 -lp:3 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:290194557.627888 max_event_size:792 -lp:7 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:460660441.899865 max_event_size:792 -lp:8 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:301501694.772019 max_event_size:792 -lp:12 send_count:9810 send_bytes:39813520 send_time:7583527.619046 
recv_count:9810 recv_bytes:39813520 recv_time:343292157.501219 max_event_size:792 -lp:13 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:255851734.723889 max_event_size:792 -lp:17 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:330699749.483186 max_event_size:792 -lp:18 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:379598594.313395 max_event_size:792 -lp:22 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:417481264.821944 max_event_size:792 -lp:23 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:463516326.154559 max_event_size:792 -lp:27 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:375187965.536660 max_event_size:792 -lp:28 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:426144648.397676 max_event_size:792 -lp:32 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:354505543.112376 max_event_size:792 -lp:33 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:359959185.432986 max_event_size:792 -lp:37 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:416754479.974724 max_event_size:792 -lp:38 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:412627423.811912 max_event_size:792 -lp:42 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:420355037.806001 max_event_size:792 -lp:43 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 
recv_time:376096618.471747 max_event_size:792 -lp:47 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:313224399.141498 max_event_size:792 -lp:48 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:396555601.315165 max_event_size:792 -lp:52 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:427372522.534280 max_event_size:792 -lp:53 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:328550335.514686 max_event_size:792 -lp:57 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:456515775.394763 max_event_size:792 -lp:58 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:247548705.168367 max_event_size:792 -lp:62 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:271836197.869304 max_event_size:792 -lp:63 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:303306215.174348 max_event_size:792 -lp:67 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:300015958.614822 max_event_size:792 -lp:68 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:411979610.984809 max_event_size:792 -lp:72 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:231372703.409270 max_event_size:792 -lp:73 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:323329470.014433 max_event_size:792 -lp:77 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:367165238.584887 max_event_size:792 
-lp:78 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:409000712.088329 max_event_size:792 -lp:82 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:332865289.311507 max_event_size:792 -lp:83 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:484565584.355521 max_event_size:792 -lp:87 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:432419296.580878 max_event_size:792 -lp:88 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:382314536.175355 max_event_size:792 -lp:92 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:386772466.832815 max_event_size:792 -lp:93 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:343301557.045121 max_event_size:792 -lp:97 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:264514236.096660 max_event_size:792 -lp:98 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:313557345.269660 max_event_size:792 -lp:102 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:340629834.844444 max_event_size:792 -lp:103 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:372973290.936664 max_event_size:792 -lp:107 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:437304384.944908 max_event_size:792 -lp:108 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:377859452.518662 max_event_size:792 -lp:112 send_count:9810 send_bytes:39813520 
send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:471587827.861975 max_event_size:792 -lp:113 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:283376963.287888 max_event_size:792 -lp:117 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:347661284.230018 max_event_size:792 -lp:118 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:318512073.696441 max_event_size:792 -lp:122 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:256795353.934300 max_event_size:792 -lp:123 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:237701963.756680 max_event_size:792 -lp:127 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:353573738.286471 max_event_size:792 -lp:128 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:465258641.273453 max_event_size:792 -lp:132 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:374138273.240924 max_event_size:792 -lp:133 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:428587201.804590 max_event_size:792 -lp:137 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:404905020.644931 max_event_size:792 -lp:138 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:350589038.917737 max_event_size:792 -lp:142 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:297762961.526255 max_event_size:792 -lp:143 send_count:9770 send_bytes:39813200 send_time:7583466.666666 
recv_count:9770 recv_bytes:39813200 recv_time:410133909.716090 max_event_size:792 -lp:147 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:319808170.413551 max_event_size:792 -lp:148 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:341829101.902517 max_event_size:792 -lp:152 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:380910163.193983 max_event_size:792 -lp:153 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:283849636.026727 max_event_size:792 -lp:157 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:274524505.335948 max_event_size:792 -lp:158 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:315657639.722941 max_event_size:792 -lp:162 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:356174453.289787 max_event_size:792 -lp:163 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:289274523.506280 max_event_size:792 -lp:167 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:351044401.571382 max_event_size:792 -lp:168 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:259385936.957835 max_event_size:792 -lp:172 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:341719498.237854 max_event_size:792 -lp:173 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:307179521.253780 max_event_size:792 -lp:177 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 
recv_time:228528983.462069 max_event_size:792 -lp:178 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:267558998.807704 max_event_size:792 diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high deleted file mode 100644 index b2df970f..00000000 --- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high +++ /dev/null @@ -1,72 +0,0 @@ -lp:2 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:352766743.133932 max_event_size:792 -lp:3 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:290194557.627888 max_event_size:792 -lp:7 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:460660441.899865 max_event_size:792 -lp:8 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:301501694.772019 max_event_size:792 -lp:12 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:343292157.501219 max_event_size:792 -lp:13 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:255851734.723889 max_event_size:792 -lp:17 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:330699749.483186 max_event_size:792 -lp:18 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:379598594.313395 max_event_size:792 -lp:22 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:417481264.821944 max_event_size:792 -lp:23 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 
recv_time:463516326.154559 max_event_size:792 -lp:27 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:375187965.536660 max_event_size:792 -lp:28 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:426144648.397676 max_event_size:792 -lp:32 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:354505543.112376 max_event_size:792 -lp:33 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:359959185.432986 max_event_size:792 -lp:37 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:416754479.974724 max_event_size:792 -lp:38 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:412627423.811912 max_event_size:792 -lp:42 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:420355037.806001 max_event_size:792 -lp:43 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:376096618.471747 max_event_size:792 -lp:47 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:313224399.141498 max_event_size:792 -lp:48 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:396555601.315165 max_event_size:792 -lp:52 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:427372522.534280 max_event_size:792 -lp:53 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:328550335.514686 max_event_size:792 -lp:57 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:456515775.394763 max_event_size:792 
-lp:58 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:247548705.168367 max_event_size:792 -lp:62 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:271836197.869304 max_event_size:792 -lp:63 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:303306215.174348 max_event_size:792 -lp:67 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:300015958.614822 max_event_size:792 -lp:68 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:411979610.984809 max_event_size:792 -lp:72 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:231372703.409270 max_event_size:792 -lp:73 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:323329470.014433 max_event_size:792 -lp:77 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:367165238.584887 max_event_size:792 -lp:78 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:409000712.088329 max_event_size:792 -lp:82 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:332865289.311507 max_event_size:792 -lp:83 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:484565584.355521 max_event_size:792 -lp:87 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:432419296.580878 max_event_size:792 -lp:88 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:382314536.175355 max_event_size:792 -lp:92 send_count:9810 send_bytes:39813520 
send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:386772466.832815 max_event_size:792 -lp:93 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:343301557.045121 max_event_size:792 -lp:97 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:264514236.096660 max_event_size:792 -lp:98 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:313557345.269660 max_event_size:792 -lp:102 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:340629834.844444 max_event_size:792 -lp:103 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:372973290.936664 max_event_size:792 -lp:107 send_count:9820 send_bytes:39813600 send_time:7583542.857142 recv_count:9820 recv_bytes:39813600 recv_time:437304384.944908 max_event_size:792 -lp:108 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:377859452.518662 max_event_size:792 -lp:112 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:471587827.861975 max_event_size:792 -lp:113 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:283376963.287888 max_event_size:792 -lp:117 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:347661284.230018 max_event_size:792 -lp:118 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:318512073.696441 max_event_size:792 -lp:122 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:256795353.934300 max_event_size:792 -lp:123 send_count:7387 send_bytes:30000200 send_time:5714323.809523 
recv_count:7387 recv_bytes:30000200 recv_time:237701963.756680 max_event_size:792 -lp:127 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:353573738.286471 max_event_size:792 -lp:128 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:465258641.273453 max_event_size:792 -lp:132 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:374138273.240924 max_event_size:792 -lp:133 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:428587201.804590 max_event_size:792 -lp:137 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:404905020.644931 max_event_size:792 -lp:138 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:350589038.917737 max_event_size:792 -lp:142 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:297762961.526255 max_event_size:792 -lp:143 send_count:9770 send_bytes:39813200 send_time:7583466.666666 recv_count:9770 recv_bytes:39813200 recv_time:410133909.716090 max_event_size:792 -lp:147 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:319808170.413551 max_event_size:792 -lp:148 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:341829101.902517 max_event_size:792 -lp:152 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:380910163.193983 max_event_size:792 -lp:153 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:283849636.026727 max_event_size:792 -lp:157 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 
recv_time:274524505.335948 max_event_size:792 -lp:158 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:315657639.722941 max_event_size:792 -lp:162 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:356174453.289787 max_event_size:792 -lp:163 send_count:7367 send_bytes:30000040 send_time:5714293.333333 recv_count:7367 recv_bytes:30000040 recv_time:289274523.506280 max_event_size:792 -lp:167 send_count:9810 send_bytes:39813520 send_time:7583527.619046 recv_count:9810 recv_bytes:39813520 recv_time:351044401.571382 max_event_size:792 -lp:168 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:259385936.957835 max_event_size:792 -lp:172 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:341719498.237854 max_event_size:792 -lp:173 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:307179521.253780 max_event_size:792 -lp:177 send_count:7392 send_bytes:30000240 send_time:5714331.428571 recv_count:7392 recv_bytes:30000240 recv_time:228528983.462069 max_event_size:792 -lp:178 send_count:7387 send_bytes:30000200 send_time:5714323.809523 recv_count:7387 recv_bytes:30000200 recv_time:267558998.807704 max_event_size:792 diff --git a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats b/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats deleted file mode 100644 index fed80ff6..00000000 --- a/tmptest/expected/tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats +++ /dev/null @@ -1,73 +0,0 @@ -# Format - 0 0 1 0 90 90 39813200 39813200 89237007.320510 21119347.570794 250.000000 0.000000 0.000000 - 1 1 0 19 67 67 30000200 30000200 56475874.301444 17852333.411932 10000000.000000 0.000000 0.000000 - 5 2 0 3 72 72 30000240 30000240 62621556.489293 17852174.837306 
10000000.000000 0.000000 0.000000 - 6 3 0 6 47 47 30000040 30000040 53717217.531453 17853913.164114 10000000.000000 0.000000 0.000000 - 10 4 1 10 130 130 39813520 39813520 82212308.371531 21068601.547571 250.000000 0.000000 0.000000 - 11 5 0 23 67 67 30000200 30000200 56605447.229800 17856451.510284 10000000.000000 0.000000 0.000000 - 15 6 0 18 67 67 30000200 30000200 59377020.615150 17849212.584262 10000000.000000 0.000000 0.000000 - 16 7 0 28 67 67 30000200 30000200 56611204.332201 17853245.667187 10000000.000000 0.000000 0.000000 - 20 8 0 14 67 67 30000200 30000200 58046006.712762 17853330.657890 10000000.000000 0.000000 0.000000 - 21 9 0 27 67 67 30000200 30000200 54666576.648049 17857362.848268 10000000.000000 0.000000 0.000000 - 25 10 1 3 140 140 39813600 39813600 79806532.629068 21115366.245572 250.000000 0.000000 0.000000 - 26 11 0 9 67 67 30000200 30000200 55190353.587108 17851269.424881 10000000.000000 0.000000 0.000000 - 30 12 0 0 47 47 30000040 30000040 56355045.144427 17849847.891871 10000000.000000 0.000000 0.000000 - 31 13 0 34 67 67 30000200 30000200 57319838.732274 17853331.023495 10000000.000000 0.000000 0.000000 - 35 14 1 20 130 130 39813520 39813520 79825582.529869 21113723.869383 250.000000 0.000000 0.000000 - 36 15 0 30 67 67 30000200 30000200 50539798.306285 17855710.850404 10000000.000000 0.000000 0.000000 - 40 16 1 23 130 130 39813520 39813520 96570614.385612 21173404.388469 250.000000 0.000000 0.000000 - 41 17 1 32 130 130 39813520 39813520 104174353.511400 21127185.068113 250.000000 0.000000 0.000000 - 45 18 0 5 72 72 30000240 30000240 60746833.137984 17855387.498509 10000000.000000 0.000000 0.000000 - 46 19 1 28 130 130 39813520 39813520 90532100.834860 21113902.678420 250.000000 0.000000 0.000000 - 50 20 1 5 140 140 39813600 39813600 93215021.859260 21107999.546402 250.000000 0.000000 0.000000 - 51 21 0 13 67 67 30000200 30000200 62524142.281874 17854071.877755 10000000.000000 0.000000 0.000000 - 55 22 1 11 130 130 39813520 39813520 
87108820.845512 21253403.349942 250.000000 0.000000 0.000000 - 56 23 0 26 67 67 30000200 30000200 58124303.599644 17850680.445220 10000000.000000 0.000000 0.000000 - 60 24 0 1 72 72 30000240 30000240 73825732.655678 17853237.674558 10000000.000000 0.000000 0.000000 - 61 25 0 35 67 67 30000200 30000200 74632319.393684 17856451.858593 10000000.000000 0.000000 0.000000 - 65 26 0 29 67 67 30000200 30000200 60973047.224376 17856451.550016 10000000.000000 0.000000 0.000000 - 66 27 1 30 130 130 39813520 39813520 97183327.200930 21102174.565586 250.000000 0.000000 0.000000 - 70 28 0 33 67 67 30000200 30000200 69666737.099805 17855617.501467 10000000.000000 0.000000 0.000000 - 71 29 1 33 130 130 39813520 39813520 100178218.078153 21238162.751729 250.000000 0.000000 0.000000 - 75 30 1 27 130 130 39813520 39813520 98877667.184731 21256700.657271 250.000000 0.000000 0.000000 - 76 31 1 2 90 90 39813200 39813200 100942318.305743 21155764.515093 250.000000 0.000000 0.000000 - 80 32 0 22 67 67 30000200 30000200 65591630.997276 17854157.642763 10000000.000000 0.000000 0.000000 - 81 33 1 12 130 130 39813520 39813520 78974801.715336 21109777.342929 250.000000 0.000000 0.000000 - 85 34 1 31 130 130 39813520 39813520 100689518.650071 21192061.784327 250.000000 0.000000 0.000000 - 86 35 1 21 130 130 39813520 39813520 101556407.296841 21115137.760280 250.000000 0.000000 0.000000 - 90 36 1 26 130 130 39813520 39813520 87181078.575814 21143229.925367 250.000000 0.000000 0.000000 - 91 37 1 14 130 130 39813520 39813520 91281082.849771 21097362.783538 250.000000 0.000000 0.000000 - 95 38 0 2 47 47 30000040 30000040 63940848.873793 17847374.542274 10000000.000000 0.000000 0.000000 - 96 39 0 8 67 67 30000200 30000200 71690442.429131 17849121.063364 10000000.000000 0.000000 0.000000 - 100 40 1 7 140 140 39813600 39813600 88967018.559046 21161719.568495 250.000000 0.000000 0.000000 - 101 41 1 6 90 90 39813200 39813200 85527779.462703 21163275.624817 250.000000 0.000000 0.000000 - 105 42 1 1 140 
140 39813600 39813600 100457403.678861 21110006.073760 250.000000 0.000000 0.000000 - 106 43 1 17 130 130 39813520 39813520 99253629.669866 21189028.967585 250.000000 0.000000 0.000000 - 110 44 1 22 130 130 39813520 39813520 88807135.248057 21070787.824921 250.000000 0.000000 0.000000 - 111 45 0 15 67 67 30000200 30000200 55657802.114682 17856451.485560 10000000.000000 0.000000 0.000000 - 115 46 0 25 67 67 30000200 30000200 57569424.228786 17853238.309627 10000000.000000 0.000000 0.000000 - 116 47 1 16 130 130 39813520 39813520 94714665.003806 21118593.796807 250.000000 0.000000 0.000000 - 120 48 0 12 67 67 30000200 30000200 49329790.727020 17848300.919883 10000000.000000 0.000000 0.000000 - 121 49 0 20 67 67 30000200 30000200 49076027.353583 17848300.513379 10000000.000000 0.000000 0.000000 - 125 50 1 19 130 130 39813520 39813520 92110607.580828 21257528.308085 250.000000 0.000000 0.000000 - 126 51 1 29 130 130 39813520 39813520 94697768.477917 21128810.332614 250.000000 0.000000 0.000000 - 130 52 0 32 67 67 30000200 30000200 52119612.770477 17853139.356496 10000000.000000 0.000000 0.000000 - 131 53 1 34 130 130 39813520 39813520 87422795.515014 21154612.434599 250.000000 0.000000 0.000000 - 135 54 1 18 130 130 39813520 39813520 78422965.566418 21073020.434406 250.000000 0.000000 0.000000 - 136 55 0 17 67 67 30000200 30000200 53390682.328147 17853238.062964 10000000.000000 0.000000 0.000000 - 140 56 1 25 130 130 39813520 39813520 89493603.447990 21189561.925589 250.000000 0.000000 0.000000 - 141 57 1 4 90 90 39813200 39813200 85436564.237106 21111296.853831 250.000000 0.000000 0.000000 - 145 58 1 13 130 130 39813520 39813520 96802622.211033 21115137.315974 250.000000 0.000000 0.000000 - 146 59 1 15 130 130 39813520 39813520 96583487.482583 21161490.594629 250.000000 0.000000 0.000000 - 150 60 1 35 130 130 39813520 39813520 90266817.624509 21356588.612738 250.000000 0.000000 0.000000 - 151 61 0 24 67 67 30000200 30000200 58285049.554882 17852412.897185 
10000000.000000 0.000000 0.000000 - 155 62 0 31 67 67 30000200 30000200 66320862.530225 17860569.932221 10000000.000000 0.000000 0.000000 - 156 63 1 24 130 130 39813520 39813520 98077004.277963 21119191.871021 250.000000 0.000000 0.000000 - 160 64 1 9 130 130 39813520 39813520 71635687.154524 21185283.014578 250.000000 0.000000 0.000000 - 161 65 0 4 47 47 30000040 30000040 55102180.614745 17851269.485238 10000000.000000 0.000000 0.000000 - 165 66 1 8 130 130 39813520 39813520 68646777.969893 21113251.882987 250.000000 0.000000 0.000000 - 166 67 0 11 67 67 30000200 30000200 51878884.318374 17855809.464314 10000000.000000 0.000000 0.000000 - 170 68 0 21 67 67 30000200 30000200 59776460.886694 17852418.990265 10000000.000000 0.000000 0.000000 - 171 69 0 10 67 67 30000200 30000200 57589326.703996 17849795.090486 10000000.000000 0.000000 0.000000 - 175 70 0 7 72 72 30000240 30000240 54267360.186473 17850595.235628 10000000.000000 0.000000 0.000000 - 176 71 0 16 67 67 30000200 30000200 56663635.212701 17849021.295227 10000000.000000 0.000000 0.000000 \ No newline at end of file diff --git a/tmptest/expected/tmptest-jacobiS_MILC.output b/tmptest/expected/tmptest-jacobiS_MILC.output deleted file mode 100644 index b8fb808a..00000000 --- a/tmptest/expected/tmptest-jacobiS_MILC.output +++ /dev/null @@ -1,240 +0,0 @@ -/home/ac.xwang/install/codes-new/bin/model-net-mpi-replay --sync=1 --workload_type=conc-online --lp-io-use-suffix=1 --workload_conf_file=/home/ac.xwang/tools/codes-new/tmptest/conf/jacobi_MILC.conf --alloc_file=/home/ac.xwang/tools/codes-new/tmptest/conf/rand_node0-1d-72-jacobi_MILC.conf --lp-io-dir=tmptest-jacobiS_MILC -- /home/ac.xwang/tools/codes-new/tmptest/conf/dfdally-72-par.conf - -Thu Aug 31 11:16:21 2023 - -ROSS Version: v8.0.0-dirty - -tw_net_start: Found world size to be 1 - - num_net_traces 72; num_dumpi_traces 72NIC num injection port not specified, setting to 1 -NIC seq delay not specified, setting to 10.000000 -NIC num copy queues not specified, 
setting to 1 -Dragonfly rail selection is 3 -within node transfer per byte delay is 0.190476 - -ROSS Core Configuration: - Total PEs 1 - Total KPs [Nodes (1) x KPs (16)] 16 - Total LPs 180 - Simulation End Time 3600000000000.00 - LP-to-PE Mapping model defined - - -ROSS Event Memory Allocation: - Model events 46081 - Network events 16 - Total events 46096 - -*** START SEQUENTIAL SIMULATION *** - -Jacobi3D: Running Jacobi on 36 processors with (4, 3, 3) elements -Jacobi3D: Array Dimensions: 400 300 300 -Jacobi3D: Block Dimensions: 100 100 100 -Set num_servers per router 2, servers per injection queue per router 2, servers per node copy queue per node 1, num nics 2 - - Network node 10 Rank 4 App 1 finished at 21068851.547571 - Network node 22 Rank 44 App 1 finished at 21071037.824921 - Network node 18 Rank 54 App 1 finished at 21073270.434406 - Network node 14 Rank 37 App 1 finished at 21097612.783538 - Network node 30 Rank 27 App 1 finished at 21102424.565586 - Network node 5 Rank 20 App 1 finished at 21108249.546402 - Network node 12 Rank 33 App 1 finished at 21110027.342929 - Network node 1 Rank 42 App 1 finished at 21110256.073760 - Network node 4 Rank 57 App 1 finished at 21111546.853831 - Network node 8 Rank 66 App 1 finished at 21113501.882987 - Network node 20 Rank 14 App 1 finished at 21113973.869383 - Network node 28 Rank 19 App 1 finished at 21114152.678420 - Network node 13 Rank 58 App 1 finished at 21115387.315974 - Network node 21 Rank 35 App 1 finished at 21115387.760280 - Network node 3 Rank 10 App 1 finished at 21115616.245572 - Network node 16 Rank 47 App 1 finished at 21118843.796807 - Network node 24 Rank 63 App 1 finished at 21119441.871021 - Network node 0 Rank 0 App 1 finished at 21119597.570794 - Network node 32 Rank 17 App 1 finished at 21127435.068113 - Network node 29 Rank 51 App 1 finished at 21129060.332614 - Network node 26 Rank 36 App 1 finished at 21143479.925367 - Network node 34 Rank 53 App 1 finished at 21154862.434599 - Network 
node 2 Rank 31 App 1 finished at 21156014.515093 - Network node 15 Rank 59 App 1 finished at 21161740.594629 - Network node 7 Rank 40 App 1 finished at 21161969.568495 - Network node 6 Rank 41 App 1 finished at 21163525.624817 - Network node 23 Rank 16 App 1 finished at 21173654.388469 - Network node 9 Rank 64 App 1 finished at 21185533.014578 - Network node 17 Rank 43 App 1 finished at 21189278.967585 - Network node 25 Rank 56 App 1 finished at 21189811.925589 - Network node 31 Rank 34 App 1 finished at 21192311.784327 - Network node 33 Rank 29 App 1 finished at 21238412.751729 - Network node 11 Rank 22 App 1 finished at 21253653.349942 - Network node 27 Rank 30 App 1 finished at 21256950.657271 - Network node 19 Rank 50 App 1 finished at 21257778.308085 - Network node 35 Rank 60 App 1 finished at 21356838.612738 App 0: Received finished workload notificationThere is still a nonsynethic workload left. 1 != 2 - - Network node 2 Rank 38 App 0 finished at 27847374.542274 - Network node 20 Rank 49 App 0 finished at 27848300.513379 - Network node 12 Rank 48 App 0 finished at 27848300.919883 - Network node 16 Rank 71 App 0 finished at 27849021.295227 - Network node 8 Rank 39 App 0 finished at 27849121.063364 - Network node 18 Rank 6 App 0 finished at 27849212.584262 - Network node 10 Rank 69 App 0 finished at 27849795.090486 - Network node 0 Rank 12 App 0 finished at 27849847.891871 - Network node 7 Rank 70 App 0 finished at 27850595.235628 - Network node 26 Rank 23 App 0 finished at 27850680.445220 - Network node 9 Rank 11 App 0 finished at 27851269.424881 - Network node 4 Rank 65 App 0 finished at 27851269.485238 - Network node 3 Rank 2 App 0 finished at 27852174.837306 - Network node 19 Rank 1 App 0 finished at 27852333.411932 - Network node 24 Rank 61 App 0 finished at 27852412.897185 - Network node 21 Rank 68 App 0 finished at 27852418.990265 - Network node 32 Rank 52 App 0 finished at 27853139.356496 - Network node 1 Rank 24 App 0 finished at 27853237.674558 - 
Network node 17 Rank 55 App 0 finished at 27853238.062964 - Network node 25 Rank 46 App 0 finished at 27853238.309627 - Network node 28 Rank 7 App 0 finished at 27853245.667187 - Network node 14 Rank 8 App 0 finished at 27853330.657890 - Network node 34 Rank 13 App 0 finished at 27853331.023495 - Network node 6 Rank 3 App 0 finished at 27853913.164114 - Network node 13 Rank 21 App 0 finished at 27854071.877755 - Network node 22 Rank 32 App 0 finished at 27854157.642763 - Network node 5 Rank 18 App 0 finished at 27855387.498509 - Network node 33 Rank 28 App 0 finished at 27855617.501467 - Network node 30 Rank 15 App 0 finished at 27855710.850404 - Network node 11 Rank 67 App 0 finished at 27855809.464314 - Network node 15 Rank 45 App 0 finished at 27856451.485560 - Network node 23 Rank 5 App 0 finished at 27856451.510284 - Network node 29 Rank 26 App 0 finished at 27856451.550016 - Network node 35 Rank 25 App 0 finished at 27856451.858593 - Network node 27 Rank 9 App 0 finished at 27857362.848268 - Network node 31 Rank 62 App 0 finished at 27860569.932221 App 0: Received finished workload notificationApp 0: All non-synthetic workloads have completed -*** END SIMULATION *** - - - : Running Time = 35.7901 seconds - -TW Library Statistics: - Total Events Processed 11315021 - Events Aborted (part of RBs) 0 - Events Rolled Back 0 - Event Ties Detected in PE Queues 0 - Efficiency 100.00 % - Total Remote (shared mem) Events Processed 0 - Percent Remote Events 0.00 % - Total Remote (network) Events Processed 0 - Percent Remote Events 0.00 % - - Total Roll Backs 0 - Primary Roll Backs 0 - Secondary Roll Backs 0 - Fossil Collect Attempts 0 - Total GVT Computations 0 - - Net Events Processed 11315021 - Event Rate (events/sec) 316149.8 - Total Events Scheduled Past End Time 0 - -TW Memory Statistics: - Events Allocated 46097 - Memory Allocated 80000 - Memory Wasted 101 - -TW Data Structure sizes in bytes (sizeof): - PE struct 3888 - KP struct 960 - LP struct 960 - LP Model 
struct 96 - LP RNGs 80 - Total LP 1136 - Event struct 976 - Event struct with Model 1768 - -TW Clock Cycle Statistics (MAX values in secs at 1.0000 GHz): - Initialization 0.3151 - Priority Queue (enq/deq) 5.0614 - AVL Tree (insert/delete) 0.0000 - LZ4 (de)compression 0.0000 - Buddy system 0.0000 - Event Processing 0.0000 - Event Cancel 0.0000 - Event Abort 0.0000 - - GVT 0.0000 - Fossil Collect 0.0000 - Primary Rollbacks 0.0000 - Network Read 0.0000 - Other Network 0.0000 - Instrumentation (computation) 0.0000 - Instrumentation (write) 0.0000 - Total Time (Note: Using Running Time above for Speedup) 74.9855 - -TW GVT Statistics: MPI AllReduce - GVT Interval 16 - GVT Real Time Interval (cycles) 0 - GVT Real Time Interval (sec) 0.00000000 - Batch Size 16 - - Forced GVT 0 - Total GVT Computations 0 - Total All Reduce Calls 0 - Average Reduction / GVT -nan - - Total bytes sent 2513292480 recvd 2513292480 - max runtime 27860569.932221 ns avg runtime 24501344.282682 - max comm time 21356588.612738 avg comm time 19501219.282682 - max send time 104174353.511400 avg send time 74765835.724547 - max recv time 100173347.933146 avg recv time 74419141.320531 - max wait time 19215338.524462 avg wait time 16456309.234173 - ----------- -Per App Max Elapsed Times: - App 0: 27860569.9322 - App 1: 21356838.6127 ----------- -LP-IO: writing output to tmptest-jacobiS_MILC-25331-1693498581/ -LP-IO: data files: - tmptest-jacobiS_MILC-25331-1693498581/dragonfly-cn-stats - tmptest-jacobiS_MILC-25331-1693498581/dragonfly-link-stats - tmptest-jacobiS_MILC-25331-1693498581/model-net-category-all - tmptest-jacobiS_MILC-25331-1693498581/model-net-category-high - tmptest-jacobiS_MILC-25331-1693498581/avg-all-reduce-time - tmptest-jacobiS_MILC-25331-1693498581/mpi-replay-stats - ------------------- Dragonfly Dally Parameters --------- - num_routers = 4 - local_bandwidth = 5.25 - global_bandwidth = 4.70 - cn_bandwidth = 5.25 - num_vcs = 4 - num_qos_levels = 1 - local_vc_size = 16384 - global_vc_size 
= 16384 - cn_vc_size = 32768 - chunk_size = 4096 - num_cn = 2 - cn_radix = 2 - intra_grp_radix = 3 - num_groups = 9 - total_groups = 9 - virtual radix = 7 - total_routers = 36 - total_terminals = 72 - num_global_channels = 2 - num_injection_queues = 1 - num_rails = 1 - num_planes = 1 - cn_delay = 726.61 - local_delay = 726.61 - global_delay = 811.64 - local credit_delay = 1.42 - global credit_delay = 1.42 - cn credit_delay = 1.42 - router_delay = 100.00 - routing = PROG_ADAPTIVE - adaptive_threshold = 0 - max hops notification = 2147483647 ------------------------------------------------------- - - -Average number of hops traversed 4.101973 average chunk latency 40.897694 us maximum chunk latency 732.304909 us avg message size 363612.937500 bytes finished messages 6912 finished chunks 618912 - -ADAPTIVE ROUTING STATS: 349554 chunks routed minimally 269358 chunks routed non-minimally completed packets 618912 - -Total packets generated 618912 finished 618912 Locally routed- same router 14696 different-router 56472 Remote (inter-group) 547744 From 03d7da6f27dc12aa9b4889b616b845950c21aa60 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 16 Jun 2025 16:00:50 -0400 Subject: [PATCH 080/110] Fixing bug where MILC would not work with network surrogate when freezing --- src/networks/model-net/dragonfly-dally.C | 45 +++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 99b292ee..846d8734 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -495,6 +495,7 @@ struct packet_sent { double next_packet_delay; // When the packet is initially sent, this value is -1, when the next packet is sent this value is updated to the actual delay to process the next packet void * message_data; // Yep, we have to save the entire message just because we might need to resend the message when switching to surrogate-mode. 
It's wasteful but there is no other way void * remote_event_data; // This and the one above have to be freed. This contains the extra information that the message contains + void * local_data; // This and the one above have to be freed. This contains the extra information that the message contains }; struct packet_id { @@ -604,6 +605,7 @@ struct terminal_state // Variables to recover latency of packets sent to other terminals // Sent packets (to be populated at by commit handler of packet sender) map sent_packets; + set is_pending_local_send; int64_t last_packet_sent_id; // We need the next packet to be injected in the network before feeding the packet info forward (the predictor needs starting time, delay to send next packet and latency) struct { @@ -3028,6 +3030,9 @@ static void feed_packet_to_predictor(terminal_state * s, tw_lp * lp, uint64_t pa if (sent.remote_event_data) { free(sent.remote_event_data); } + if (sent.local_data) { + free(sent.local_data); + } } // We check an event that is in the event queue, thus we do not process it yet @@ -3116,7 +3121,18 @@ static void dragonfly_dally_terminal_highdef_to_surrogate( tw_event_send(e); //printf("NOTIFYING of zombie: packet dest id %d dest gid %d\n", sent.start.dest_terminal_lpid, sent.start.dfdally_dest_terminal_id); - notify_dest_lp_of(s, lp, m, NOTIFY_ZOMBIE); + notify_dest_lp_of(s, lp, msg_data, NOTIFY_ZOMBIE); + + if (s->is_pending_local_send.count(packet_ID) == 1) { + assert(sent.local_data); + assert(msg_data->local_event_size_bytes); + double const local_ts = 11; + tw_event *e_new = tw_event_new(msg_data->sender_lp, local_ts, lp); + void * m_new = tw_event_data(e_new); + memcpy(m_new, sent.local_data, msg_data->local_event_size_bytes); + tw_event_send(e_new); + s->is_pending_local_send.erase(packet_ID); + } // Deallocating memory from packet_start if (sent.message_data) { @@ -3125,9 +3141,13 @@ static void dragonfly_dally_terminal_highdef_to_surrogate( if (sent.remote_event_data) { 
free(sent.remote_event_data); } + if (sent.local_data) { + free(sent.local_data); + } } } assert(s->sent_packets.empty()); + assert(s->is_pending_local_send.empty()); // Hide current state and clean current state. Hidding the network information is in principle // the same as freezing the state of the network. @@ -3160,6 +3180,7 @@ static void dragonfly_dally_terminal_highdef_to_surrogate( memcpy(&s->arrival_of_last_packet, &frozen_state->arrival_of_last_packet, sizeof(s->arrival_of_last_packet)); memcpy(&s->zombies, &frozen_state->zombies, sizeof(s->zombies)); memcpy(&s->sent_packets, &frozen_state->sent_packets, sizeof(s->sent_packets)); + memcpy(&s->is_pending_local_send, &frozen_state->is_pending_local_send, sizeof(s->is_pending_local_send)); memcpy(&s->remaining_sz_packets, &frozen_state->remaining_sz_packets, sizeof(s->remaining_sz_packets)); s->frozen_state = frozen_state; @@ -3201,6 +3222,7 @@ static void dragonfly_dally_terminal_surrogate_to_highdef( memcpy(&frozen_state->arrival_of_last_packet, &s->arrival_of_last_packet, sizeof(s->arrival_of_last_packet)); memcpy(&frozen_state->zombies, &s->zombies, sizeof(s->zombies)); memcpy(&frozen_state->sent_packets, &s->sent_packets, sizeof(s->sent_packets)); + memcpy(&frozen_state->is_pending_local_send, &s->is_pending_local_send, sizeof(s->is_pending_local_send)); memcpy(&frozen_state->remaining_sz_packets, &s->remaining_sz_packets, sizeof(s->remaining_sz_packets)); memcpy(s, frozen_state, sizeof(terminal_state)); memset(frozen_state, 0, sizeof(terminal_state)); @@ -3332,6 +3354,11 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term remote_data = malloc(msg->remote_event_size_bytes); memcpy(remote_data, model_net_method_get_edata(DRAGONFLY_DALLY, msg), msg->remote_event_size_bytes); } + void * local_data = NULL; + if (msg->local_event_size_bytes) { + local_data = malloc(msg->local_event_size_bytes); + memcpy(local_data, (char *) model_net_method_get_edata(DRAGONFLY_DALLY, msg) + 
msg->remote_event_size_bytes, msg->local_event_size_bytes); + } double const processing_packet_delay = msg->saved_next_packet_delay; // TODO (elkin): In the future, this ugly initialization could be done all in a single "line" instead of setting all values one by one. The reason to do it this way is because some old compilers do not understand other ways of initializing @@ -3347,8 +3374,12 @@ static void terminal_commit_packet_generate(terminal_state * s, tw_bf * bf, term sent.next_packet_delay = -1; sent.message_data = msg_data; sent.remote_event_data = remote_data; + sent.local_data = local_data; s->sent_packets[msg->packet_ID] = sent; + if (freeze_network_on_switch && msg->local_event_size_bytes > 0) { + s->is_pending_local_send.insert(msg->packet_ID); + } // Set next_packet_delay for the last past sent packet if (s->sent_packets.count(s->last_packet_sent_id) == 1) { @@ -3444,6 +3475,11 @@ static void terminal_dally_commit(terminal_state * s, break; case T_SEND: + if (freeze_network_on_switch) { + if (bf->c16 && s->is_pending_local_send.count(msg->packet_ID) == 1) { + s->is_pending_local_send.erase(msg->packet_ID); + } + } break; case T_BUFFER: @@ -3701,6 +3737,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) s->arrival_of_last_packet.packet_ID = -1; s->arrival_of_last_packet.travel_end_time = -1; new (&s->sent_packets) map(); + new (&s->is_pending_local_send) set(); new (&s->remaining_sz_packets) map(); new (&s->zombies) set(); s->frozen_state = NULL; @@ -4763,6 +4800,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * if(cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) { + bf->c16 = 1; + msg->packet_ID = cur_entry->msg.packet_ID; tw_stime local_ts = 0; tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); void * m_new = tw_event_data(e_new); @@ -5681,6 +5720,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s, // Deallocating 
memory from packet_start if (sent.message_data) { free(sent.message_data); } if (sent.remote_event_data) { free(sent.remote_event_data); } + if (sent.local_data) { free(sent.local_data); } s->sent_packets.erase(s->arrival_of_last_packet.packet_ID); s->arrival_of_last_packet.packet_ID = -1; @@ -5702,6 +5742,7 @@ static void dragonfly_dally_terminal_final( terminal_state * s, // Deallocating memory from packet_start if (sent.message_data) { free(sent.message_data); } if (sent.remote_event_data) { free(sent.remote_event_data); } + if (sent.local_data) { free(sent.local_data); } } } @@ -5739,8 +5780,10 @@ static void dragonfly_dally_terminal_final( terminal_state * s, for (auto&& kv: s->sent_packets) { if (kv.second.message_data) { free(kv.second.message_data); } if (kv.second.remote_event_data) { free(kv.second.remote_event_data); } + if (kv.second.local_data) { free(kv.second.local_data); } } s->sent_packets.~map(); + s->is_pending_local_send.~set(); s->remaining_sz_packets.~map(); if (s->predictor_data) { From fcdf824d8f144da3a5164f111c308325be45c05d Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 16 Jun 2025 16:37:19 -0400 Subject: [PATCH 081/110] Fixed a bug on reading setting from file --- src/surrogate/init.c | 18 ++++++++++++++++-- src/surrogate/network-surrogate.c | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 2e93ed75..dc165eae 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -177,9 +177,23 @@ static int load_and_validate_int_param(const char* param_name, int default_value return value; } +static double load_and_validate_double_param(const char* param_name, double default_value) { + char param_str[MAX_NAME_LENGTH]; + param_str[0] = '\0'; + int const rc = configuration_get_value(&config, "APPLICATION_SURROGATE", param_name, NULL, param_str, MAX_NAME_LENGTH); + double value = (rc > 0) ? 
strtod(param_str, NULL) : default_value; + + if (value <= 0) { + tw_warning(TW_LOC, "%s must be a positive integer, got %d. Using default value %d.", param_name, value, default_value); + value = default_value; + } + + return value; +} + static struct application_director_config load_director_config(void) { int const default_gvt = 100; - int const default_ns = 1000000; // 1ms + double const default_ns = 1.0e6; // 1ms enum { MODE_NOT_SET, @@ -203,7 +217,7 @@ static struct application_director_config load_director_config(void) { } int every_n_gvt = load_and_validate_int_param("director_num_gvt", default_gvt); - int every_n_ns = load_and_validate_int_param("director_num_ns", default_ns); + double every_n_ns = load_and_validate_double_param("director_num_ns", default_ns); bool const is_sequential = (g_tw_synchronization_protocol == SEQUENTIAL || g_tw_synchronization_protocol == SEQUENTIAL_ROLLBACK_CHECK); diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c index 230c6ade..ed7185f1 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -368,7 +368,7 @@ void network_director(tw_pe * pe) { } // ---- Past this means that we are in fact switching ---- - bool const pre_switch_status = net_surr_config.model.is_surrogate_on(); + net_surr_config.model.is_surrogate_on(); // Asking the director/model to switch if (DEBUG_DIRECTOR && g_tw_mynode == 0) { From 1a41fda188bf000322e92e43800205bd593453af Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 16 Jun 2025 17:22:24 -0400 Subject: [PATCH 082/110] Resetting predictor when turning back into full fidelity --- codes/surrogate/network-surrogate.h | 1 + .../surrogate/packet-latency-predictor/common.h | 2 ++ src/networks/model-net/dragonfly-dally.C | 10 ++++++++++ src/surrogate/network-surrogate.c | 7 +++++++ src/surrogate/packet-latency-predictor/average.c | 16 ++++++++++++++++ 5 files changed, 36 insertions(+) diff --git a/codes/surrogate/network-surrogate.h 
b/codes/surrogate/network-surrogate.h index a550464d..4b22e238 100644 --- a/codes/surrogate/network-surrogate.h +++ b/codes/surrogate/network-surrogate.h @@ -41,6 +41,7 @@ struct lp_types_switch { model_ask_if_freeze_f should_event_be_frozen; // true means event from LP type shouldn't be frozen model_ask_if_freeze_f should_event_be_deleted; // true means event from LP type shouldn't be deleted model_check_event_f check_event_in_queue; + model_switch_f reset_predictor; }; struct switch_at_struct { diff --git a/codes/surrogate/packet-latency-predictor/common.h b/codes/surrogate/packet-latency-predictor/common.h index 61b0283c..3faa7bff 100644 --- a/codes/surrogate/packet-latency-predictor/common.h +++ b/codes/surrogate/packet-latency-predictor/common.h @@ -37,6 +37,7 @@ struct packet_end { // Definition of functions needed to define a predictor typedef void (*init_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id); // Initializes the predictor (eg, LSTM) +typedef void (*reset_pred_lat_f) (void * predictor_data, tw_lp * lp); typedef void (*feed_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *, struct packet_end const *); // Feeds known latency for packet sent at `now` typedef struct packet_end (*predict_pred_lat_f) (void * predictor_data, tw_lp * lp, unsigned int terminal_id, struct packet_start const *); // Get prediction for packet sent to `destination` at `now` typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Reverse prediction (reverse state of predictor one prediction) @@ -44,6 +45,7 @@ typedef void (*predict_pred_lat_rc_f) (void * predictor_data, tw_lp * lp); // Re // API for packet latency predictors struct packet_latency_predictor { init_pred_lat_f init; + reset_pred_lat_f reset; feed_pred_lat_f feed; predict_pred_lat_f predict; predict_pred_lat_rc_f predict_rc; diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C 
index 846d8734..2175515d 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -203,6 +203,7 @@ static void dragonfly_dally_terminal_surrogate_to_highdef(terminal_state * s, tw static bool dragonfly_dally_terminal_should_event_be_frozen(tw_lp * lp, tw_event * event); static bool dragonfly_dally_router_should_event_be_frozen(tw_lp * lp, tw_event * event); static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( terminal_state * s, tw_lp * lp, tw_event * event); +static void dragonfly_dally_terminal_reset_predictor(terminal_state * s, tw_lp * lp, void *); // // ==== END OF Parameters to tune surrogate mode ==== @@ -2463,6 +2464,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) .should_event_be_frozen = dragonfly_dally_terminal_should_event_be_frozen, .should_event_be_deleted = NULL, .check_event_in_queue = (model_check_event_f) dragonfly_dally_terminal_pre_surrogate_switch_event_queue, + .reset_predictor = (model_switch_f) dragonfly_dally_terminal_reset_predictor, }, {.lpname = "modelnet_dragonfly_dally_router", .trigger_idle_modelnet = false, @@ -2471,6 +2473,7 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params) .should_event_be_frozen = dragonfly_dally_router_should_event_be_frozen, .should_event_be_deleted = NULL, .check_event_in_queue = NULL, + .reset_predictor = NULL, }, 0 } @@ -3052,6 +3055,13 @@ static void dragonfly_dally_terminal_pre_surrogate_switch_event_queue( } } +static void dragonfly_dally_terminal_reset_predictor(terminal_state * s, tw_lp * lp, void * vacuous) { + (void) vacuous; + if (terminal_predictor != NULL && s->predictor_data != NULL) { + terminal_predictor->reset(s->predictor_data, lp); + } +} + // This function never rollsback because it's called at GVT static void dragonfly_dally_terminal_highdef_to_surrogate( terminal_state * s, tw_lp * lp, tw_event ** terminal_events) { diff --git a/src/surrogate/network-surrogate.c 
b/src/surrogate/network-surrogate.c index ed7185f1..b7108cc8 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -299,6 +299,13 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { lp_type_switch->surrogate_to_highdef(lp->cur_state, lp, NULL); } } + if (lp_type_switch->reset_predictor) { + if (is_lp_modelnet) { + model_net_method_call_inner(lp, (void (*) (void *, tw_lp *, void *))lp_type_switch->reset_predictor, NULL); + } else { + lp_type_switch->reset_predictor(lp->cur_state, lp, NULL); + } + } } #ifdef USE_RAND_TIEBREAKER diff --git a/src/surrogate/packet-latency-predictor/average.c b/src/surrogate/packet-latency-predictor/average.c index c6553563..4b14aedb 100644 --- a/src/surrogate/packet-latency-predictor/average.c +++ b/src/surrogate/packet-latency-predictor/average.c @@ -98,12 +98,28 @@ static void predict_latency_rc(struct latency_surrogate * data, tw_lp * lp) { (void) lp; } +static void reset_pred(struct latency_surrogate * data, tw_lp * lp) { + (void) lp; + + data->aggregated_next_packet_delay.sum_latency = 0; + data->aggregated_next_packet_delay.total_msgs = 0; + + data->aggregated_latency_for_all.sum_latency = 0; + data->aggregated_latency_for_all.total_msgs = 0; + + for (int i = 0; i < num_terminals; i++) { + data->aggregated_latency[i].sum_latency = 0; + data->aggregated_latency[i].total_msgs = 0; + } +} + struct packet_latency_predictor average_latency_predictor(int num_terminals_) { assert(num_terminals_ >= 0); num_terminals = num_terminals_; return (struct packet_latency_predictor) { .init = (init_pred_lat_f) init_pred, + .reset = (reset_pred_lat_f) reset_pred, .feed = (feed_pred_lat_f) feed_pred, .predict = (predict_pred_lat_f) predict_latency, .predict_rc = (predict_pred_lat_rc_f) predict_latency_rc, From e70f540ec94686ecf2cf6d4aba7e10f2fbf831ad Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 16 Jun 2025 17:55:07 -0400 Subject: [PATCH 083/110] Adding more tests for Union and the application 
surrogacy --- tests/CMakeLists.txt | 2 + ...test-surrogate-parallel-deterministic-1.sh | 112 +++++++++++++++++ ...test-surrogate-parallel-deterministic-2.sh | 114 ++++++++++++++++++ .../union-workload-test-surrogate-parallel.sh | 2 +- tests/union-workload-test-surrogate.sh | 2 +- 5 files changed, 230 insertions(+), 2 deletions(-) create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-1.sh create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-2.sh diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a78e7210..e5e11309 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -101,6 +101,8 @@ if(USE_UNION) list(APPEND test-shell-files union-workload-test-surrogate.sh union-workload-test-surrogate-parallel.sh + union-workload-test-surrogate-parallel-deterministic-1.sh + union-workload-test-surrogate-parallel-deterministic-2.sh ) endif() diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh new file mode 100644 index 00000000..c5d145e3 --- /dev/null +++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! 
+tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=0 +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mkdir run-1 +pushd run-1 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-1.txt 2> model-output-1-error.txt + +err=$? 
+[[ $err -ne 0 ]] && exit $err + +popd + +mkdir run-2 +pushd run-2 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-2.txt 2> model-output-2-error.txt + +err=$? + +popd + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' run-1/model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \ + <(grep 'Net Events Processed' run-2/model-output-2.txt) +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi + +exit 0 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh new file mode 100644 index 00000000..ab596dd5 --- /dev/null +++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! 
+tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=0 +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par-1.conf" + +export APP_DIRECTOR_MODE=every-n-gvt +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par-2.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mkdir run-1 +pushd run-1 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par-1.conf" \ + > model-output-1.txt 2> model-output-1-error.txt + +err=$? 
+[[ $err -ne 0 ]] && exit $err + +popd + +mkdir run-2 +pushd run-2 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par-2.conf" \ + > model-output-2.txt 2> model-output-2-error.txt + +err=$? + +popd + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' run-1/model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \ + <(grep 'Net Events Processed' run-2/model-output-2.txt) +err=$? +if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi + +exit 0 diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh index c16deb8f..f2940d12 100644 --- a/tests/union-workload-test-surrogate-parallel.sh +++ b/tests/union-workload-test-surrogate-parallel.sh @@ -80,7 +80,7 @@ grep 'MILC: Iteration 119/120' model-output.txt err=$? [[ $err -ne 0 ]] && exit $err -grep 'Jacobi3D: Completed 40 iterations' model-output.txt +grep 'Jacobi3D: Completed 39 iterations' model-output.txt err=$? 
[[ $err -ne 0 ]] && exit $err diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh index 59c0c067..032b649e 100644 --- a/tests/union-workload-test-surrogate.sh +++ b/tests/union-workload-test-surrogate.sh @@ -80,7 +80,7 @@ grep 'MILC: Iteration 119/120' model-output.txt err=$? [[ $err -ne 0 ]] && exit $err -grep 'Jacobi3D: Completed 40 iterations' model-output.txt +grep 'Jacobi3D: Completed 39 iterations' model-output.txt err=$? [[ $err -ne 0 ]] && exit $err From 9b9a1eda9614abc2033124efa6e9dd886939a2d5 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 17 Jun 2025 00:12:48 -0400 Subject: [PATCH 084/110] Fixing a bug and adding a test to check for different sizes of chunks and packets --- src/networks/model-net/dragonfly-dally.C | 3 +- tests/CMakeLists.txt | 7 +- .../conceptual.json | 2 +- .../dfdally-72-par.conf.in | 2 +- .../jacobi_MILC.workload.conf | 1 - .../rand_node0-1d-72-jacobi_MILC.alloc.conf | 1 - tests/union-workload-test-surrogate-fails.sh | 102 ++++++++++++++++++ tests/union-workload-test-surrogate.sh | 23 ++-- 8 files changed, 122 insertions(+), 19 deletions(-) create mode 100644 tests/union-workload-test-surrogate-fails.sh diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 2175515d..0014fe88 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -6490,7 +6490,6 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m if(cur_entry->msg.packet_size < s->params->chunk_size) msg_size = cur_entry->msg.packet_size; - s->qos_data[output_port][vcg] -= msg_size; s->next_output_available_time[output_port] = msg->saved_available_time; if(bf->c11) @@ -6499,6 +6498,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m s->link_traffic_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; s->ross_rsample.link_traffic_sample[output_port] -= 
cur_entry->msg.packet_size % s->params->chunk_size; s->link_traffic_ross_sample[output_port] -= cur_entry->msg.packet_size % s->params->chunk_size; + msg_size = cur_entry->msg.packet_size % s->params->chunk_size; //Xin: reverse link traffic if(rolback && current_window >= 0){ @@ -6518,6 +6518,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m } } + s->qos_data[output_port][vcg] -= msg_size; s->total_chunks[output_port]--; prepend_to_terminal_dally_message_list(s->pending_msgs[output_port], diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e5e11309..e6d46ef8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -100,9 +100,10 @@ configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union if(USE_UNION) list(APPEND test-shell-files union-workload-test-surrogate.sh - union-workload-test-surrogate-parallel.sh - union-workload-test-surrogate-parallel-deterministic-1.sh - union-workload-test-surrogate-parallel-deterministic-2.sh + union-workload-test-surrogate-fails.sh + #union-workload-test-surrogate-parallel.sh + #union-workload-test-surrogate-parallel-deterministic-1.sh + #union-workload-test-surrogate-parallel-deterministic-2.sh ) endif() diff --git a/tests/conf/union-milc-jacobi-workload/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json index 557c0bce..27a03f6c 100644 --- a/tests/conf/union-milc-jacobi-workload/conceptual.json +++ b/tests/conf/union-milc-jacobi-workload/conceptual.json @@ -41,7 +41,7 @@ "100", "100", "50000", - "39", + "1", "200000", "barrier" ] diff --git a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in index 3b72d00a..11598088 100644 --- a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in +++ b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @@ -19,7 +19,7 @@ PARAMS modelnet_scheduler="fcfs"; # chunk size in the network (when chunk size = packet 
size, packets will not be # divided into chunks) - chunk_size="4096"; + chunk_size="${CHUNK_SIZE}"; # modelnet_scheduler="round-robin"; # number of routers in group num_routers="4"; diff --git a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf index 93c60688..cf0a4b93 100644 --- a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf +++ b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf @@ -1,2 +1 @@ 36 conceptual-jacobi3d 1 0 -36 milc 1 0 diff --git a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf index 07e490d0..bae48ac5 100644 --- a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf +++ b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf @@ -1,2 +1 @@ 12 24 38 2 65 18 3 70 39 11 69 67 48 21 8 45 71 55 6 1 49 68 32 5 61 46 23 9 7 26 15 62 52 28 13 25 -0 42 31 10 57 20 41 40 66 64 4 22 33 58 37 59 47 43 54 50 14 35 44 16 63 56 36 30 19 51 27 34 17 29 53 60 diff --git a/tests/union-workload-test-surrogate-fails.sh b/tests/union-workload-test-surrogate-fails.sh new file mode 100644 index 00000000..1d81c4ee --- /dev/null +++ b/tests/union-workload-test-surrogate-fails.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=1 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! 
+tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export CHUNK_SIZE=2048 +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=0 +export APP_SURR_ON=0 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=1 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output.txt 2> model-output-error.txt + +err=$? + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' model-output.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err + +# Checking both milc and jacobi ran +#grep 'MILC: Iteration 119/120' model-output.txt +#err=$? +#[[ $err -ne 0 ]] && exit $err + +grep 'Jacobi3D: Completed 1 iterations' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +grep 'App 0: All non-synthetic workloads have completed' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +# it transitioned into surrogacy +#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt +#err=$? +#[[ $err -ne 0 ]] && exit $err + +# it transitioned back to high-fidelity +#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt +#err=$? +#[[ $err -ne 0 ]] && exit $err + +exit 0 diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh index 032b649e..9cd2c6be 100644 --- a/tests/union-workload-test-surrogate.sh +++ b/tests/union-workload-test-surrogate.sh @@ -34,9 +34,10 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" # CODES config file +export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 -export APP_SURR_ON=1 +export APP_SURR_ON=0 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt export EVERY_N_GVT=500 @@ -76,11 +77,11 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking both milc and jacobi ran -grep 'MILC: Iteration 119/120' model-output.txt -err=$? -[[ $err -ne 0 ]] && exit $err +#grep 'MILC: Iteration 119/120' model-output.txt +#err=$? +#[[ $err -ne 0 ]] && exit $err -grep 'Jacobi3D: Completed 39 iterations' model-output.txt +grep 'Jacobi3D: Completed 1 iterations' model-output.txt err=$? [[ $err -ne 0 ]] && exit $err @@ -89,13 +90,13 @@ err=$? [[ $err -ne 0 ]] && exit $err # it transitioned into surrogacy -grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt -err=$? 
-[[ $err -ne 0 ]] && exit $err +#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt +#err=$? +#[[ $err -ne 0 ]] && exit $err # it transitioned back to high-fidelity -grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt -err=$? -[[ $err -ne 0 ]] && exit $err +#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt +#err=$? +#[[ $err -ne 0 ]] && exit $err exit 0 From 66511d98e3777008901156441a08643cd986d501 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 17 Jun 2025 00:50:56 -0400 Subject: [PATCH 085/110] potential/partial fix to unmatched receives bug --- src/networks/model-net/dragonfly-dally.C | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 0014fe88..ea442aef 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -5353,10 +5353,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message /* WE do not allow self messages through dragonfly */ assert(lp->gid != msg->src_terminal_id); - // TODO (elkin): this is wrong, this is _not_ finding the number of chunks, consider: chunk_size = 2 and packet_size = 5. There should be 3 chunks, but the code outputs 2! 
- uint64_t num_chunks = msg->packet_size / s->params->chunk_size; - if (msg->packet_size < s->params->chunk_size) - num_chunks++; + uint64_t num_chunks = (msg->packet_size + s->params->chunk_size - 1) / s->params->chunk_size; if(msg->path_type == MINIMAL) minimal_count++; @@ -5444,8 +5441,9 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message int const chunk_size = s->params->chunk_size; if (has_remaining_sz) { bf->c28 = 1; - assert(s->remaining_sz_packets[packet_key] >= chunk_size); - s->remaining_sz_packets[packet_key] -= chunk_size; + int const actual_chunk_size = std::min(chunk_size, (int)s->remaining_sz_packets[packet_key]); + assert(s->remaining_sz_packets[packet_key] >= actual_chunk_size); + s->remaining_sz_packets[packet_key] -= actual_chunk_size; // if `remaining == 0`, ie, if the packet has been completed if (s->remaining_sz_packets[packet_key] == 0) { From ecae4b87e4c5cf8b03b8924d3de64a09d6d03fdb Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 17 Jun 2025 10:51:31 -0400 Subject: [PATCH 086/110] Fixing bugs that show up with Jacobi and chunk size != packet size --- src/networks/model-net/dragonfly-dally.C | 35 +++++-------------- tests/CMakeLists.txt | 8 ++--- .../conceptual.json | 2 +- .../jacobi_MILC.workload.conf | 1 + .../rand_node0-1d-72-jacobi_MILC.alloc.conf | 1 + ...test-surrogate-parallel-deterministic-1.sh | 1 + ...test-surrogate-parallel-deterministic-2.sh | 1 + .../union-workload-test-surrogate-parallel.sh | 1 + ...load-test-surrogate-smaller-chunk-size.sh} | 22 ++++++------ tests/union-workload-test-surrogate.sh | 22 ++++++------ 10 files changed, 41 insertions(+), 53 deletions(-) rename tests/{union-workload-test-surrogate-fails.sh => union-workload-test-surrogate-smaller-chunk-size.sh} (86%) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index ea442aef..978ac455 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C 
@@ -79,6 +79,8 @@ // If we have configured the network surrogate, then we will collect packet delay data, which is done via the scheduling of an event. This additional event will shift the random generator and thus the same model will behave differently from the start when compared with the one where the surrogate is not setup. If one wants to test both scenarios (with and without the surrogate) and maintain determinism in high-fidelity, one has to enable this option #define ALWAYS_DETERMINISTIC_NETWORK 0 +#define num_chunks_for(message_size, chunk_size) ((message_size) ? ((message_size) + (chunk_size) - 1) / (chunk_size) : 1) + /* handles terminal and router events like packet generate/send/receive/buffer */ typedef struct terminal_state terminal_state; typedef struct router_state router_state; @@ -4213,9 +4215,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me if(bf->c4) num_remote_packets--; - int num_chunks = msg->packet_size/s->params->chunk_size; - if(msg->packet_size < s->params->chunk_size) - num_chunks++; + int const num_chunks = num_chunks_for(msg->packet_size, s->params->chunk_size); int i; int vcg = 0; @@ -4277,15 +4277,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa const dragonfly_param *p = s->params; int total_event_size; - uint64_t num_chunks = msg->packet_size / p->chunk_size; - - double cn_delay = s->params->cn_delay; - - if (msg->packet_size < s->params->chunk_size) - num_chunks++; - - if(msg->packet_size < s->params->chunk_size) - cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); + uint64_t const num_chunks = num_chunks_for(msg->packet_size, p->chunk_size); int dest_router_id; if (s->params->num_injection_queues > 1 || netMan.is_link_failures_enabled()) { @@ -4733,9 +4725,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * msg->saved_vc = vcg; terminal_dally_message_list* cur_entry = 
s->terminal_msgs[msg->rail_id][vcg]; int data_size = s->params->chunk_size; - uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - num_chunks++; + uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size); + msg->saved_avg_time = cur_entry->msg.travel_start_time; // reusing field saved_avg_time. It is only used in another event handler path (arrive). So, no interruptions here cur_entry->msg.travel_start_time = tw_now(lp); @@ -5353,7 +5344,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message /* WE do not allow self messages through dragonfly */ assert(lp->gid != msg->src_terminal_id); - uint64_t num_chunks = (msg->packet_size + s->params->chunk_size - 1) / s->params->chunk_size; + uint64_t const num_chunks = num_chunks_for(msg->packet_size, s->params->chunk_size); if(msg->path_type == MINIMAL) minimal_count++; @@ -5484,13 +5475,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message if(hash_link) tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); - uint64_t total_chunks = msg->total_size / s->params->chunk_size; - - if(msg->total_size % s->params->chunk_size) - total_chunks++; - - if(!total_chunks) - total_chunks = 1; + uint64_t const total_chunks = num_chunks_for(msg->total_size, s->params->chunk_size); /*if(tmp) { @@ -6622,9 +6607,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes bandwidth = s->params->global_bandwidth; } - uint64_t num_chunks = cur_entry->msg.packet_size / s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - num_chunks++; + uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size); /* Injection delay: Time taken for the data to be placed on the link/channel * - Based on bandwidth diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 
e6d46ef8..3efbaeaa 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -100,10 +100,10 @@ configure_file(conf/union-milc-jacobi-workload/dfdally-72-par.conf.in conf/union if(USE_UNION) list(APPEND test-shell-files union-workload-test-surrogate.sh - union-workload-test-surrogate-fails.sh - #union-workload-test-surrogate-parallel.sh - #union-workload-test-surrogate-parallel-deterministic-1.sh - #union-workload-test-surrogate-parallel-deterministic-2.sh + union-workload-test-surrogate-smaller-chunk-size.sh + union-workload-test-surrogate-parallel.sh + union-workload-test-surrogate-parallel-deterministic-1.sh + union-workload-test-surrogate-parallel-deterministic-2.sh ) endif() diff --git a/tests/conf/union-milc-jacobi-workload/conceptual.json b/tests/conf/union-milc-jacobi-workload/conceptual.json index 27a03f6c..557c0bce 100644 --- a/tests/conf/union-milc-jacobi-workload/conceptual.json +++ b/tests/conf/union-milc-jacobi-workload/conceptual.json @@ -41,7 +41,7 @@ "100", "100", "50000", - "1", + "39", "200000", "barrier" ] diff --git a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf index cf0a4b93..93c60688 100644 --- a/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf +++ b/tests/conf/union-milc-jacobi-workload/jacobi_MILC.workload.conf @@ -1 +1,2 @@ 36 conceptual-jacobi3d 1 0 +36 milc 1 0 diff --git a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf index bae48ac5..07e490d0 100644 --- a/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf +++ b/tests/conf/union-milc-jacobi-workload/rand_node0-1d-72-jacobi_MILC.alloc.conf @@ -1 +1,2 @@ 12 24 38 2 65 18 3 70 39 11 69 67 48 21 8 45 71 55 6 1 49 68 32 5 61 46 23 9 7 26 15 62 52 28 13 25 +0 42 31 10 57 20 41 40 66 64 4 22 33 58 37 59 47 43 54 50 14 35 44 16 63 56 36 30 19 51 27 34 17 
29 53 60 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh index c5d145e3..7afae27c 100644 --- a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh +++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh @@ -34,6 +34,7 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" # CODES config file +export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 export APP_SURR_ON=1 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh index ab596dd5..ca2cb776 100644 --- a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh +++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh @@ -34,6 +34,7 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" # CODES config file +export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 export APP_SURR_ON=1 diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh index f2940d12..fae9abd4 100644 --- a/tests/union-workload-test-surrogate-parallel.sh +++ b/tests/union-workload-test-surrogate-parallel.sh @@ -34,6 +34,7 @@ cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" # CODES config file +export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 export APP_SURR_ON=1 diff --git a/tests/union-workload-test-surrogate-fails.sh b/tests/union-workload-test-surrogate-smaller-chunk-size.sh similarity index 86% rename from tests/union-workload-test-surrogate-fails.sh rename to tests/union-workload-test-surrogate-smaller-chunk-size.sh 
index 1d81c4ee..e31a0d4d 100644 --- a/tests/union-workload-test-surrogate-fails.sh +++ b/tests/union-workload-test-surrogate-smaller-chunk-size.sh @@ -37,7 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=2048 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 -export APP_SURR_ON=0 +export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt export EVERY_N_GVT=500 @@ -77,11 +77,11 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking both milc and jacobi ran -#grep 'MILC: Iteration 119/120' model-output.txt -#err=$? -#[[ $err -ne 0 ]] && exit $err +grep 'MILC: Iteration 119/120' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err -grep 'Jacobi3D: Completed 1 iterations' model-output.txt +grep 'Jacobi3D: Completed 39 iterations' model-output.txt err=$? [[ $err -ne 0 ]] && exit $err @@ -90,13 +90,13 @@ err=$? [[ $err -ne 0 ]] && exit $err # it transitioned into surrogacy -#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt -#err=$? -#[[ $err -ne 0 ]] && exit $err +grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err # it transitioned back to high-fidelity -#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt -#err=$? -#[[ $err -ne 0 ]] && exit $err +grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err exit 0 diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh index 9cd2c6be..64a19ee8 100644 --- a/tests/union-workload-test-surrogate.sh +++ b/tests/union-workload-test-surrogate.sh @@ -37,7 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 -export APP_SURR_ON=0 +export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt export EVERY_N_GVT=500 @@ -77,11 +77,11 @@ err=$? [[ $err -ne 0 ]] && exit $err # Checking both milc and jacobi ran -#grep 'MILC: Iteration 119/120' model-output.txt -#err=$? -#[[ $err -ne 0 ]] && exit $err +grep 'MILC: Iteration 119/120' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err -grep 'Jacobi3D: Completed 1 iterations' model-output.txt +grep 'Jacobi3D: Completed 39 iterations' model-output.txt err=$? [[ $err -ne 0 ]] && exit $err @@ -90,13 +90,13 @@ err=$? [[ $err -ne 0 ]] && exit $err # it transitioned into surrogacy -#grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt -#err=$? -#[[ $err -ne 0 ]] && exit $err +grep -e 'application iteration surrogate mode at GVT [0-9]* time' model-output.txt +err=$? +[[ $err -ne 0 ]] && exit $err # it transitioned back to high-fidelity -#grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt -#err=$? -#[[ $err -ne 0 ]] && exit $err +grep -e 'application iteration mode at GVT [0-9]* time' model-output.txt +err=$? 
+[[ $err -ne 0 ]] && exit $err exit 0 From 737c702e7b92a395ea4b2f877a170e09d21e7bf2 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 17 Jun 2025 11:29:51 -0400 Subject: [PATCH 087/110] Adding more tests :) --- tests/CMakeLists.txt | 2 + .../dfdally-72-par.conf.in | 4 +- ...test-surrogate-parallel-deterministic-1.sh | 1 + ...test-surrogate-parallel-deterministic-2.sh | 1 + ...test-surrogate-parallel-deterministic-3.sh | 114 ++++++++++++++++++ ...test-surrogate-parallel-deterministic-4.sh | 114 ++++++++++++++++++ .../union-workload-test-surrogate-parallel.sh | 1 + ...kload-test-surrogate-smaller-chunk-size.sh | 1 + tests/union-workload-test-surrogate.sh | 1 + 9 files changed, 237 insertions(+), 2 deletions(-) create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-3.sh create mode 100644 tests/union-workload-test-surrogate-parallel-deterministic-4.sh diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3efbaeaa..6390965c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -104,6 +104,8 @@ if(USE_UNION) union-workload-test-surrogate-parallel.sh union-workload-test-surrogate-parallel-deterministic-1.sh union-workload-test-surrogate-parallel-deterministic-2.sh + union-workload-test-surrogate-parallel-deterministic-3.sh + union-workload-test-surrogate-parallel-deterministic-4.sh ) endif() diff --git a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in index 11598088..ade97ef2 100644 --- a/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in +++ b/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in @@ -75,14 +75,14 @@ NETWORK_SURROGATE { # latency predictor to use. Options: average, torch-jit packet_latency_predictor="average"; # some workload models need some time to stabilize, a point where the network behaviour stabilizes. 
The predictor will ignore all packet latencies that arrive during this period - ignore_until="10.0e6"; + ignore_until="2.0e6"; # parameters for torch-jit latency predictor torch_jit_mode="single-static-model-for-all-terminals"; torch_jit_model_path=""; # selecting network treatment on switching to surrogate. Options: freeze, nothing - network_treatment_on_switch="nothing"; # freeze is buggy sadly. It freezes more events than it should + network_treatment_on_switch="${NETWORK_MODE}"; } APPLICATION_SURROGATE { enable="${APP_SURR_ON}"; # either 0 or 1 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh index 7afae27c..22294863 100644 --- a/tests/union-workload-test-surrogate-parallel-deterministic-1.sh +++ b/tests/union-workload-test-surrogate-parallel-deterministic-1.sh @@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 +export NETWORK_MODE=nothing export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh index ca2cb776..2a0384fd 100644 --- a/tests/union-workload-test-surrogate-parallel-deterministic-2.sh +++ b/tests/union-workload-test-surrogate-parallel-deterministic-2.sh @@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 +export NETWORK_MODE=nothing export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds export EVERY_N_GVT=500 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-3.sh b/tests/union-workload-test-surrogate-parallel-deterministic-3.sh new file mode 100644 index 00000000..93b74afc --- 
/dev/null +++ b/tests/union-workload-test-surrogate-parallel-deterministic-3.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! +tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export CHUNK_SIZE=4096 +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=1 +export NETWORK_MODE=nothing +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export PATH_TO_CODES_BUILD="$bindir" + +mkdir run-1 +pushd run-1 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + 
--alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-1.txt 2> model-output-1-error.txt + +err=$? +[[ $err -ne 0 ]] && exit $err + +popd + +mkdir run-2 +pushd run-2 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-2.txt 2> model-output-2-error.txt + +err=$? + +popd + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' run-1/model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \ + <(grep 'Net Events Processed' run-2/model-output-2.txt) +err=$? 
+if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi + +exit 0 diff --git a/tests/union-workload-test-surrogate-parallel-deterministic-4.sh b/tests/union-workload-test-surrogate-parallel-deterministic-4.sh new file mode 100644 index 00000000..639eed1e --- /dev/null +++ b/tests/union-workload-test-surrogate-parallel-deterministic-4.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +if [[ -z $bindir ]] ; then + echo bindir variable not set + exit 1 +fi + +if [[ -z $UNION_DATAROOTDIR ]] ; then + echo UNION_DATAROOTDIR variable not set + exit 1 +fi + +if [[ -z $SWM_DATAROOTDIR ]] ; then + echo SWM_DATAROOTDIR variable not set + exit 1 +fi + +np=3 + +expfolder="$PWD" +export CONFIGS_PATH="$srcdir/tests/conf/union-milc-jacobi-workload" + +# Backing up and copying milc json! +tmpdir="$(TMPDIR="$PWD" mktemp -d)" +mv "$SWM_DATAROOTDIR/milc_skeleton.json" "$tmpdir/milc_skeleton.json" +cp "$CONFIGS_PATH/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$UNION_DATAROOTDIR/conceptual.json" "$tmpdir/conceptual.json" +cp "$CONFIGS_PATH/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" + +# Copying configuration files to keep as documentation +cp "$CONFIGS_PATH/milc_skeleton.json" "$expfolder" +cp "$CONFIGS_PATH/conceptual.json" "$expfolder" +cp "$CONFIGS_PATH/jacobi_MILC.workload.conf" "$expfolder" +cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" + +# CODES config file +export CHUNK_SIZE=4096 +export PATH_TO_CONNECTIONS="$CONFIGS_PATH" +export NETWORK_SURR_ON=1 +export NETWORK_MODE=freeze +export APP_SURR_ON=1 +export APP_DIRECTOR_MODE=every-n-nanoseconds +#export APP_DIRECTOR_MODE=every-n-gvt +export EVERY_N_GVT=500 +export EVERY_NSECS=1e6 +envsubst < "$bindir/tests/conf/union-milc-jacobi-workload/dfdally-72-par.conf.in" > "$expfolder/dfdally-72-par.conf" + +# running simulation +cons_lookahead=200 +opt_lookahead=600 + +export 
PATH_TO_CODES_BUILD="$bindir" + +mkdir run-1 +pushd run-1 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-1.txt 2> model-output-1-error.txt + +err=$? +[[ $err -ne 0 ]] && exit $err + +popd + +mkdir run-2 +pushd run-2 + +mpirun -np $np "$PATH_TO_CODES_BUILD"/src/model-net-mpi-replay \ + --synch=3 \ + --batch=4 --gvt-interval=256 \ + --cons-lookahead=$cons_lookahead \ + --max-opt-lookahead=$opt_lookahead \ + --workload_type=conc-online \ + --lp-io-dir=lp-io-dir \ + --workload_conf_file="$expfolder"/jacobi_MILC.workload.conf \ + --alloc_file="$expfolder"/rand_node0-1d-72-jacobi_MILC.alloc.conf \ + -- "$expfolder/dfdally-72-par.conf" \ + > model-output-2.txt 2> model-output-2-error.txt + +err=$? + +popd + +# Setting milc json back +mv "$tmpdir/milc_skeleton.json" "$SWM_DATAROOTDIR/milc_skeleton.json" +mv "$tmpdir/conceptual.json" "$UNION_DATAROOTDIR/conceptual.json" +rmdir "$tmpdir" + +[[ $err -ne 0 ]] && exit $err + +# Checking that there is actual output +grep 'Net Events Processed' run-1/model-output-1.txt +err=$? +[[ $err -ne 0 ]] && exit $err + +diff <(grep 'Net Events Processed' run-1/model-output-1.txt) \ + <(grep 'Net Events Processed' run-2/model-output-2.txt) +err=$? 
+if [[ $err -ne 0 ]]; then + >&2 echo "The number of net events processed does not coincide, ie," \ + "the simulation is not deterministic" + exit $err +fi + +exit 0 diff --git a/tests/union-workload-test-surrogate-parallel.sh b/tests/union-workload-test-surrogate-parallel.sh index fae9abd4..f84bccad 100644 --- a/tests/union-workload-test-surrogate-parallel.sh +++ b/tests/union-workload-test-surrogate-parallel.sh @@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 +export NETWORK_MODE=nothing export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt diff --git a/tests/union-workload-test-surrogate-smaller-chunk-size.sh b/tests/union-workload-test-surrogate-smaller-chunk-size.sh index e31a0d4d..d7266ccc 100644 --- a/tests/union-workload-test-surrogate-smaller-chunk-size.sh +++ b/tests/union-workload-test-surrogate-smaller-chunk-size.sh @@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=2048 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 +export NETWORK_MODE=nothing export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt diff --git a/tests/union-workload-test-surrogate.sh b/tests/union-workload-test-surrogate.sh index 64a19ee8..0a34b2c8 100644 --- a/tests/union-workload-test-surrogate.sh +++ b/tests/union-workload-test-surrogate.sh @@ -37,6 +37,7 @@ cp "$CONFIGS_PATH/rand_node0-1d-72-jacobi_MILC.alloc.conf" "$expfolder" export CHUNK_SIZE=4096 export PATH_TO_CONNECTIONS="$CONFIGS_PATH" export NETWORK_SURR_ON=0 +export NETWORK_MODE=nothing export APP_SURR_ON=1 export APP_DIRECTOR_MODE=every-n-nanoseconds #export APP_DIRECTOR_MODE=every-n-gvt From 5755e06bc75a891068c869dd30c1c2c6079aab90 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 17 Jun 2025 14:35:30 -0400 Subject: 
[PATCH 088/110] Changed terminal_dally_message_list to work with terminal_dally_message_list with terminals --- src/networks/model-net/dragonfly-dally.C | 230 +++++++++++++++++------ 1 file changed, 174 insertions(+), 56 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 978ac455..7562e844 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -213,14 +213,16 @@ typedef struct terminal_dally_message_list terminal_dally_message_list; struct terminal_dally_message_list { terminal_dally_message msg; char* event_data; - terminal_dally_message_list *next; - terminal_dally_message_list *prev; + struct qlist_head list; + terminal_dally_message_list *next; // Keep for router compatibility + terminal_dally_message_list *prev; // Keep for router compatibility }; static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, terminal_dally_message *inmsg) { thisO->msg = *inmsg; thisO->event_data = NULL; + INIT_QLIST_HEAD(&thisO->list); thisO->next = NULL; thisO->prev = NULL; } @@ -536,8 +538,7 @@ struct terminal_state int** vc_occupancy; // vc_occupancies [rail_id][qos_level] tw_stime* terminal_available_time; // [rail_id] - terminal_dally_message_list ***terminal_msgs; //[rail_id][qos_level] - terminal_dally_message_list ***terminal_msgs_tail; //[rail_id][qos_level] + struct qlist_head **terminal_msgs; //[rail_id][qos_level] - quicklist heads int* in_send_loop; // [rail_id] struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; @@ -1561,13 +1562,23 @@ static Connection dfdally_get_best_from_k_connections(router_state *s, tw_bf *bf return get_absolute_best_connection_from_conns(s, bf, msg, lp, k_conns); } +static inline void append_to_qlist(struct qlist_head *head, terminal_dally_message_list *msg) +{ + qlist_add_tail(&msg->list, head); +} + +static inline void prepend_to_qlist(struct qlist_head *head, terminal_dally_message_list *msg) +{ + 
qlist_add(&msg->list, head); +} + +// Restore old functions for router compatibility static void append_to_terminal_dally_message_list( terminal_dally_message_list ** thisq, terminal_dally_message_list ** thistail, int index, terminal_dally_message_list *msg) { -// printf("\n msg id %d ", msg->msg.packet_ID); if (thisq[index] == NULL) { thisq[index] = msg; } @@ -1577,7 +1588,105 @@ static void append_to_terminal_dally_message_list( msg->prev = thistail[index]; } thistail[index] = msg; -// printf("\n done adding %d ", msg->msg.packet_ID); +} + +static terminal_dally_message_list* return_head( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index) +{ + terminal_dally_message_list *head = thisq[index]; + if (head != NULL) { + thisq[index] = head->next; + if(head->next != NULL) { + head->next->prev = NULL; + head->next = NULL; + } + else { + thistail[index] = NULL; + } + } + return head; +} + +static void copy_terminal_dally_message_qlist(struct qlist_head *into_head, struct qlist_head *from_head) +{ + if (qlist_empty(from_head)) { + return; + } + + terminal_dally_message_list *from_entry; + qlist_for_each_entry(from_entry, from_head, list) { + terminal_dally_message_list *copy_entry = (terminal_dally_message_list *)malloc(sizeof(terminal_dally_message_list)); + + // Deep copy the entry + memcpy(copy_entry, from_entry, sizeof(terminal_dally_message_list)); + INIT_QLIST_HEAD(©_entry->list); + + if (from_entry->event_data != NULL) { + int event_data_sz = from_entry->msg.remote_event_size_bytes + from_entry->msg.local_event_size_bytes; + copy_entry->event_data = (char *)malloc(event_data_sz); + memcpy(copy_entry->event_data, from_entry->event_data, event_data_sz); + } + + append_to_qlist(into_head, copy_entry); + } +} + +static void clean_terminal_dally_message_qlist(struct qlist_head *head) +{ + terminal_dally_message_list *entry, *tmp; + qlist_for_each_entry_safe(entry, tmp, head, list) { + qlist_del(&entry->list); + if 
(entry->event_data != NULL) { + free(entry->event_data); + } + free(entry); + } +} + +static bool check_terminal_dally_message_qlist(struct qlist_head *before, struct qlist_head *after) +{ + bool is_same = true; + + // Check if both are empty + if (qlist_empty(before) && qlist_empty(after)) { + return true; + } + + // If only one is empty, they're different + if (qlist_empty(before) || qlist_empty(after)) { + return false; + } + + terminal_dally_message_list *entry_before, *entry_after; + struct qlist_head *pos_before = before->next; + struct qlist_head *pos_after = after->next; + + while (pos_before != before && pos_after != after) { + entry_before = qlist_entry(pos_before, terminal_dally_message_list, list); + entry_after = qlist_entry(pos_after, terminal_dally_message_list, list); + + is_same &= check_terminal_dally_message(&entry_before->msg, &entry_after->msg); + is_same &= (entry_before->event_data == NULL) == (entry_after->event_data == NULL); + + if (entry_before->event_data != NULL && entry_after->event_data != NULL) { + int const message_size = entry_before->msg.remote_event_size_bytes + entry_before->msg.local_event_size_bytes; + int const message_size_after = entry_after->msg.remote_event_size_bytes + entry_after->msg.local_event_size_bytes; + is_same &= (message_size == message_size_after); + if (message_size > 0) { + is_same &= (memcmp(entry_before->event_data, entry_after->event_data, message_size) == 0); + } + } + + pos_before = pos_before->next; + pos_after = pos_after->next; + } + + // Check if both reached the end at the same time + is_same &= (pos_before == before) && (pos_after == after); + + return is_same; } static void prepend_to_terminal_dally_message_list( @@ -1596,23 +1705,24 @@ static void prepend_to_terminal_dally_message_list( thisq[index] = msg; } -static terminal_dally_message_list* return_head( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index) +static terminal_dally_message_list* 
return_head_from_qlist(struct qlist_head *head) { - terminal_dally_message_list *head = thisq[index]; - if (head != NULL) { - thisq[index] = head->next; - if(head->next != NULL) { - head->next->prev = NULL; - head->next = NULL; - } - else { - thistail[index] = NULL; - } + if (qlist_empty(head)) { + return NULL; } - return head; + + struct qlist_head *item = qlist_pop(head); + return qlist_entry(item, terminal_dally_message_list, list); +} + +static terminal_dally_message_list* return_tail_from_qlist(struct qlist_head *head) +{ + if (qlist_empty(head)) { + return NULL; + } + + struct qlist_head *item = qlist_pop_back(head); + return qlist_entry(item, terminal_dally_message_list, list); } static terminal_dally_message_list* return_tail( @@ -1718,6 +1828,26 @@ static bool check_terminal_dally_message_list(terminal_dally_message_list * befo return is_same; } +static void print_terminal_dally_message_qlist(FILE * out, char const * prefix, terminal_state * ns, struct qlist_head * head) { + if (qlist_empty(head)) { + return; + } + + char addprefix_2[] = " | | "; + int len_subprefix = snprintf(NULL, 0, "%s%s", prefix, addprefix_2) + 1; + char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); + snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); + + terminal_dally_message_list * entry; + qlist_for_each_entry(entry, head, list) { + fprintf(out, "%s terminal_dally_message_list (%p) {\n", prefix, entry); + fprintf(out, "%s packet_ID = %llu\n", subprefix, LLU(entry->msg.packet_ID)); + fprintf(out, "%s }\n", prefix); + } + + free(subprefix); +} + static void print_terminal_dally_message_list(FILE * out, char const * prefix, terminal_state * ns, terminal_dally_message_list * thisq) { if (thisq == NULL) { return; @@ -2859,7 +2989,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * if(num_qos_levels == 1) { - if(s->terminal_msgs[msg->rail_id][0] == NULL || s->vc_occupancy[msg->rail_id][0] + s->params->chunk_size > 
s->params->cn_vc_size) + if(qlist_empty(&s->terminal_msgs[msg->rail_id][0]) || s->vc_occupancy[msg->rail_id][0] + s->params->chunk_size > s->params->cn_vc_size) return -1; else return 0; @@ -2892,7 +3022,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * { if(s->qos_status[msg->rail_id][i] == Q_ACTIVE) { - if(s->terminal_msgs[msg->rail_id][i] != NULL && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) + if(!qlist_empty(&s->terminal_msgs[msg->rail_id][i]) && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) return i; } } @@ -2903,7 +3033,7 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * /* All vcgs are exceeding their bandwidth limits*/ for(int i = 0; i < num_qos_levels; i++) { - if(s->terminal_msgs[msg->rail_id][i] != NULL && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) + if(!qlist_empty(&s->terminal_msgs[msg->rail_id][i]) && s->vc_occupancy[msg->rail_id][i] + s->params->chunk_size <= s->params->cn_vc_size) { bf->c2 = 1; @@ -3687,10 +3817,7 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) s->issueIdle = (int*)calloc(p->num_rails, sizeof(int)); s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); - s->terminal_msgs = - (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**)); - s->terminal_msgs_tail = - (terminal_dally_message_list***)calloc(p->num_rails, sizeof(terminal_dally_message_list**)); + s->terminal_msgs = (struct qlist_head**)calloc(p->num_rails, sizeof(struct qlist_head*)); s->qos_status = (int**)calloc(p->num_rails, sizeof(int*)); s->qos_data = (int**)calloc(p->num_rails, sizeof(int*)); @@ -3698,13 +3825,11 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) for(i = 0; i < p->num_rails; i++) { s->in_send_loop[i] = 0; - s->terminal_msgs[i] = 
(terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); - s->terminal_msgs_tail[i] = (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); + s->terminal_msgs[i] = (struct qlist_head*)calloc(num_qos_levels, sizeof(struct qlist_head)); for(int j = 0; j < num_qos_levels; j++) { - s->terminal_msgs[i][j] = NULL; - s->terminal_msgs_tail[i][j] = NULL; + INIT_QLIST_HEAD(&s->terminal_msgs[i][j]); } /* Whether the virtual channel group is active or over-bw*/ @@ -4227,7 +4352,7 @@ static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_me assert(vcg < num_qos_levels); for(i = 0; i < num_chunks; i++) { - delete_terminal_dally_message_list(return_tail(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], vcg)); + delete_terminal_dally_message_list(return_tail_from_qlist(&s->terminal_msgs[msg->rail_id][vcg])); s->terminal_length[msg->rail_id][vcg] -= s->params->chunk_size; } if(bf->c5) { @@ -4525,8 +4650,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_messa cur_chunk->msg.output_chan = vcg; cur_chunk->msg.chunk_id = i; cur_chunk->msg.origin_router_id = s->router_id[msg->rail_id]; - append_to_terminal_dally_message_list(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], - vcg, cur_chunk); + append_to_qlist(&s->terminal_msgs[msg->rail_id][vcg], cur_chunk); s->terminal_length[msg->rail_id][vcg] += s->params->chunk_size; } @@ -4668,8 +4792,7 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag s->qos_data[msg->rail_id][vcg] -= data_size; - prepend_to_terminal_dally_message_list(s->terminal_msgs[msg->rail_id], - s->terminal_msgs_tail[msg->rail_id], vcg, cur_entry); + prepend_to_qlist(&s->terminal_msgs[msg->rail_id][vcg], cur_entry); if(bf->c4) { s->in_send_loop[msg->rail_id] = msg->saved_send_loop; @@ -4723,7 +4846,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, 
terminal_dally_message * } msg->saved_vc = vcg; - terminal_dally_message_list* cur_entry = s->terminal_msgs[msg->rail_id][vcg]; + terminal_dally_message_list* cur_entry = return_head_from_qlist(&s->terminal_msgs[msg->rail_id][vcg]); int data_size = s->params->chunk_size; uint64_t const num_chunks = num_chunks_for(cur_entry->msg.packet_size, s->params->chunk_size); @@ -4813,7 +4936,7 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * } s->vc_occupancy[msg->rail_id][vcg] += s->params->chunk_size; - cur_entry = return_head(s->terminal_msgs[msg->rail_id], s->terminal_msgs_tail[msg->rail_id], vcg); + rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); s->terminal_length[msg->rail_id][vcg] -= s->params->chunk_size; s->link_traffic[msg->rail_id] += s->params->chunk_size; @@ -4825,12 +4948,8 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * if(num_qos_levels > 1) //I think this one is OK since the default is that terminals have only 1 VC anyway so leaving vcg as next_vcg = get_next_vcg(s, bf, msg, lp); - cur_entry = NULL; - if(next_vcg >= 0) - cur_entry = s->terminal_msgs[msg->rail_id][next_vcg]; - /* if there is another packet inline then schedule another send event */ - if(cur_entry != NULL && s->vc_occupancy[msg->rail_id][next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { + if(next_vcg >= 0 && !qlist_empty(&s->terminal_msgs[msg->rail_id][next_vcg]) && s->vc_occupancy[msg->rail_id][next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { terminal_dally_message *m_new; e = model_net_method_event_new(lp->gid, injection_ts + gen_noise(lp, &msg->num_rngs), lp, DRAGONFLY_DALLY, (void**)&m_new, NULL); m_new->type = T_SEND; @@ -5612,7 +5731,7 @@ static void terminal_buf_update(terminal_state * s, tw_stime ts = 0; s->vc_occupancy[msg->rail_id][vcg] -= s->params->chunk_size; - if(s->in_send_loop[msg->rail_id] == 0 && s->terminal_msgs[msg->rail_id][vcg] != NULL) { + 
if(s->in_send_loop[msg->rail_id] == 0 && !qlist_empty(&s->terminal_msgs[msg->rail_id][vcg])) { terminal_dally_message *m; bf->c1 = 1; tw_event* e = model_net_method_event_new(lp->gid, ts + gen_noise(lp, &msg->num_rngs), lp, DRAGONFLY_DALLY, @@ -5688,8 +5807,9 @@ static void dragonfly_dally_terminal_final( terminal_state * s, for(int i = 0; i < s->params->num_rails; i++) { - if(s->terminal_msgs[i][0] != NULL) - printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + if(!qlist_empty(&s->terminal_msgs[i][0])) { + printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + } } @@ -5753,11 +5873,9 @@ static void dragonfly_dally_terminal_final( terminal_state * s, free(s->vc_occupancy[i]); // TODO: terminal_msgs are not properly freed if there are messages left. Correct this! free(s->terminal_msgs[i]); - free(s->terminal_msgs_tail[i]); } free(s->vc_occupancy); free(s->terminal_msgs); - free(s->terminal_msgs_tail); // Calling destructors for data. There is no need to free data, the // destructors do it themselves. 
ROSS allocated space for the datatypes and @@ -7229,7 +7347,7 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from into->stalled_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->total_chunks = (unsigned long*) malloc(num_rails * sizeof(uint64_t)); into->busy_time = (tw_stime*) malloc(num_rails * sizeof(tw_stime)); - into->terminal_msgs = (terminal_dally_message_list***) malloc(num_rails * sizeof(terminal_dally_message_list**)); + into->terminal_msgs = (struct qlist_head**) malloc(num_rails * sizeof(struct qlist_head*)); into->link_traffic = (uint64_t*) malloc(num_rails * sizeof(uint64_t)); for(int i = 0; i < num_rails; i++) { @@ -7237,13 +7355,14 @@ static void save_terminal_state(terminal_state *into, terminal_state const *from into->terminal_length[i] = (int*) malloc(num_qos_levels * sizeof(int)); into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); - into->terminal_msgs[i] = (terminal_dally_message_list**) malloc(num_qos_levels * sizeof(terminal_dally_message_list*)); + into->terminal_msgs[i] = (struct qlist_head*) malloc(num_qos_levels * sizeof(struct qlist_head)); for (int j = 0; jvc_occupancy[i][j] = from->vc_occupancy[i][j]; into->terminal_length[i][j] = from->terminal_length[i][j]; into->qos_data[i][j] = from->qos_data[i][j]; into->qos_status[i][j] = from->qos_status[i][j]; - copy_terminal_dally_message_list(&into->terminal_msgs[i][j], from->terminal_msgs[i][j]); + INIT_QLIST_HEAD(&into->terminal_msgs[i][j]); + copy_terminal_dally_message_qlist(&into->terminal_msgs[i][j], &from->terminal_msgs[i][j]); } into->last_buf_full[i] = from->last_buf_full[i]; into->in_send_loop[i] = from->in_send_loop[i]; @@ -7301,7 +7420,7 @@ static void clean_terminal_state(terminal_state *state) { free(state->qos_status[i]); free(state->qos_data[i]); for (int j = 0; jterminal_msgs[i][j]); + 
clean_terminal_dally_message_qlist(&state->terminal_msgs[i][j]); } free(state->terminal_msgs[i]); } @@ -7405,7 +7524,7 @@ static bool check_terminal_state(terminal_state *before, terminal_state *after) is_same &= (before->terminal_length[i][j] == after->terminal_length[i][j]); is_same &= (before->qos_status[i][j] == after->qos_status[i][j]); is_same &= (before->qos_data[i][j] == after->qos_data[i][j]); - is_same &= check_terminal_dally_message_list(before->terminal_msgs[i][j], after->terminal_msgs[i][j]); + is_same &= check_terminal_dally_message_qlist(&before->terminal_msgs[i][j], &after->terminal_msgs[i][j]); } is_same &= (before->last_buf_full[i] == after->last_buf_full[i]); @@ -7501,14 +7620,13 @@ static void print_terminal_state(FILE * out, char const * prefix, terminal_state fprintf(out, "%s | rail %d: [\n", prefix, i); for (int j=0; jparams->num_qos_levels; j++) { fprintf(out, "%s | | qos level %d\n", prefix, j); - print_terminal_dally_message_list(out, subprefix, state, state->terminal_msgs[i][j]); + print_terminal_dally_message_qlist(out, subprefix, state, &state->terminal_msgs[i][j]); } } fprintf(out, "%s | ]\n", prefix); free(subprefix); } - fprintf(out, "%s | *** terminal_msgs_tail = %p\n", prefix, state->terminal_msgs_tail); if (is_dally_surrogate_on) { fprintf(out, "%s | * in_send_loop = %p\n", prefix, state->in_send_loop); From 1710290fd2941d2fbc7283eb3b2a99f0cb7dfcae Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 17 Jun 2025 16:36:46 -0400 Subject: [PATCH 089/110] Fixing small silent bug at terminal initialization --- src/networks/model-net/dragonfly-dally.C | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 7562e844..e30769db 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -3825,9 +3825,9 @@ static void terminal_dally_init( terminal_state * s, tw_lp * lp ) for(i = 0; i < p->num_rails; 
i++) { s->in_send_loop[i] = 0; - s->terminal_msgs[i] = (struct qlist_head*)calloc(num_qos_levels, sizeof(struct qlist_head)); + s->terminal_msgs[i] = (struct qlist_head*)calloc(s->params->num_vcs, sizeof(struct qlist_head)); - for(int j = 0; j < num_qos_levels; j++) + for(int j = 0; j < s->params->num_vcs; j++) { INIT_QLIST_HEAD(&s->terminal_msgs[i][j]); } From 3d1b55c061a0d5ff0911979f59ad06e2a642b366 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 18 Jun 2025 04:43:05 -0400 Subject: [PATCH 090/110] Refactoring routers usage of custom double linked-list for qlist --- src/networks/model-net/dragonfly-dally.C | 332 +++++++++-------------- 1 file changed, 121 insertions(+), 211 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index e30769db..5465605a 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -214,8 +214,6 @@ struct terminal_dally_message_list { terminal_dally_message msg; char* event_data; struct qlist_head list; - terminal_dally_message_list *next; // Keep for router compatibility - terminal_dally_message_list *prev; // Keep for router compatibility }; static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, @@ -223,8 +221,6 @@ static void init_terminal_dally_message_list(terminal_dally_message_list *thisO, thisO->msg = *inmsg; thisO->event_data = NULL; INIT_QLIST_HEAD(&thisO->list); - thisO->next = NULL; - thisO->prev = NULL; } static void delete_terminal_dally_message_list(void *thisO) { @@ -653,10 +649,8 @@ struct router_state unsigned long* stalled_chunks; //Counter for when a packet is put into queued messages instead of routing due to full VC unsigned long* total_chunks; //Counter for when a packet is sent - per port - terminal_dally_message_list ***pending_msgs; - terminal_dally_message_list ***pending_msgs_tail; - terminal_dally_message_list ***queued_msgs; - terminal_dally_message_list ***queued_msgs_tail; + struct 
qlist_head **pending_msgs; + struct qlist_head **queued_msgs; int *in_send_loop; int *queued_count; struct rc_stack * st; @@ -1573,41 +1567,7 @@ static inline void prepend_to_qlist(struct qlist_head *head, terminal_dally_mess } // Restore old functions for router compatibility -static void append_to_terminal_dally_message_list( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index, - terminal_dally_message_list *msg) -{ - if (thisq[index] == NULL) { - thisq[index] = msg; - } - else { - assert(thistail[index] != NULL); - thistail[index]->next = msg; - msg->prev = thistail[index]; - } - thistail[index] = msg; -} -static terminal_dally_message_list* return_head( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index) -{ - terminal_dally_message_list *head = thisq[index]; - if (head != NULL) { - thisq[index] = head->next; - if(head->next != NULL) { - head->next->prev = NULL; - head->next = NULL; - } - else { - thistail[index] = NULL; - } - } - return head; -} static void copy_terminal_dally_message_qlist(struct qlist_head *into_head, struct qlist_head *from_head) { @@ -1689,21 +1649,6 @@ static bool check_terminal_dally_message_qlist(struct qlist_head *before, struct return is_same; } -static void prepend_to_terminal_dally_message_list( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index, - terminal_dally_message_list *msg) -{ - if (thisq[index] == NULL) { - thistail[index] = msg; - } - else { - thisq[index]->prev = msg; - msg->next = thisq[index]; - } - thisq[index] = msg; -} static terminal_dally_message_list* return_head_from_qlist(struct qlist_head *head) { @@ -1725,104 +1670,69 @@ static terminal_dally_message_list* return_tail_from_qlist(struct qlist_head *he return qlist_entry(item, terminal_dally_message_list, list); } -static terminal_dally_message_list* return_tail( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** 
thistail, - int index) -{ - terminal_dally_message_list *tail = thistail[index]; - assert(tail); - if (tail->prev != NULL) { - tail->prev->next = NULL; - thistail[index] = tail->prev; - tail->prev = NULL; - } - else { - thistail[index] = NULL; - thisq[index] = NULL; - } - return tail; -} -// Copies a list and returns the tail -static terminal_dally_message_list * copy_terminal_dally_message_list(terminal_dally_message_list ** into_thisq, terminal_dally_message_list const * from_thisq) { - if (from_thisq == NULL) { - *into_thisq = NULL; - return NULL; - } - terminal_dally_message_list const * from_head = from_thisq; - terminal_dally_message_list * prev = NULL; - while(from_head != NULL) { - terminal_dally_message_list * copy_head = (terminal_dally_message_list *) malloc(sizeof(terminal_dally_message_list)); +static void copy_msgs_qlist(struct qlist_head *into_qlist, struct qlist_head *from_qlist) { + INIT_QLIST_HEAD(into_qlist); - //copy_head->msg = from_head->msg; - memcpy(copy_head, from_head, sizeof(terminal_dally_message_list)); - copy_head->prev = prev; + if (qlist_empty(from_qlist)) { + return; + } - if (from_head->event_data != NULL) { - int const message_size = from_head->msg.remote_event_size_bytes + from_head->msg.local_event_size_bytes; - assert(message_size > 0); - copy_head->event_data = (char *) malloc(message_size); - memcpy(copy_head->event_data, from_head->event_data, message_size); - } + struct qlist_head *pos; + qlist_for_each(pos, from_qlist) { + terminal_dally_message_list *from_entry = qlist_entry(pos, terminal_dally_message_list, list); + terminal_dally_message_list *copy_entry = (terminal_dally_message_list*) malloc(sizeof(terminal_dally_message_list)); - if (prev == NULL) { - *into_thisq = copy_head; - } else { - prev->next = copy_head; + init_terminal_dally_message_list(copy_entry, &from_entry->msg); + if (from_entry->event_data != NULL) { + copy_entry->event_data = (char*) malloc(from_entry->msg.remote_event_size_bytes); + 
memcpy(copy_entry->event_data, from_entry->event_data, from_entry->msg.remote_event_size_bytes); } - prev = copy_head; - from_head = from_head->next; + qlist_add_tail(&copy_entry->list, into_qlist); } - prev->next = NULL; - - return prev; } -static void clean_terminal_dally_message_list(terminal_dally_message_list * thisq) { - if (thisq == NULL) { - return; + + +static bool check_msgs_qlist(struct qlist_head * before, struct qlist_head * after) { + bool is_same = true; + + if (qlist_empty(before) && qlist_empty(after)) { + return true; } - terminal_dally_message_list * prev = thisq; - terminal_dally_message_list * head = prev->next; - free(prev->event_data); - while (head != NULL) { - free(head->event_data); - free(prev); - prev = head; - head = head->next; + if (qlist_empty(before) != qlist_empty(after)) { + return false; } - free(prev); -} -static bool check_terminal_dally_message_list(terminal_dally_message_list * before, terminal_dally_message_list * after) { - bool is_same = true; + struct qlist_head *pos_before = before->next; + struct qlist_head *pos_after = after->next; - terminal_dally_message_list * head_before = before; - terminal_dally_message_list * head_after = after; - while (head_before != NULL && head_after != NULL) { - is_same &= check_terminal_dally_message(&head_before->msg, &head_after->msg); - is_same &= (head_before->event_data == NULL) == (head_after->event_data == NULL); + while (pos_before != before && pos_after != after) { + terminal_dally_message_list *entry_before = qlist_entry(pos_before, terminal_dally_message_list, list); + terminal_dally_message_list *entry_after = qlist_entry(pos_after, terminal_dally_message_list, list); + + is_same &= check_terminal_dally_message(&entry_before->msg, &entry_after->msg); + is_same &= (entry_before->event_data == NULL) == (entry_after->event_data == NULL); - int const message_size = head_before->msg.remote_event_size_bytes + head_before->msg.local_event_size_bytes; - int const message_size_after =
head_after->msg.remote_event_size_bytes + head_after->msg.local_event_size_bytes; + int const message_size = entry_before->msg.remote_event_size_bytes + entry_before->msg.local_event_size_bytes; + int const message_size_after = entry_after->msg.remote_event_size_bytes + entry_after->msg.local_event_size_bytes; is_same &= message_size == message_size_after; - if (is_same && head_before->event_data != NULL) { + if (is_same && entry_before->event_data != NULL) { assert(message_size > 0); - - is_same &= !memcmp(head_before->event_data, head_after->event_data, message_size); + is_same &= !memcmp(entry_before->event_data, entry_after->event_data, message_size); } - head_before = head_before->next; - head_after = head_after->next; + pos_before = pos_before->next; + pos_after = pos_after->next; } - if (head_before != NULL || head_after != NULL) { - is_same = false; // at least one of them is longer than the other + // Check if both reached end + if (pos_before != before || pos_after != after) { + is_same = false; // different lengths } return is_same; @@ -1848,8 +1758,9 @@ static void print_terminal_dally_message_qlist(FILE * out, char const * prefix, free(subprefix); } -static void print_terminal_dally_message_list(FILE * out, char const * prefix, terminal_state * ns, terminal_dally_message_list * thisq) { - if (thisq == NULL) { + +static void print_msgs_qlist(FILE * out, char const * prefix, struct qlist_head * qlist) { + if (qlist_empty(qlist)) { return; } @@ -1858,25 +1769,17 @@ static void print_terminal_dally_message_list(FILE * out, char const * prefix, t char * subprefix = (char *) malloc(len_subprefix * sizeof(char)); snprintf(subprefix, len_subprefix, "%s%s", prefix, addprefix_2); - terminal_dally_message_list * head = thisq; - while (head != NULL) { - fprintf(out, "%s{\n", prefix); - fprintf(out, "%s | msg:\n", prefix); - print_terminal_dally_message(out, subprefix, ns, &head->msg); - fprintf(out, "%s | event_data = %p\n", prefix, head->event_data); - int const 
message_size = head->msg.remote_event_size_bytes + head->msg.local_event_size_bytes; - if (head->event_data != NULL) { - assert(message_size > 0); - tw_fprint_binary_array(out, subprefix, head->event_data, message_size); - } - fprintf(out, "%s},\n", prefix); - head = head->next; + struct qlist_head *pos; + qlist_for_each(pos, qlist) { + terminal_dally_message_list *entry = qlist_entry(pos, terminal_dally_message_list, list); + fprintf(out, "%s qlist entry (%p) {\n", prefix, entry); + print_terminal_dally_message(out, subprefix, NULL, &entry->msg); + fprintf(out, "%s }\n", prefix); } free(subprefix); } - static tw_stime* buff_time_storage_create(terminal_state *s) { tw_stime* storage = (tw_stime*)malloc(s->params->num_rails * sizeof(tw_stime)); @@ -3092,7 +2995,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess int base_limit = i * vcs_per_qos; for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) { - if(s->pending_msgs[output_port][k] != NULL) + if(!qlist_empty(&s->pending_msgs[output_port][k])) return k; } } @@ -3106,7 +3009,7 @@ static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_mess for(int i = 0; i < s->params->num_vcs; i++) { - if(s->pending_msgs[output_port][next_rr_vc] != NULL) + if(!qlist_empty(&s->pending_msgs[output_port][next_rr_vc])) { s->last_qos_lvl[output_port] = next_rr_vc; return next_rr_vc; @@ -3985,13 +3888,9 @@ static void router_dally_init(router_state * r, tw_lp * lp) r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); r->qos_status = (int**)calloc(p->radix, sizeof(int*)); r->pending_msgs = - (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); - r->pending_msgs_tail = - (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); + (struct qlist_head**)calloc(p->radix, sizeof(struct qlist_head*)); r->queued_msgs = - (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); - 
r->queued_msgs_tail = - (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); + (struct qlist_head**)calloc(p->radix, sizeof(struct qlist_head*)); r->queued_count = (int*)calloc(p->radix, sizeof(int)); r->last_buf_full = (tw_stime*)calloc(p->radix, sizeof(tw_stime*)); r->busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); @@ -4024,14 +3923,8 @@ static void router_dally_init(router_state * r, tw_lp * lp) r->in_send_loop[i] = 0; r->vc_occupancy[i] = (int*)calloc(p->num_vcs, sizeof(int)); // printf("\n Number of vcs %d for radix %d ", p->num_vcs, p->radix); - r->pending_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->pending_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->queued_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->queued_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); + r->pending_msgs[i] = (struct qlist_head*)calloc(p->num_vcs, sizeof(struct qlist_head)); + r->queued_msgs[i] = (struct qlist_head*)calloc(p->num_vcs, sizeof(struct qlist_head)); r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); r->qos_data[i] = (int*)calloc(num_qos_levels, sizeof(int)); for(int j = 0; j < num_qos_levels; j++) @@ -4041,10 +3934,8 @@ static void router_dally_init(router_state * r, tw_lp * lp) } for(int j = 0; j < p->num_vcs; j++) { - r->pending_msgs[i][j] = NULL; - r->pending_msgs_tail[i][j] = NULL; - r->queued_msgs[i][j] = NULL; - r->queued_msgs_tail[i][j] = NULL; + INIT_QLIST_HEAD(&r->pending_msgs[i][j]); + INIT_QLIST_HEAD(&r->queued_msgs[i][j]); } } @@ -5920,11 +5811,11 @@ void dragonfly_dally_router_final(router_state * s, tw_lp * lp){ int i, j; for(i = 0; i < s->params->radix; i++) { for(j = 0; j < s->params->num_vcs; j++) { - if(s->queued_msgs[i][j] != NULL) { + 
if(!qlist_empty(&s->queued_msgs[i][j])) { printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, s->vc_occupancy[i][j]); } - if(s->pending_msgs[i][j] != NULL) { + if(!qlist_empty(&s->pending_msgs[i][j])) { printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); } } @@ -6282,8 +6173,12 @@ static void router_packet_receive_rc(router_state * s, s->is_monitoring_bw = 0; if(bf->c2) { - terminal_dally_message_list * tail = return_tail(s->pending_msgs[output_port], s->pending_msgs_tail[output_port], output_chan); - delete_terminal_dally_message_list(tail); + if (!qlist_empty(&s->pending_msgs[output_port][output_chan])) { + struct qlist_head *last = s->pending_msgs[output_port][output_chan].prev; + qlist_del(last); + terminal_dally_message_list *tail = qlist_entry(last, terminal_dally_message_list, list); + delete_terminal_dally_message_list(tail); + } s->vc_occupancy[output_port][output_chan] -= s->params->chunk_size; if(bf->c3) { s->in_send_loop[output_port] = 0; @@ -6295,8 +6190,12 @@ static void router_packet_receive_rc(router_state * s, { s->last_buf_full[output_port] = msg->saved_busy_time; } - delete_terminal_dally_message_list(return_tail(s->queued_msgs[output_port], - s->queued_msgs_tail[output_port], output_chan)); + if (!qlist_empty(&s->queued_msgs[output_port][output_chan])) { + struct qlist_head *last = s->queued_msgs[output_port][output_chan].prev; + qlist_del(last); + terminal_dally_message_list *tail = qlist_entry(last, terminal_dally_message_list, list); + delete_terminal_dally_message_list(tail); + } s->queued_count[output_port] -= s->params->chunk_size; } @@ -6474,8 +6373,7 @@ static void router_packet_receive( router_state * s, assert(output_chan < s->params->num_vcs && output_port < s->params->radix); router_credit_send(s, msg, lp, -1, &(msg->num_rngs)); - append_to_terminal_dally_message_list(s->pending_msgs[output_port], s->pending_msgs_tail[output_port], - output_chan, cur_chunk); + qlist_add_tail(&cur_chunk->list, 
&s->pending_msgs[output_port][output_chan]); s->vc_occupancy[output_port][output_chan] += s->params->chunk_size; if(s->in_send_loop[output_port] == 0) { bf->c3 = 1; @@ -6499,8 +6397,7 @@ static void router_packet_receive( router_state * s, cur_chunk->msg.saved_vc = msg->vc_index; cur_chunk->msg.saved_channel = msg->output_chan; assert(output_chan < s->params->num_vcs && output_port < s->params->radix); - append_to_terminal_dally_message_list( s->queued_msgs[output_port], - s->queued_msgs_tail[output_port], output_chan, cur_chunk); + qlist_add_tail(&cur_chunk->list, &s->queued_msgs[output_port][output_chan]); s->queued_count[output_port] += s->params->chunk_size; @@ -6622,8 +6519,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m s->qos_data[output_port][vcg] -= msg_size; s->total_chunks[output_port]--; - prepend_to_terminal_dally_message_list(s->pending_msgs[output_port], - s->pending_msgs_tail[output_port], output_chan, cur_entry); + qlist_add(&cur_entry->list, &s->pending_msgs[output_port][output_chan]); if (g_congestion_control_enabled) { congestion_control_message *cc_msg_rc = (congestion_control_message*)rc_stack_pop(s->cc_st); @@ -6681,7 +6577,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes return; } - cur_entry = s->pending_msgs[output_port][output_chan]; + if (!qlist_empty(&s->pending_msgs[output_port][output_chan])) { + struct qlist_head *first = s->pending_msgs[output_port][output_chan].next; + cur_entry = qlist_entry(first, terminal_dally_message_list, list); + } else { + cur_entry = NULL; + } msg->dfdally_src_terminal_id = cur_entry->msg.dfdally_src_terminal_id; @@ -6840,8 +6741,8 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes rc_stack_push(lp, cc_msg_rc, cc_msg_rc_storage_delete, s->cc_st); } - cur_entry = return_head(s->pending_msgs[output_port], - s->pending_msgs_tail[output_port], output_chan); + struct qlist_head *item = 
qlist_pop(&s->pending_msgs[output_port][output_chan]); + cur_entry = item ? qlist_entry(item, terminal_dally_message_list, list) : NULL; rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); s->qos_data[output_port][vcg] += msg_size; @@ -6856,7 +6757,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes base_limit = i * vcs_per_qos; for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) { - if(s->pending_msgs[output_port][k] != NULL) + if(!qlist_empty(&s->pending_msgs[output_port][k])) { next_output_chan = k; break; @@ -6872,7 +6773,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes s->in_send_loop[output_port] = 0; return; } - cur_entry = s->pending_msgs[output_port][next_output_chan]; + if (!qlist_empty(&s->pending_msgs[output_port][next_output_chan])) { + struct qlist_head *first = s->pending_msgs[output_port][next_output_chan].next; + cur_entry = qlist_entry(first, terminal_dally_message_list, list); + } else { + cur_entry = NULL; + } assert(cur_entry != NULL); terminal_dally_message *m_new; @@ -6913,10 +6819,8 @@ static void router_buf_update_rc(router_state * s, } } if(bf->c1) { - terminal_dally_message_list* head = return_tail(s->pending_msgs[indx], - s->pending_msgs_tail[indx], output_chan); - prepend_to_terminal_dally_message_list(s->queued_msgs[indx], - s->queued_msgs_tail[indx], output_chan, head); + terminal_dally_message_list* head = return_tail_from_qlist(&s->pending_msgs[indx][output_chan]); + qlist_add(&head->list, &s->queued_msgs[indx][output_chan]); s->vc_occupancy[indx][output_chan] -= s->params->chunk_size; s->queued_count[indx] += s->params->chunk_size; } @@ -6963,12 +6867,12 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa s->last_buf_full[indx] = 0.0; } - if(s->queued_msgs[indx][output_chan] != NULL) { + if(!qlist_empty(&s->queued_msgs[indx][output_chan])) { bf->c1 = 1; assert(indx < s->params->radix); 
assert(output_chan < s->params->num_vcs); - terminal_dally_message_list *head = return_head(s->queued_msgs[indx], - s->queued_msgs_tail[indx], output_chan); + struct qlist_head *item = qlist_pop(&s->queued_msgs[indx][output_chan]); + terminal_dally_message_list *head = item ? qlist_entry(item, terminal_dally_message_list, list) : NULL; /*if(strcmp(head->msg.category, "medium") == 0) { if(head->msg.saved_channel < 4 || head->msg.saved_channel >= 8) @@ -6977,13 +6881,12 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa } }*/ router_credit_send(s, &head->msg, lp, 1, &(msg->num_rngs)); - append_to_terminal_dally_message_list(s->pending_msgs[indx], - s->pending_msgs_tail[indx], output_chan, head); + qlist_add_tail(&head->list, &s->pending_msgs[indx][output_chan]); s->vc_occupancy[indx][output_chan] += s->params->chunk_size; s->queued_count[indx] -= s->params->chunk_size; } - if(s->in_send_loop[indx] == 0 && s->pending_msgs[indx][output_chan] != NULL) { + if(s->in_send_loop[indx] == 0 && !qlist_empty(&s->pending_msgs[indx][output_chan])) { bf->c2 = 1; terminal_dally_message *m; tw_stime ts = maxd(s->next_output_available_time[indx], tw_now(lp)) - tw_now(lp); @@ -7854,8 +7757,8 @@ static void save_router_state(router_state *into, router_state const *from) { into->vc_occupancy = (int**) malloc(radix * sizeof(int*)); into->qos_status = (int**) malloc(radix * sizeof(int*)); into->qos_data = (int**) malloc(radix * sizeof(int*)); - into->pending_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**)); - into->queued_msgs = (terminal_dally_message_list***) malloc(radix * sizeof(terminal_dally_message_list**)); + into->pending_msgs = (struct qlist_head**) malloc(radix * sizeof(struct qlist_head*)); + into->queued_msgs = (struct qlist_head**) malloc(radix * sizeof(struct qlist_head*)); for (int i = 0; i < radix; i++) { into->next_output_available_time[i] = from->next_output_available_time[i]; @@ -7874,13 
+7777,13 @@ static void save_router_state(router_state *into, router_state const *from) { into->qos_status[i] = (int*) malloc(num_qos_levels * sizeof(int)); into->qos_data[i] = (int*) malloc(num_qos_levels * sizeof(int)); - into->pending_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*)); - into->queued_msgs[i] = (terminal_dally_message_list**) malloc(p->num_vcs * sizeof(terminal_dally_message_list*)); + into->pending_msgs[i] = (struct qlist_head*) malloc(p->num_vcs * sizeof(struct qlist_head)); + into->queued_msgs[i] = (struct qlist_head*) malloc(p->num_vcs * sizeof(struct qlist_head)); for (int j = 0; j < p->num_vcs; j++) { into->vc_occupancy[i][j] = from->vc_occupancy[i][j]; - copy_terminal_dally_message_list(&into->pending_msgs[i][j], from->pending_msgs[i][j]); - copy_terminal_dally_message_list(&into->queued_msgs[i][j], from->queued_msgs[i][j]); + copy_msgs_qlist(&into->pending_msgs[i][j], &from->pending_msgs[i][j]); + copy_msgs_qlist(&into->queued_msgs[i][j], &from->queued_msgs[i][j]); } for (int j = 0; j < num_qos_levels; j++) { into->qos_status[i][j] = from->qos_status[i][j]; @@ -7935,8 +7838,19 @@ static void clean_router_state(router_state *state) { free(state->qos_data[i]); for (int j = 0; j < p->num_vcs; j++) { - clean_terminal_dally_message_list(state->pending_msgs[i][j]); - clean_terminal_dally_message_list(state->queued_msgs[i][j]); + // Clean up qlist entries - remove and free all elements + while (!qlist_empty(&state->pending_msgs[i][j])) { + struct qlist_head *item = qlist_pop(&state->pending_msgs[i][j]); + terminal_dally_message_list *entry = qlist_entry(item, terminal_dally_message_list, list); + free(entry->event_data); + free(entry); + } + while (!qlist_empty(&state->queued_msgs[i][j])) { + struct qlist_head *item = qlist_pop(&state->queued_msgs[i][j]); + terminal_dally_message_list *entry = qlist_entry(item, terminal_dally_message_list, list); + free(entry->event_data); + free(entry); + } } 
free(state->pending_msgs[i]); @@ -8014,8 +7928,8 @@ static bool check_router_state(router_state const *before, router_state const *a return false; } - if (!check_terminal_dally_message_list(before->pending_msgs[i][j], after->pending_msgs[i][j]) || - !check_terminal_dally_message_list(before->queued_msgs[i][j], after->queued_msgs[i][j])) { + if (!check_msgs_qlist(&before->pending_msgs[i][j], &after->pending_msgs[i][j]) || + !check_msgs_qlist(&before->queued_msgs[i][j], &after->queued_msgs[i][j])) { return false; } } @@ -8138,28 +8052,24 @@ static void print_router_state(FILE * out, char const * prefix, router_state * s fprintf(out, "%s | port %d: [\n", prefix, i); for (int j = 0; j < p->num_vcs; j++) { fprintf(out, "%s | | vcs # %d\n", prefix, j); - print_terminal_dally_message_list(out, subprefix, NULL, state->pending_msgs[i][j]); + print_msgs_qlist(out, subprefix, &state->pending_msgs[i][j]); } fprintf(out, "%s | ]\n", prefix); } fprintf(out, "%s | ]\n", prefix); - fprintf(out, "%s | *** pending_msgs_tail = %p\n", prefix, state->pending_msgs_tail); - fprintf(out, "%s | *** queued_msgs[%d][%d] = [\n", prefix, radix, p->num_vcs); for (int i = 0; i < radix; i++) { fprintf(out, "%s | port %d: [\n", prefix, i); for (int j = 0; j < p->num_vcs; j++) { fprintf(out, "%s | | vcs # %d\n", prefix, j); - print_terminal_dally_message_list(out, subprefix, NULL, state->queued_msgs[i][j]); + print_msgs_qlist(out, subprefix, &state->queued_msgs[i][j]); } fprintf(out, "%s | ]\n", prefix); } fprintf(out, "%s | ]\n", prefix); free(subprefix); - fprintf(out, "%s | *** queued_msgs_tail = %p\n", prefix, state->queued_msgs_tail); - fprintf(out, "%s | * in_send_loop[%d] = [", prefix, radix); for (int i = 0; i < radix; i++) { fprintf(out, "%s%d", i ? 
", " : "", state->in_send_loop[i]); From 274f020483fd0fe7e9f4a42843c0a53c7d0aafd0 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 18 Jun 2025 08:32:26 -0400 Subject: [PATCH 091/110] Allowing director to be called after simulation ended, to repopulate network if needed --- codes/surrogate/network-surrogate.h | 2 +- src/network-workloads/model-net-mpi-replay.c | 2 +- src/surrogate/application-surrogate.c | 38 +++++++------ src/surrogate/network-surrogate.c | 56 +++++++++++++------- 4 files changed, 61 insertions(+), 37 deletions(-) diff --git a/codes/surrogate/network-surrogate.h b/codes/surrogate/network-surrogate.h index 4b22e238..b4dae45c 100644 --- a/codes/surrogate/network-surrogate.h +++ b/codes/surrogate/network-surrogate.h @@ -60,7 +60,7 @@ struct network_surrogate_config { void network_director_configure(struct network_surrogate_config *, struct switch_at_struct * switch_network_at, bool freeze_network_on_switch); // Function for application director to use network freezing machinery -void surrogate_switch_network_model(tw_pe * pe); +void surrogate_switch_network_model(tw_pe * pe, bool is_queue_empty); void network_director_finalize(void); diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index fd28775f..475d1675 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -4080,7 +4080,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) } i++; } - printf("\n num_net_traces %d; num_dumpi_traces %d", num_net_traces, num_dumpi_traces); + printf("\n num_net_traces %d; num_dumpi_traces %d\n", num_net_traces, num_dumpi_traces); fclose(name_file); assert(strlen(alloc_file) != 0); alloc_spec = 1; diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c index 870794b6..736fdd89 100644 --- a/src/surrogate/application-surrogate.c +++ b/src/surrogate/application-surrogate.c @@ -23,7 +23,11 @@ static enum { #define 
master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); } -static void application_director_pre_switch(tw_pe * pe) { +static void application_director_pre_switch(tw_pe * pe, bool is_queue_empty) { + // No need to switch to surrogate when the simulation has ended + if (is_queue_empty || gvt_for(pe) >= g_tw_ts_end) { + return; + } // Scheduling next GVT hook call if it is not scheduled every tw_trigger_gvt_hook_every if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) { tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns); @@ -41,7 +45,7 @@ static void application_director_pre_switch(tw_pe * pe) { if (conf.use_network_surrogate) { master_printf("Switching network surrogate on\n"); - surrogate_switch_network_model(pe); + surrogate_switch_network_model(pe, is_queue_empty); } surrogate_time_last = tw_clock_read(); @@ -55,12 +59,19 @@ static void application_director_pre_switch(tw_pe * pe) { } } -static void application_director_post_switch(tw_pe * pe) { +static void application_director_post_switch(tw_pe * pe, bool is_queue_empty) { + // No need to restart high-fidelity simulation if network was not suspended + if (is_queue_empty && !conf.use_network_surrogate) { + return; + } + // Scheduling next GVT hook call - if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) { - tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns); - } else { - tw_trigger_gvt_hook_every(conf.every_n_gvt); + if (!is_queue_empty) { + if (conf.option == APP_DIRECTOR_OPTS_call_every_ns) { + tw_trigger_gvt_hook_at(gvt_for(pe) + conf.call_every_ns); + } else { + tw_trigger_gvt_hook_every(conf.every_n_gvt); + } } double const start = tw_clock_read(); @@ -73,8 +84,7 @@ static void application_director_post_switch(tw_pe * pe) { if (conf.use_network_surrogate) { master_printf("Switching network surrogate off\n"); - surrogate_switch_network_model(pe); - // TODO: reset network predictors and ask not to gather any data for 1 ms + surrogate_switch_network_model(pe, is_queue_empty); } 
time_in_surrogate += start - surrogate_time_last; @@ -85,18 +95,14 @@ static void application_director_post_switch(tw_pe * pe) { director_state = PRE_JUMP; } -static void application_director(tw_pe * pe) { - // Director is not called if the simulation has ended - if (gvt_for(pe) >= g_tw_ts_end) { - return; - } +static void application_director(tw_pe * pe, bool is_queue_empty) { switch (director_state) { case PRE_JUMP: - application_director_pre_switch(pe); + application_director_pre_switch(pe, is_queue_empty); break; case POST_JUMP_switched: case POST_JUMP_skipped: - application_director_post_switch(pe); + application_director_post_switch(pe, is_queue_empty); break; } } diff --git a/src/surrogate/network-surrogate.c b/src/surrogate/network-surrogate.c index b7108cc8..c2278583 100644 --- a/src/surrogate/network-surrogate.c +++ b/src/surrogate/network-surrogate.c @@ -4,6 +4,8 @@ #include #include +#define master_printf(cond, ...) if (cond && g_tw_mynode == 0) { printf(__VA_ARGS__); } + static bool is_network_surrogate_configured = false; static struct switch_at_struct switch_network_at = {0}; static struct network_surrogate_config net_surr_config = {0}; @@ -198,9 +200,9 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { tw_error(TW_LOC, "Sorry, sending packets to the future hasn't been implement in this mode"); } - printf("PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size); + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (before freezing events)\n", g_tw_mynode, pe->avl_tree_size); freeze_events_to_separate_queue_pe(pe); - printf("PE %lu - AVL size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size); + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (after freezing events to separate queue)\n", g_tw_mynode, pe->avl_tree_size); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -224,7 
+226,11 @@ static void events_high_def_to_surrogate_switch(tw_pe * pe) { pe->cur_event = pe->abort_event; pe->cur_event->caused_by_me = NULL; +#ifdef USE_RAND_TIEBREAKER pe->cur_event->sig = pe->GVT_sig; +#else + pe->cur_event->recv_ts = pe->GVT; +#endif if (lp_type_switch) { if (lp_type_switch->trigger_idle_modelnet) { @@ -257,9 +263,9 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { #endif // Restore frozen events back to the main queue with timestamp adjustment - printf("PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size); + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (before injecting events into event queue again)\n", g_tw_mynode, pe->avl_tree_size); unfreeze_events_from_separate_queue_pe(pe); - printf("PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size); + master_printf(DEBUG_DIRECTOR > 1, "PE %lu - AVL size %d (after defreezing events from separate queue)\n", g_tw_mynode, pe->avl_tree_size); // Going through all LPs in PE and running their specific functions for (tw_lpid local_lpid = 0; local_lpid < g_tw_nlp; local_lpid++) { @@ -285,7 +291,11 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { pe->cur_event = pe->abort_event; pe->cur_event->caused_by_me = NULL; +#ifdef USE_RAND_TIEBREAKER pe->cur_event->sig = pe->GVT_sig; +#else + pe->cur_event->recv_ts = pe->GVT; +#endif if (lp_type_switch) { if (lp_type_switch->trigger_idle_modelnet) { @@ -317,19 +327,23 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe) { } -static void switch_model(tw_pe * pe) { - // Rollback if in optimistic mode - if (g_tw_synchronization_protocol == OPTIMISTIC) { +static void switch_model(tw_pe * pe, bool is_queue_empty) { + // Rollback if in optimistic mode and the simulation has events yet to process (globally) + if (g_tw_synchronization_protocol == OPTIMISTIC && !is_queue_empty) { 
tw_scheduler_rollback_and_cancel_events_pe(pe); } - net_surr_config.model.switch_surrogate(); - if (DEBUG_DIRECTOR && g_tw_mynode == 0) { - printf("Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "surrogate" : "high-fidelity"); + master_printf(DEBUG_DIRECTOR, "Switching to network %s\n", net_surr_config.model.is_surrogate_on() ? "high-fidelity": "surrogate"); + + bool const is_surrogate_off = !net_surr_config.model.is_surrogate_on(); + if (is_surrogate_off && is_queue_empty) { + master_printf(true, "No need to switch to surrogate when the simulation has no events to process\n"); + return; } + net_surr_config.model.switch_surrogate(); // "Freezing" network events and activating LP's switch functions if (freeze_network_on_switch) { - if (net_surr_config.model.is_surrogate_on()) { + if (is_surrogate_off) { model_net_method_switch_to_surrogate(); events_high_def_to_surrogate_switch(pe); } else { @@ -340,7 +354,7 @@ static void switch_model(tw_pe * pe) { } -void network_director(tw_pe * pe) { +void network_director(tw_pe * pe, bool is_queue_empty) { assert(is_network_surrogate_configured); assert(network_director_enabled); @@ -375,7 +389,7 @@ void network_director(tw_pe * pe) { } // ---- Past this means that we are in fact switching ---- - net_surr_config.model.is_surrogate_on(); + bool const surrogate_state_pre_switch = net_surr_config.model.is_surrogate_on(); // Asking the director/model to switch if (DEBUG_DIRECTOR && g_tw_mynode == 0) { @@ -386,7 +400,7 @@ void network_director(tw_pe * pe) { } double const start = tw_clock_read(); - switch_model(pe); + switch_model(pe, is_queue_empty); double const end = tw_clock_read(); surrogate_switching_time += end - start; @@ -396,15 +410,19 @@ void network_director(tw_pe * pe) { tw_trigger_gvt_hook_at(next_switch); } - if (DEBUG_DIRECTOR == 1 && g_tw_mynode == 0) { - printf("Network switch completed!\n"); + bool const is_surrogate_on = net_surr_config.model.is_surrogate_on(); + if (is_surrogate_on == 
surrogate_state_pre_switch) { + // The surrogate was never switched! + return; } + + master_printf(DEBUG_DIRECTOR == 1, "Network switch completed!\n"); if (DEBUG_DIRECTOR > 1) { printf("PE %lu: Switch completed!\n", g_tw_mynode); } // Determining time in surrogate - if (net_surr_config.model.is_surrogate_on()) { + if (is_surrogate_on) { // Start tracking time spent in surrogate mode surrogate_time_last = end; } else { @@ -434,10 +452,10 @@ void network_director_finalize(void) { } // === Function for application director to use switch to surrogate machinery -void surrogate_switch_network_model(tw_pe * pe) { +void surrogate_switch_network_model(tw_pe * pe, bool is_queue_empty) { // Simply expose the existing switch_model function for use by application director double const start = tw_clock_read(); - switch_model(pe); + switch_model(pe, is_queue_empty); double const end = tw_clock_read(); surrogate_switching_time += end - start; } From ba7b826675ff4f690bb9465aa7bf81086b979be7 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 18 Jun 2025 10:03:17 -0400 Subject: [PATCH 092/110] Updating README and compile instructions --- CODES-compile-instructions.sh | 132 ++++++++++++++++++++++++++++++ README.md | 149 +++++++++++++++++++++++++++++++++- 2 files changed, 278 insertions(+), 3 deletions(-) create mode 100644 CODES-compile-instructions.sh diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh new file mode 100644 index 00000000..ac15c087 --- /dev/null +++ b/CODES-compile-instructions.sh @@ -0,0 +1,132 @@ +#!/bin/bash -x + +# Switches +swm_enable=1 +union_enable=1 +torch_enable=0 + +# Uncomment below for MPICH +#export PATH=/usr/local/mpich-4.1.2/bin/:"$PATH" +# Note: remember to compile MPICH with nemesis not with UCX support + +################## Actual script starts from here ################## + +# SWM has to be enabled for UNION to work +if [ $union_enable = 1 ]; then + swm_enable=1 +fi + +# What to compile +CUR_DIR="$PWD" + +##### Downloading
everything ##### + +git clone https://github.com/codes-org/codes --branch=kronos-develop +git clone https://github.com/ross-org/ross --depth=20 --branch=at_gvt_arbitrary_function + +if [ $swm_enable = 1 ]; then + git clone https://github.com/pmodels/argobots --depth=1 + # This version is one commit ahead + git clone https://github.com/helq/swm-workloads --depth=1 --branch=fix-global-variable-rem +fi + +if [ $union_enable = 1 ]; then + # Downloading conceptual + curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz + tar xvf conceptual-1.5.1b.tar.gz + # Downloading union + git clone https://github.com/SPEAR-UIC/Union +fi + +##### COMPILING ##### + +mkdir ross/build +pushd ross/build +cmake .. -DROSS_BUILD_MODELS=ON -DCMAKE_INSTALL_PREFIX="$(realpath ./bin)" \ + -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-g -Wall" +#make VERBOSE=1 +make install -j4 +err=$? +[[ $err -ne 0 ]] && exit $err +popd + +if [ $swm_enable = 1 ]; then + pushd swm-workloads/swm + ./prepare.sh + mkdir build + pushd build + ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + #make V=1 && make install + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd && popd + + pushd argobots + ./autogen.sh + mkdir build + pushd build + #../configure --enable-debug=all --disable-fast --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + #make V=1 && make install + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd && popd +fi + +if [ $union_enable = 1 ]; then + pushd conceptual-1.5.1b + PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm + make -j4 && make install + err=$? 
+ [[ $err -ne 0 ]] && exit $err + popd + + pushd Union + ./prepare.sh + ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd +fi + + +mkdir codes/build +pushd codes/build + +make_args_codes=( + -DCMAKE_PREFIX_PATH="$(realpath "$CUR_DIR/ross/build/bin")" + -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc + -DCMAKE_C_FLAGS="-g -Wall" + -DCMAKE_CXX_FLAGS="-g -Wall" + -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON + -DCMAKE_INSTALL_PREFIX="$(realpath bin)" +) +if [ $swm_enable = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DSWM_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/swm-workloads/swm/build/maint")" + -DARGOBOTS_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/argobots/build/maint")" + ) +fi +if [ $union_enable = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")" + ) +fi +if [ $torch_enable = 1 ]; then + make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=true) +else + make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false) +fi + +cmake .. "${make_args_codes[@]}" +#make VERBOSE=1 +make -j4 +err=$? +[[ $err -ne 0 ]] && exit $err + +popd diff --git a/README.md b/README.md index 3388fad8..a86424be 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,154 @@ # CODES Discrete-event Simulation Framework -### [Join our CODES user mailing list](https://mailchi.mp/75d0c8aa42c3/codes-user-group) to stay up to date with major changes, events, and news! +A high-performance discrete-event simulation framework for modeling HPC system architectures, network fabrics, and storage systems. Built on top of ROSS (Rensselaer Optimistic Simulation System) for massively parallel simulation capabilities. -### New? 
Check out the [Wiki for Installation, Tutorials, and Documentation](https://github.com/codes-org/codes/wiki) +## Quick Start -Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities. +The easiest way to build CODES is using our automated compilation script that handles all dependencies and configurations. + +1. **Download the compilation script** [click here](https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh) or: + + ```bash + # Download the script to your desired directory + wget https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh + chmod +x CODES-compile-instructions.sh + ``` + +2. **Edit and Run the script**: + ```bash + ./CODES-compile-instructions.sh + ``` + +The script will create a new directory with all dependencies and CODES compiled and ready to use. 
+ +## Features + +CODES provides comprehensive simulation capabilities for: + +### Network Topologies +- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date) +- **Torus**: Multi-dimensional torus networks +- **Fat-tree**: Hierarchical tree topologies +- **Express Mesh**: Enhanced mesh networks +- **Simple P2P**: Point-to-point networks + +### Workload Generation +- **SWM and UNION**: Workload generation +- **MPI trace replay**: Support for DUMPI traces +- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns + +### Multi-fidelity Simulation +- **Network surrogate models**: Switch between high-fidelity and surrogate modes +- **Application surrogate models**: Accelerate application-level simulation +- **Adaptive directors**: Intelligent switching between simulation modes + +## Prerequisites + +- **MPI**: OpenMPI or MPICH for parallel execution +- **CMake**: Version 3.12 or higher +- **ROSS**: Rensselaer Optimistic Simulation System (handled by script) +- **C/C++ compiler**: GCC or Clang with C++11 support + +Optional dependencies (automatically handled by script if enabled): +- **UNION**: For advanced workload generation +- **SWM**: For structured workload modeling +- **Argobots**: Threading library for enhanced performance +- **PyTorch**: For ML model integration (if enabled) + +## Manual Installation + +For advanced users who prefer manual installation: + +```bash +# 1. Build and install ROSS first +git clone https://github.com/ross-org/ROSS.git +cd ROSS && mkdir build && cd build +cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/ross +make -j && make install +cd ../.. + +# 2. Clone and build CODES +git clone https://github.com/codes-org/codes.git +cd codes && mkdir build && cd build + +# 3. Configure with CMake +cmake .. \ + -DCMAKE_PREFIX_PATH=$HOME/ross \ + -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_CXX_COMPILER=mpicxx \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTING=ON + +# 4. 
Build and test +make -j +ctest +``` + +## Testing + +Check your installation with: + +```bash +# Run all tests +cd codes/build && ctest + +# Run specific tests +ctest -R modelnet-test-dragonfly +ctest -R union-workload-test-surrogate + +# Keep test output for inspection +DONT_DELETE_TEST_DIR=1 ctest -R your-test-name +``` + +All tests pass as of this writing, including those that require UNION support. Tests verify: + +- Network model correctness and determinism +- Workload generation and replay accuracy +- Multi-fidelity simulation switching +- Parallel execution and reverse computation +- Configuration file parsing and LP setup + +## Basic Usage + +Running a CODES experiment is tricky due to the large number of components that have to be correctly configured. Please use the [experiments repo](https://github.com/CODES-org/experiments) for examples of simulations you can run. + +If you have used the compilation script from above (Quick Start), run the following (in the folder that contains `CODES-compile-instructions.sh`): + +```bash +git clone https://github.com/CODES-org/experiments +``` + +To run an experiment do: + +```bash +cd experiments +bash run-experiment.sh path-to-experiment/script.sh +``` + +A folder will be created under `path-to-experiment/results` containing the result of running the experiment. + +## Contributing + +Before contributing, please run the full test suite. Some tests verify our determinism guarantees (every simulation should be reproducible), i.e., the number of net events processed between two runs in parallel mode should be the same. We want to keep our determinism guarantees forever. Non-deterministic simulations are often the result of faulty reverse handlers, which have caused serious bugs and hundreds of hours of debugging. 
+ +If you find yourself with a model that is not deterministic (two runs with the same initial configuration produce different numbers of net events), then you can check for errors in the reverse handlers via the ROSS feature: reverse handlers check. For this, run your model with `--synch=6`. Make sure that all LPs in the simulation (i.e., routers, terminals, and others) have implemented proper reversibility checks (defined in a struct of type `crv_checkpointer`). + +## License + +See the LICENSE file for licensing information. + +## Credits + +Developed by Argonne National Laboratory and Rensselaer Polytechnic Institute, with collaborations from UC Davis and Lawrence Livermore National Laboratory. + +## About CODES + +Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities. Our simulations build upon the Rensselaer Optimistic Simulation System (ROSS), a discrete event simulation framework that allows simulations to be run in parallel, decreasing the simulation run time of massive simulations to hours. We are using ROSS to explore topics including large-scale storage systems, I/O workloads, HPC network fabrics, distributed science systems, and data-intensive computation environments. The CODES project is a collaboration between the Mathematics and Computer Science department at Argonne National Laboratory and Rensselaer Polytechnic Institute. We collaborate with researchers at University of California at Davis to come up with novel methods for analysis and visualizations of large-scale event driven simulations. We also collaborate with Lawrence Livermore National Laboratory for modeling HPC interconnect systems. 
+ +## About this README + +Claude helped us in templating this doc. Any typos are our own and after the fact. From 07a4002fbe6b72d66c38839829ee1727d6689914 Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 20 Jun 2025 09:17:05 -0400 Subject: [PATCH 093/110] Small print changes --- src/network-workloads/model-net-mpi-replay.c | 2 +- src/surrogate/app-iteration-predictor/average.c | 2 ++ src/surrogate/application-surrogate.c | 2 +- src/workload/methods/codes-conc-online-comm-wrkld.C | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 475d1675..5d42f2b6 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -37,7 +37,7 @@ #define MAX_STATS 65536 #define COL_TAG 1235 #define BAR_TAG 1234 -#define PRINT_SYNTH_TRAFFIC 1 +#define PRINT_SYNTH_TRAFFIC 0 #define MAX_JOBS 64 #define MAX_PERIODS_PER_APP 512 #define NEAR_ZERO .0001 //timestamp for use to be 'close to zero' but still allow progress, zero offset events are hard on the PDES engine diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index 615594a5..db098307 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -122,6 +122,8 @@ static void model_calls_predict_rc(tw_lp * lp, int nw_id_in_pe) {} static void reset_with(bool const * app_just_ended) { ready_to_skip = false; + + master_printf("Resetting (average) application predictor at GVT %d time %f\n", g_tw_gvt_done, g_tw_pe->GVT_sig.recv_ts) int last_iter[my_config.num_apps]; find_max_iter_per_app(last_iter); // We should start tracking iterations from the next iteration diff --git a/src/surrogate/application-surrogate.c b/src/surrogate/application-surrogate.c index 736fdd89..fb6044df 100644 --- a/src/surrogate/application-surrogate.c +++ b/src/surrogate/application-surrogate.c @@ 
-90,7 +90,7 @@ static void application_director_post_switch(tw_pe * pe, bool is_queue_empty) { time_in_surrogate += start - surrogate_time_last; surrogate_time_last = 0.0; } else { - master_printf("Resetting predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); + master_printf("Resetting network predictor at GVT %d time %f\n", g_tw_gvt_done, gvt_for(pe)); } director_state = PRE_JUMP; } diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C index a78f9abf..cdd1705d 100644 --- a/src/workload/methods/codes-conc-online-comm-wrkld.C +++ b/src/workload/methods/codes-conc-online-comm-wrkld.C @@ -170,6 +170,7 @@ void UNION_MPI_Finalize() ABT_thread_yield_to(global_prod_thread); } +// cycle_count assumes 1 GHz, meaning, 1 cycle is 1 nanosecond. This is different from SWM_Compute! void UNION_Compute(long cycle_count) { /* Add an event in the shared queue and then yield */ From 8be98f948eb624b5d8249aaed24ed9bb1fe17f0d Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 20 Jun 2025 12:45:24 -0400 Subject: [PATCH 094/110] Allowing conc-online to load json files from config path --- src/network-workloads/model-net-mpi-replay.c | 55 ++++++++++++++ .../methods/codes-conc-online-comm-wrkld.C | 76 ++++++++++--------- 2 files changed, 94 insertions(+), 37 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 5d42f2b6..0d9eea3d 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -90,10 +90,17 @@ static lp_io_handle io_handle; static unsigned int lp_io_use_suffix = 0; static int do_lp_io = 0; +/* Workload JSON file mapping structure */ +struct codes_workload_json_mapping { + char workload_type[MAX_NAME_LENGTH_WKLD]; + char json_path[8192]; +}; + /* variables for loading multiple applications */ char workloads_conf_file[8192]; char workloads_timer_file[8192]; char workloads_period_file[8192]; 
+char workload_json_files[8192]; char alloc_file[8192]; int num_traces_of_job[MAX_JOBS]; int is_job_synthetic[MAX_JOBS]; //0 if job is not synthetic 1 if job is @@ -105,6 +112,8 @@ int period_count[MAX_JOBS]; double period_time[MAX_JOBS][MAX_PERIODS_PER_APP]; float period_interval[MAX_JOBS][MAX_PERIODS_PER_APP]; char file_name_of_job[MAX_JOBS][8192]; +struct codes_workload_json_mapping workload_json_mappings[MAX_JOBS]; +int workload_json_mapping_count; tw_stime max_elapsed_time_per_job[MAX_JOBS] = {0}; @@ -2563,6 +2572,20 @@ void nw_test_init(nw_state* s, tw_lp* lp) { strcpy(oc_params.workload_name, file_name_of_job[lid.job]); } + + /* Look up custom JSON path for this workload */ + oc_params.file_path[0] = '\0'; + char * wrkl_name_settings = oc_params.workload_name; + if(strncmp("conceptual", oc_params.workload_name, 10) == 0) { + wrkl_name_settings = "conceptual"; + } + for(int i = 0; i < workload_json_mapping_count; i++) { + if(strcmp(workload_json_mappings[i].workload_type, wrkl_name_settings) == 0) { + strcpy(oc_params.file_path, workload_json_mappings[i].json_path); + break; + } + } + /*TODO: nprocs is different for dumpi and online workload. for * online, it is the number of ranks to be simulated. */ // printf("conc-online num_traces_of_job %d\n", num_traces_of_job[lid.job]); @@ -2667,6 +2690,7 @@ void nw_test_init(nw_state* s, tw_lp* lp) } if (iter_predictor && !am_i_synthetic) { + assert(s->wrkld_id != -1); int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank); if (ending_iter == -1) { tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. 
app id=%d", s->app_id); @@ -3802,6 +3826,7 @@ const tw_optdef app_opt [] = TWOPT_CHAR("workload_file", workload_file, "workload file name"), TWOPT_CHAR("alloc_file", alloc_file, "allocation file name"), TWOPT_CHAR("workload_conf_file", workloads_conf_file, "workload config file name"), + TWOPT_CHAR("workload_json_files", workload_json_files, "workload json files mapping file name"), TWOPT_CHAR("link_failure_file", g_nm_link_failure_filepath, "filepath for override of link failure file from configuration for supporting models"), TWOPT_CHAR("workload_timer_file", workloads_timer_file, "workload timer file name (for starting/pausing/stopping synthetic traffic)"), TWOPT_CHAR("workload_period_file", workloads_period_file, "workload periods file name (for changing the per-job synthetic traffic load at specified periods/times)"), @@ -4026,9 +4051,12 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) return -1; } + bool is_conc_enabled = false; + /* Xin: Currently rendezvous protocol cannot work with Conceptual online workloads */ if(strcmp(workload_type, "conc-online") == 0) { EAGER_THRESHOLD = INT64_MAX; + is_conc_enabled = true; } jobmap_ctx = NULL; // make sure it's NULL if it's not used @@ -4133,6 +4161,33 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) } fclose(period_file); } + + /* Load workload JSON files mapping if specified */ + if(is_conc_enabled && strlen(workload_json_files) > 0) + { + FILE *json_file = fopen(workload_json_files, "r"); + if(!json_file) + tw_error(TW_LOC, "\n Could not open file %s ", workload_json_files); + + workload_json_mapping_count = 0; + + while(!feof(json_file) && workload_json_mapping_count < MAX_JOBS) + { + if(fscanf(json_file, "%s %s", + workload_json_mappings[workload_json_mapping_count].workload_type, + workload_json_mappings[workload_json_mapping_count].json_path) == 2) + { + workload_json_mapping_count++; + } + } + fclose(json_file); + + if(enable_debug) + printf("\n Loaded %d workload JSON 
mappings\n", workload_json_mapping_count); + } + if(!is_conc_enabled && strlen(workload_json_files) > 0) { + printf("\n Conceptual online worloads will not run, thus, we won't read any json files from --workload_json_files\n"); + } } else { diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C index cdd1705d..96f93764 100644 --- a/src/workload/methods/codes-conc-online-comm-wrkld.C +++ b/src/workload/methods/codes-conc-online-comm-wrkld.C @@ -1839,6 +1839,44 @@ static void workload_caller(void * arg) } } +static void determine_workload_paths(const char* workload_name, const char* custom_json_path, string& swm_path, string& conc_path, bool& isconc) +{ + /* First check if custom JSON path is provided through file_path parameter */ + if(custom_json_path && strlen(custom_json_path) > 0) { + if(strncmp(workload_name, "conceptual", 10) == 0) { + conc_path.append(custom_json_path); + isconc = 1; + } else { + swm_path.append(custom_json_path); + } + return; + } + + /* Fall back to hardcoded paths */ + swm_path.append(SWM_DATAROOTDIR); + if(strcmp(workload_name, "lammps") == 0) { + swm_path.append("/lammps_workload.json"); + } else if(strcmp(workload_name, "nekbone") == 0) { + swm_path.append("/workload.json"); + } else if(strcmp(workload_name, "milc") == 0) { + swm_path.append("/milc_skeleton.json"); + } else if(strcmp(workload_name, "nearest_neighbor") == 0) { + swm_path.append("/skeleton.json"); + } else if(strcmp(workload_name, "incast") == 0) { + swm_path.append("/incast.json"); + } else if(strcmp(workload_name, "incast1") == 0) { + swm_path.append("/incast1.json"); + } else if(strcmp(workload_name, "incast2") == 0) { + swm_path.append("/incast2.json"); + } else if(strncmp(workload_name, "conceptual", 10) == 0) { + conc_path.append(UNION_DATADIR); + conc_path.append("/conceptual.json"); + isconc = 1; + } else { + tw_error(TW_LOC, "\n Undefined workload type %s ", workload_name); + } +} + static int 
comm_online_workload_load(const void * params, int app_id, int rank) { /* LOAD parameters from JSON file*/ @@ -1867,43 +1905,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) bool isconc=0; // printf("workload name: %s\n", o_params->workload_name); - swm_path.append(SWM_DATAROOTDIR); - if(strcmp(o_params->workload_name, "lammps") == 0) - { - swm_path.append("/lammps_workload.json"); - } - else if(strcmp(o_params->workload_name, "nekbone") == 0) - { - swm_path.append("/workload.json"); - } - else if(strcmp(o_params->workload_name, "milc") == 0) - { - swm_path.append("/milc_skeleton.json"); - } - else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0) - { - swm_path.append("/skeleton.json"); - } - else if(strcmp(o_params->workload_name, "incast") == 0) - { - swm_path.append("/incast.json"); - } - else if(strcmp(o_params->workload_name, "incast1") == 0) - { - swm_path.append("/incast1.json"); - } - else if(strcmp(o_params->workload_name, "incast2") == 0) - { - swm_path.append("/incast2.json"); - } - else if(strncmp(o_params->workload_name, "conceptual", 10) == 0) - { - conc_path.append(UNION_DATADIR); - conc_path.append("/conceptual.json"); - isconc = 1; - } - else - tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name); + determine_workload_paths(o_params->workload_name, o_params->file_path, swm_path, conc_path, isconc); // printf("\nUnion jason path %s\n", conc_path.c_str()); if(isconc){ From 25ab4c9b081c01f95ae915ad7af14736fd50987a Mon Sep 17 00:00:00 2001 From: helq Date: Fri, 20 Jun 2025 17:57:31 -0400 Subject: [PATCH 095/110] If we pass on a `workload_json_files` conf file, we allow a job to take a different name The idea of this change is to be able to have a configuration file like: ``` 20 milc1 1 0 15 conceptual-jacobi3d-5 1 0 ``` While the workload_json_files allow us to tell CODES where to look for the json configuration files: ``` milc1 path-to/milc1.json conceptual-jacobi3d-5 
path-to/my-conceptual-jacobi3d.json ``` --- src/network-workloads/model-net-mpi-replay.c | 6 +-- .../methods/codes-conc-online-comm-wrkld.C | 54 ++++++++++++------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 0d9eea3d..36bed9f8 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -2575,12 +2575,8 @@ void nw_test_init(nw_state* s, tw_lp* lp) /* Look up custom JSON path for this workload */ oc_params.file_path[0] = '\0'; - char * wrkl_name_settings = oc_params.workload_name; - if(strncmp("conceptual", oc_params.workload_name, 10) == 0) { - wrkl_name_settings = "conceptual"; - } for(int i = 0; i < workload_json_mapping_count; i++) { - if(strcmp(workload_json_mappings[i].workload_type, wrkl_name_settings) == 0) { + if(strcmp(workload_json_mappings[i].workload_type, oc_params.workload_name) == 0) { strcpy(oc_params.file_path, workload_json_mappings[i].json_path); break; } diff --git a/src/workload/methods/codes-conc-online-comm-wrkld.C b/src/workload/methods/codes-conc-online-comm-wrkld.C index 96f93764..42b4b5c7 100644 --- a/src/workload/methods/codes-conc-online-comm-wrkld.C +++ b/src/workload/methods/codes-conc-online-comm-wrkld.C @@ -1839,41 +1839,41 @@ static void workload_caller(void * arg) } } -static void determine_workload_paths(const char* workload_name, const char* custom_json_path, string& swm_path, string& conc_path, bool& isconc) +static void determine_workload_paths(online_comm_params const * o_params, string& swm_path, string& conc_path, bool& isconc) { /* First check if custom JSON path is provided through file_path parameter */ - if(custom_json_path && strlen(custom_json_path) > 0) { - if(strncmp(workload_name, "conceptual", 10) == 0) { - conc_path.append(custom_json_path); + if(strlen(o_params->file_path) > 0) { + if(strncmp(o_params->workload_name, "conceptual", 10) == 0) { + 
conc_path.append(o_params->file_path); isconc = 1; } else { - swm_path.append(custom_json_path); + swm_path.append(o_params->file_path); } return; } /* Fall back to hardcoded paths */ swm_path.append(SWM_DATAROOTDIR); - if(strcmp(workload_name, "lammps") == 0) { + if(strcmp(o_params->workload_name, "lammps") == 0) { swm_path.append("/lammps_workload.json"); - } else if(strcmp(workload_name, "nekbone") == 0) { + } else if(strcmp(o_params->workload_name, "nekbone") == 0) { swm_path.append("/workload.json"); - } else if(strcmp(workload_name, "milc") == 0) { + } else if(strcmp(o_params->workload_name, "milc") == 0) { swm_path.append("/milc_skeleton.json"); - } else if(strcmp(workload_name, "nearest_neighbor") == 0) { + } else if(strcmp(o_params->workload_name, "nearest_neighbor") == 0) { swm_path.append("/skeleton.json"); - } else if(strcmp(workload_name, "incast") == 0) { + } else if(strcmp(o_params->workload_name, "incast") == 0) { swm_path.append("/incast.json"); - } else if(strcmp(workload_name, "incast1") == 0) { + } else if(strcmp(o_params->workload_name, "incast1") == 0) { swm_path.append("/incast1.json"); - } else if(strcmp(workload_name, "incast2") == 0) { + } else if(strcmp(o_params->workload_name, "incast2") == 0) { swm_path.append("/incast2.json"); - } else if(strncmp(workload_name, "conceptual", 10) == 0) { + } else if(strncmp(o_params->workload_name, "conceptual", 10) == 0) { conc_path.append(UNION_DATADIR); conc_path.append("/conceptual.json"); isconc = 1; } else { - tw_error(TW_LOC, "\n Undefined workload type %s ", workload_name); + tw_error(TW_LOC, "\n Undefined workload type %s ", o_params->workload_name); } } @@ -1905,7 +1905,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) bool isconc=0; // printf("workload name: %s\n", o_params->workload_name); - determine_workload_paths(o_params->workload_name, o_params->file_path, swm_path, conc_path, isconc); + determine_workload_paths(o_params, swm_path, conc_path, isconc); 
// printf("\nUnion jason path %s\n", conc_path.c_str()); if(isconc){ @@ -1915,8 +1915,16 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) // printf("workload_name: %s\n", o_params->workload_name); union_bench_param *tmp_params = (union_bench_param *) calloc(1, sizeof(union_bench_param)); - strcpy(tmp_params->conc_program, &o_params->workload_name[11]); - child = root.get_child(tmp_params->conc_program); + child = root.get_child(&o_params->workload_name[11]); + + // if we were given a path, we read the type of workload from the config + bool const has_path = o_params->file_path[0] != '\0'; + if (has_path) { + strcpy(tmp_params->conc_program, child.get_child("argv").begin()->second.data().c_str()); + } else { + strcpy(tmp_params->conc_program, &o_params->workload_name[11]); + } + tmp_params->conc_argc = child.get("argc"); int i = 0; BOOST_FOREACH(boost::property_tree::ptree::value_type &v, child.get_child("argv")) @@ -1931,7 +1939,7 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) } catch(std::exception & e) { - printf("%s \n", e.what()); + printf("Exception when reading UNION/Conceptual json config %s: %s\n", conc_path.c_str(), e.what()); return -1; } } @@ -1939,12 +1947,18 @@ static int comm_online_workload_load(const void * params, int app_id, int rank) try { std::ifstream jsonFile(swm_path.c_str()); boost::property_tree::json_parser::read_json(jsonFile, root); - uint32_t process_cnt = root.get("jobs.size", 1); cpu_freq = root.get("jobs.cfg.cpu_freq") / 1e9; + + // if we were given a path, we read the type of workload from the config + bool const has_path = o_params->file_path[0] != '\0'; + if (has_path) { + strcpy(o_params->workload_name, root.get("jobs.cfg.app").c_str()); + strcpy(my_ctx->sctx.workload_name, o_params->workload_name); + } } catch(std::exception & e) { - printf("%s \n", e.what()); + printf("Exception when reading SWM json config %s: %s\n", swm_path.c_str(), e.what()); return -1; } 
my_ctx->sctx.isconc = 0; From 64c6cce74c3eb03abdaec251e4369eab0bbbd2a7 Mon Sep 17 00:00:00 2001 From: helq Date: Mon, 23 Jun 2025 18:54:51 -0400 Subject: [PATCH 096/110] Extending iterator predictor to predict when to restart the simulation --- .../app-iteration-predictor/common.h | 14 +- src/network-workloads/model-net-mpi-replay.c | 55 +++++--- .../app-iteration-predictor/average.c | 125 ++++++++++++++---- 3 files changed, 150 insertions(+), 44 deletions(-) diff --git a/codes/surrogate/app-iteration-predictor/common.h b/codes/surrogate/app-iteration-predictor/common.h index d2eabc99..5f5e7b96 100644 --- a/codes/surrogate/app-iteration-predictor/common.h +++ b/codes/surrogate/app-iteration-predictor/common.h @@ -17,9 +17,19 @@ extern "C" { * Iteration application prediction machinery. Notice that any of these predictors have to know how many iterations to run in total, thus they need data about the number of steps the application will take. */ +enum NODE_TYPE { + NODE_TYPE_unassigned = 0, + NODE_TYPE_background_noise = 1, + NODE_TYPE_app = 2, +}; + struct app_iter_node_config { int app_id; - int app_ending_iter; + enum NODE_TYPE type; + union { + // To be used by NODE_TYPE_app only + int app_ending_iter; + }; }; // This returns how much to skip ahead and when to restart @@ -45,8 +55,8 @@ typedef void (*feed_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id, typedef void (*end_pred_iter_f) (tw_lp * lp, int nw_id_in_pe, double time); // Tells the predictor that the application has stopped running typedef struct iteration_pred (*predict_pred_iter_f) (tw_lp * lp, int nw_id_in_pe); // Get prediction typedef void (*predict_pred_iter_rc_f) (tw_lp * lp, int nw_id_in_pe); // Reverse prediction (reverse state of predictor one prediction) -// Director calls to predictor module typedef bool (*have_we_hit_switch_f) (tw_lp * lp, int nw_id_in_pe, int iteration_id); // Are we ready to switch to a future iterationº +// Director calls to predictor module typedef bool 
(*is_predictor_read_f) (void); // Checking if it is a good time to switch (enough data has been collected or we have received some notification of an application ending, forcing us to restart collecting data). This might trigger an MPI_Allreduce call, thus has to be called by all PEs! typedef void (*reset_pred_iter_f) (void); // Resets the predictor (eg, average) typedef struct fast_forward_values (*prepare_fast_forward_f) (void); // Checking if it is a good time to switch (enough data has been collected) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 36bed9f8..539b8299 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -434,6 +434,7 @@ struct nw_message int saved_syn_length; int saved_perm; // Used by PERMUTATION unsigned long saved_prev_switch; // Used by PERMUTATION + unsigned long long saved_gen_data; } gen; // For CLI_BCKGND_ARRIVE and MPI_SEND_ARRIVED_CB @@ -907,10 +908,9 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp s->saved_perm_dest = m->rc.gen.saved_perm; tw_rand_reverse_unif(lp->rng); } - int i; - for (i=0; i < m->rc.gen.saved_syn_length; i++){ + s->gen_data = m->rc.gen.saved_gen_data; + for (int i=0; i < m->rc.gen.saved_syn_length; i++){ model_net_event_rc2(lp, &m->event_rc); - s->gen_data -= payload_sz; num_syn_bytes_sent -= payload_sz; s->num_bytes_sent -= payload_sz; s->ross_sample.num_bytes_sent -= payload_sz; @@ -925,6 +925,9 @@ static void gen_synthetic_tr_rc(nw_state * s, tw_bf * bf, nw_message * m, tw_lp s->saved_perm_dest = m->rc.gen.saved_perm; tw_rand_reverse_unif(lp->rng); } + if (bf->c13) { + iter_predictor->model.predict_rc(lp, s->nw_id_in_pe); + } } /* generate synthetic traffic */ @@ -1078,6 +1081,9 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l length = 0; } } + + m->rc.gen.saved_gen_data = s->gen_data; + if(length > 0) { // 
m->event_array_rc = (model_net_event_return) malloc(length * sizeof(model_net_event_return)); @@ -1112,10 +1118,19 @@ static void gen_synthetic_tr(nw_state * s, tw_bf * bf, nw_message * m, tw_lp * l /* New event after MEAN_INTERVAL */ tw_stime ts = mean_interval_of_job[s->app_id]; - tw_event * e; - nw_message * m_new; - e = tw_event_new(lp->gid, ts, lp); - m_new = (struct nw_message*)tw_event_data(e); + if (iter_predictor && iter_predictor->model.have_we_hit_switch(lp, s->nw_id_in_pe, 0)) { // background synthetic lps have no iterations + bf->c13 = 1; + struct iteration_pred iter_pred = iter_predictor->model.predict(lp, s->nw_id_in_pe); + double const restarting_background_at = iter_pred.restart_at; + // this check is necessary because we don't rely on iteration count for switch like applications do + if (restarting_background_at > tw_now(lp)) { + long const periods_to_jump = ceil((restarting_background_at - tw_now(lp)) / mean_interval_of_job[s->app_id]); + ts *= periods_to_jump; + s->gen_data += periods_to_jump * (length + payload_sz); + } + } + tw_event * e = tw_event_new(lp->gid, ts, lp); + nw_message * m_new = (struct nw_message*)tw_event_data(e); m_new->msg_type = CLI_BCKGND_GEN; tw_event_send(e); @@ -2625,9 +2640,6 @@ void nw_test_init(nw_state* s, tw_lp* lp) s->compute_time = 0; s->elapsed_time = 0; - s->app_id = lid.job; - s->local_rank = lid.rank; - bool am_i_synthetic = false; if(strncmp(file_name_of_job[lid.job], "synthetic", 9) == 0) { @@ -2685,17 +2697,26 @@ void nw_test_init(nw_state* s, tw_lp* lp) } } - if (iter_predictor && !am_i_synthetic) { - assert(s->wrkld_id != -1); - int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank); - if (ending_iter == -1) { - tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. 
app id=%d", s->app_id); - } else { + if (iter_predictor) { + if (am_i_synthetic) { struct app_iter_node_config conf = { .app_id = s->app_id, - .app_ending_iter = ending_iter, + .type = NODE_TYPE_background_noise, }; iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + } else { + assert(s->wrkld_id != -1); + int const ending_iter = codes_workload_get_final_iteration(s->wrkld_id, s->app_id, s->local_rank); + if (ending_iter == -1) { + tw_warning(TW_LOC, "Predictor for non-synthetic job cannot be initialized. app id=%d", s->app_id); + } else { + struct app_iter_node_config conf = { + .app_id = s->app_id, + .type = NODE_TYPE_app, + .app_ending_iter = ending_iter, + }; + iter_predictor->model.init(lp, s->nw_id_in_pe, &conf); + } } } diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index db098307..eb6355bd 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -1,10 +1,13 @@ #include "surrogate/app-iteration-predictor/average.h" #include "codes/codes.h" +#include "surrogate/app-iteration-predictor/common.h" #include #include #include +#include -#define master_printf(str, ...) if (g_tw_mynode == 0) { printf(str, __VA_ARGS__); } +#define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); } +#define master_printf_if(val, ...) 
if (val && g_tw_mynode == 0) { printf(__VA_ARGS__); } static struct avg_app_config my_config = {0}; @@ -24,7 +27,8 @@ enum APP_STATUS { }; struct app_data { - int num_nodes; + enum NODE_TYPE type; + int num_nodes; // nodes in PE int nodes_with_enough_iters; int ending_iteration; // last iteration the simulation will run (aka, num of iterations) int nodes_that_have_ended; @@ -39,6 +43,15 @@ struct app_data { static struct app_data * arr_app_data = NULL; // array containing info for all apps static bool ready_to_skip = false; +static inline char const * string_node_type(enum NODE_TYPE type) { + switch (type) { + case NODE_TYPE_unassigned: return "Unassigned app"; + case NODE_TYPE_background_noise: return "Background noise/synthetic pattern"; + case NODE_TYPE_app: return "App that runs on predictable iterations"; + default: return "Unknown type!"; + } +} + static void find_max_iter_per_app(int * save_last_iter); static inline void mpi_allreduce_int_max(int const * local_data, int * result_data, int count); @@ -64,27 +77,54 @@ static void model_calls_init(tw_lp * lp, int nw_id_in_pe, struct app_iter_node_c // Storing app data info arr_app_data[config->app_id].num_nodes++; + + if (arr_app_data[config->app_id].type == NODE_TYPE_unassigned) { + arr_app_data[config->app_id].type = config->type; + } else if (arr_app_data[config->app_id].type != config->type) { + tw_error(TW_LOC, "Two different ranks for application %d have signaded different compute node types. 
LP ID %d is of type '%s', but app had been configured as '%s'", lp->gid, string_node_type(arr_app_data[config->app_id].type), string_node_type(config->type)); + } + + if (config->type == NODE_TYPE_background_noise) { + return; // nothing left to set for synthetic workloads + } + if (arr_app_data[config->app_id].ending_iteration == INT_MIN) { arr_app_data[config->app_id].ending_iteration = config->app_ending_iter; - } else { - if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) { - tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration); - } + } else if (arr_app_data[config->app_id].ending_iteration != config->app_ending_iter) { + tw_error(TW_LOC, "Two different ranks for application %d have differing total iterations they will run (%d != %d)", config->app_id, config->app_ending_iter, arr_app_data[config->app_id].ending_iteration); } } +static inline void assert_app_initialized(int nw_id_in_pe) { + int const app_id = app_id_for(nw_id_in_pe); + if (app_id == -1) { + assert(arr_app_data[app_id].type == NODE_TYPE_unassigned); + tw_error(TW_LOC, "Predictor for node was not initialized! Node ID (on PE) %d", nw_id_in_pe); + } +} static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double iteration_time) { (void) lp; assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); - if (app_id_for(nw_id_in_pe) == -1) { - tw_error(TW_LOC, "Predictor for node was not initialized! 
Node ID (on PE) %d", nw_id_in_pe); + assert_app_initialized(nw_id_in_pe); + + int const app_id = app_id_for(nw_id_in_pe); + + // We should only be handling non-synthetic workloads (aka, no background noise) + static bool shown_warning = false; + if (!shown_warning && arr_app_data[app_id].type == NODE_TYPE_background_noise) { + shown_warning = true; + tw_warning(TW_LOC, "`feed` has been called in App %d, which was determined to be Background traffic (aka, a synthetic workload)", app_id); + return; } + + assert(arr_app_data[app_id].type == NODE_TYPE_app); struct node_data * node_data = &arr_node_data[nw_id_in_pe]; - if (node_data->last_iter >= iter) { // we only collect iteration data past the previous `last_iter` + // we only collect iteration data past the previous `last_iter` + if (node_data->last_iter >= iter) { return; } - if (arr_app_data[node_data->app_id].status != APP_STATUS_running) { + if (arr_app_data[app_id].status != APP_STATUS_running) { tw_warning(TW_LOC, "Attempting to feed data to application predictor for an application that has either been marked as completed or not configured"); } node_data->acc_iteration_time += iteration_time - node_data->prev_iteration_time; @@ -93,13 +133,13 @@ static void model_calls_feed(tw_lp * lp, int nw_id_in_pe, int iter, double itera node_data->last_iter = iter; // We've hit the required number of iterations to feed our predictor if (node_data->acc_iters == my_config.num_iters_to_collect) { - arr_app_data[node_data->app_id].nodes_with_enough_iters++; + arr_app_data[app_id].nodes_with_enough_iters++; } } static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double iteration_time) { - assert(app_id_for(nw_id_in_pe) != -1); + assert_app_initialized(nw_id_in_pe); struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; app_data->nodes_that_have_ended++; if (app_data->nodes_that_have_ended == app_data->num_nodes) { @@ -110,7 +150,7 @@ static void model_calls_ended(tw_lp * lp, int nw_id_in_pe, double 
iteration_time static struct iteration_pred model_calls_predict(tw_lp * lp, int nw_id_in_pe) { assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); - assert(app_id_for(nw_id_in_pe) != -1); + assert_app_initialized(nw_id_in_pe); struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; return (struct iteration_pred) { .resume_at_iter = app_data->pred.resume_at_iter, @@ -151,10 +191,24 @@ static void reset_with(bool const * app_just_ended) { static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iteration_id) { assert(my_config.num_nodes_in_pe > (size_t) nw_id_in_pe); - int const app_id = app_id_for(nw_id_in_pe); - if (ready_to_skip && iteration_id == arr_app_data[app_id].pred.jump_at_iter) { - return true; + assert_app_initialized(nw_id_in_pe); + + if (!ready_to_skip) { + return false; } + + struct app_data * app_data = &arr_app_data[app_id_for(nw_id_in_pe)]; + switch (app_data->type) { + case NODE_TYPE_background_noise: + return true; + case NODE_TYPE_app: + if (iteration_id == app_data->pred.jump_at_iter) { + return true; + } + default: + break; + } + return false; } @@ -173,7 +227,7 @@ static inline void post_init_share_ending_iteration(void) { if (app_data->ending_iteration == INT_MIN) { if (ending_iteration[i] == INT_MIN) { app_data->status = APP_STATUS_completed_everywhere; - master_printf("Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i); + master_printf_if(app_data->type == NODE_TYPE_unassigned, "Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i); } else { // The application has "completed" in this PE already! 
app_data->status = APP_STATUS_just_completed; @@ -416,19 +470,39 @@ static double find_latest_restart_time(bool const * is_running, double const * a return last_to_finish; } +static double find_earliest_restart_time(bool const * is_running, double const * apps_restart_at_time) { + // Compute the earliest restart time among the running applications + double first_to_finish = DBL_MAX; + for (int i = 0; i < my_config.num_apps; i++) { + if (is_running[i] && first_to_finish > apps_restart_at_time[i]) { + first_to_finish = apps_restart_at_time[i]; + } + } + return first_to_finish; +} + static void set_app_prediction_data( bool const * is_running, int const * last_iter, int const * apps_restart_at_iter, - double const * apps_restart_at_time) { + double const * apps_restart_at_time, + double const earliest_app_restart) { // Set values for iteration to restart at and iterations to jump for each application for (int i = 0; i < my_config.num_apps; i++) { - if (!is_running[i]) { - continue; + switch (arr_app_data[i].type) { + case NODE_TYPE_unassigned: + break; + case NODE_TYPE_background_noise: + arr_app_data[i].pred.restart_at = earliest_app_restart; + break; + case NODE_TYPE_app: + if (is_running[i]) { + arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1; + arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i]; + arr_app_data[i].pred.restart_at = apps_restart_at_time[i]; + } + break; } - arr_app_data[i].pred.jump_at_iter = last_iter[i] + 1; - arr_app_data[i].pred.resume_at_iter = apps_restart_at_iter[i]; - arr_app_data[i].pred.restart_at = apps_restart_at_time[i]; } } @@ -456,7 +530,8 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) bool worth_switching = compute_restart_params(is_running, avg_iter_time, last_iter, last_iter_time, switch_time, apps_restart_at_time, apps_restart_at_iter); // b.
Compute last application to restart (this is restarting_at) - double last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time); + double const last_to_finish = find_latest_restart_time(is_running, apps_restart_at_time); + double const first_to_finish = find_earliest_restart_time(is_running, apps_restart_at_time); // c. If the number of iterations to skip is zero for any app, force reset of predictor tracking if (!worth_switching) { @@ -467,7 +542,7 @@ static struct fast_forward_values director_calls_prepare_fast_forward_jump(void) } // 3. Set values for iteration to restart at and iterations to jump for each application - set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time); + set_app_prediction_data(is_running, last_iter, apps_restart_at_iter, apps_restart_at_time, first_to_finish); ready_to_skip = true; return (struct fast_forward_values) { From b992e4a45abaabea0d16c4d553ab3f7f8024c9e3 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 24 Jun 2025 12:39:57 -0400 Subject: [PATCH 097/110] Making post_init_share_ending_iteration intent clearer --- .../app-iteration-predictor/average.c | 57 ++++++++++++++----- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index eb6355bd..1dfae210 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -7,7 +7,6 @@ #include #define master_printf(...) if (g_tw_mynode == 0) { printf(__VA_ARGS__); } -#define master_printf_if(val, ...) 
if (val && g_tw_mynode == 0) { printf(__VA_ARGS__); } static struct avg_app_config my_config = {0}; @@ -212,6 +211,20 @@ static bool model_calls_have_we_hit_switch(tw_lp * lp, int nw_id_in_pe, int iter return false; } +static inline void find_app_types(enum NODE_TYPE * app_type) { + int app_type_here[my_config.num_apps]; + for (int i = 0; i < my_config.num_apps; i++) { + app_type_here[i] = arr_app_data[i].type; + } + int app_type_int[my_config.num_apps]; + mpi_allreduce_int_max(app_type_here, app_type_int, my_config.num_apps); + + // Convert back to enums + for (int i = 0; i < my_config.num_apps; i++) { + app_type[i] = app_type_int[i]; + } +} + static inline void post_init_share_ending_iteration(void) { // Sharing ending_iteration results across PEs int ending_iteration_here[my_config.num_apps]; @@ -221,20 +234,38 @@ static inline void post_init_share_ending_iteration(void) { int ending_iteration[my_config.num_apps]; mpi_allreduce_int_max(ending_iteration_here, ending_iteration, my_config.num_apps); + enum NODE_TYPE app_type[my_config.num_apps]; + find_app_types(app_type); + // Checking that total iterations are the same across nodes for (int i = 0; i < my_config.num_apps; i++) { - struct app_data * app_data = &arr_app_data[i]; - if (app_data->ending_iteration == INT_MIN) { - if (ending_iteration[i] == INT_MIN) { - app_data->status = APP_STATUS_completed_everywhere; - master_printf_if(app_data->type == NODE_TYPE_unassigned, "Workload/app %d has not been configured to be tracked by iteration predictor (it might be a synthetic workload)\n", i); - } else { - // The application has "completed" in this PE already! 
- app_data->status = APP_STATUS_just_completed; - } - app_data->ending_iteration = ending_iteration[i]; - } else if (ending_iteration[i] != app_data->ending_iteration) { - tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data->ending_iteration); + struct app_data * app_data_here = &arr_app_data[i]; + switch (app_type[i]) { + case NODE_TYPE_unassigned: + assert(app_data_here->type == NODE_TYPE_unassigned); + master_printf("Workload/app %d has not been configured to be tracked by iteration predictor\n", i); + app_data_here->status = APP_STATUS_completed_everywhere; + break; + case NODE_TYPE_background_noise: + if (app_data_here->type == NODE_TYPE_app) { + tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have signaled conflicting node type (here: application, other: background noise)", i); + } + // We assume the background noise stays the same forever, thus we can think of it as not running. But if the background noise were to change, we would have to keep it APP_STATUS_running. 
And, possibly, we would have to call .ended() from the background process + app_data_here->status = APP_STATUS_completed_everywhere; + app_data_here->type = NODE_TYPE_background_noise; + break; + case NODE_TYPE_app: + if (app_data_here->type == NODE_TYPE_unassigned) { + // There are no nodes for this application on this PE + app_data_here->status = APP_STATUS_just_completed; + } else if (app_data_here->type == NODE_TYPE_background_noise) { + tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have signaled conflicting node type (here: background noise, other: application)", i); + } else if (ending_iteration[i] != app_data_here->ending_iteration) { + tw_error(TW_LOC, "Two different ranks for application %d (on different PEs) have differing total iterations they will run (%d != %d)", i, ending_iteration[i], app_data_here->ending_iteration); + } + app_data_here->ending_iteration = ending_iteration[i]; + app_data_here->type = NODE_TYPE_app; + break; } } } From 82a69f81429908a2bc158bdd20e5546cfd102427 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 24 Jun 2025 18:39:37 -0400 Subject: [PATCH 098/110] Fixed cross-platform fscanf EOF handling Replaced fscanf loop with fgets/sscanf to handle trailing newlines consistently across systems (this bug was silently showing up in the GHC200 system). Also added error reporting for malformed lines. 
btw, this code was written by Claude and audited by me ;) --- src/network-workloads/model-net-mpi-replay.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 539b8299..27c0ba87 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -4094,13 +4094,15 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_error(TW_LOC, "\n Could not open file %s ", workloads_conf_file); int i = 0; - char ref = '\n'; - while(!feof(name_file)) + char line[1024]; + while(fgets(line, sizeof(line), name_file)) { - //TODO: can we allow for a 2 item line but with defaults for the last two? - ref = fscanf(name_file, "%d %s %d %f", &num_traces_of_job[i], file_name_of_job[i], &qos_level_of_job[i], &mean_interval_of_job[i]); + int const fields = sscanf(line, "%d %s %d %f", &num_traces_of_job[i], file_name_of_job[i], &qos_level_of_job[i], &mean_interval_of_job[i]); + if(fields != 4) { + tw_error(TW_LOC, "Invalid format in %s at line %d: expected 4 fields, got %d", workloads_conf_file, i+1, fields); + } - if(ref != EOF && strncmp(file_name_of_job[i], "synthetic", 9) == 0) + if(strncmp(file_name_of_job[i], "synthetic", 9) == 0) { num_syn_clients = num_traces_of_job[i]; num_net_traces += num_traces_of_job[i]; @@ -4112,7 +4114,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) tw_error(TW_LOC, "BISECTION requires and even number of nodes."); } - else if(ref!=EOF) + else { if(enable_debug) printf("\n%d traces of app %s (default qos class: %d)\n", num_traces_of_job[i], file_name_of_job[i], qos_level_of_job[i]); From 3295653de3aa95faeb20cd4011290d4dbb832eb7 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 25 Jun 2025 20:32:11 -0400 Subject: [PATCH 099/110] Fixing some errors found with valgrind --- src/networks/model-net/dragonfly-dally.C | 8 +++++--- 
src/surrogate/app-iteration-predictor/average.c | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 5465605a..91befa1b 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -4647,9 +4647,10 @@ static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_messag { int num_qos_levels = s->params->num_qos_levels; + assert(msg->rail_id < s->params->num_rails); if(msg->qos_reset1) s->qos_status[msg->rail_id][0] = Q_ACTIVE; - if(msg->qos_reset2) + if(msg->qos_reset2 && s->params->num_qos_levels > 1) s->qos_status[msg->rail_id][1] = Q_ACTIVE; if(msg->last_saved_qos >= 0) @@ -6435,9 +6436,10 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m int src_term_id = msg->dfdally_src_terminal_id; int app_id = msg->saved_app_id; + assert(output_port < s->params->radix); if(msg->qos_reset1) s->qos_status[output_port][0] = Q_ACTIVE; - if(msg->qos_reset2) + if(msg->qos_reset2 && s->params->num_qos_levels > 1) s->qos_status[output_port][1] = Q_ACTIVE; if(msg->last_saved_qos) @@ -8682,7 +8684,7 @@ static Connection dfdally_prog_adaptive_routing(router_state *s, tw_bf *bf, term vector< Connection > poss_nonmin_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, fdest_router_id); Connection best_min_conn, best_nonmin_conn; - ConnectionType conn_type_of_mins, conn_type_of_nonmins; + ConnectionType conn_type_of_mins = CONN_LOCAL, conn_type_of_nonmins = CONN_LOCAL; if (poss_min_next_stops.size() > 0) { diff --git a/src/surrogate/app-iteration-predictor/average.c b/src/surrogate/app-iteration-predictor/average.c index 1dfae210..b529be7f 100644 --- a/src/surrogate/app-iteration-predictor/average.c +++ b/src/surrogate/app-iteration-predictor/average.c @@ -169,6 +169,9 @@ static void reset_with(bool const * app_just_ended) { for (int i=0; i < my_config.num_nodes_in_pe; i++) { 
struct node_data * node_data = &arr_node_data[i]; + if (node_data->app_id == -1) { + continue; + } node_data->acc_iters = 0; node_data->acc_iteration_time = 0; if (node_data->last_iter < arr_app_data[node_data->app_id].pred.resume_at_iter) { @@ -349,6 +352,9 @@ static void find_avg_iteration_time(double * save_avg_time) { for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; int const app_id = node_data->app_id; + if (app_id == -1) { + continue; + } acc_iter_time_here[app_id] += node_data->acc_iteration_time; acc_iters_here[app_id] += node_data->acc_iters; } @@ -407,6 +413,9 @@ static void find_max_iter_per_app(int * save_last_iter) { for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; int const app_id = node_data->app_id; + if (app_id == -1) { + continue; + } if (last_iter_here[app_id] < node_data->last_iter) { last_iter_here[app_id] = node_data->last_iter; } @@ -422,6 +431,9 @@ static void find_avg_time_for_max_iter(double * save_last_iter_time, int const * for (int i=0; i < my_config.num_nodes_in_pe; i++) { struct node_data * node_data = &arr_node_data[i]; int const app_id = node_data->app_id; + if (app_id == -1) { + continue; + } if (node_data->last_iter == last_iter[app_id]) { acc_last_iter_time[app_id] += node_data->prev_iteration_time; acc_iters_here[app_id]++; @@ -482,8 +494,8 @@ static bool compute_restart_params( apps_restart_at_time[i] = last_iter_time[i] + iters_to_skip * avg_iter_time[i]; apps_restart_at_iter[i] = last_iter[i] + iters_to_skip; - // if we are not skipping at least two iterations, there is no point in trying to fastforward - if (iters_to_skip <= 2) { + // if we are not skipping at least one iteration, there is no point in trying to fastforward + if (iters_to_skip <= 1) { worth_switching = false; } } From 73cdbd54237addce142718e62ebba08d412301fc Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 26 Jun 2025 17:27:59 -0400 Subject: [PATCH 100/110] 
Updating CODES-compile-instructions.sh --- CODES-compile-instructions.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh index ac15c087..694f0c95 100644 --- a/CODES-compile-instructions.sh +++ b/CODES-compile-instructions.sh @@ -21,13 +21,13 @@ CUR_DIR="$PWD" ##### Downloading everything ##### -git clone https://github.com/codes-org/codes --branch=kronos-develop -git clone https://github.com/ross-org/ross --depth=20 --branch=at_gvt_arbitrary_function +git clone https://github.com/codes-org/codes --branch=director-app-automatic +git clone https://github.com/ross-org/ross --depth=100 --branch=gvt-hook-util if [ $swm_enable = 1 ]; then git clone https://github.com/pmodels/argobots --depth=1 # This version is one commit ahead - git clone https://github.com/helq/swm-workloads --depth=1 --branch=fix-global-variable-rem + git clone https://github.com/helq/swm-workloads --branch=total-iterations-communication fi if [ $union_enable = 1 ]; then @@ -35,7 +35,7 @@ if [ $union_enable = 1 ]; then curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz tar xvf conceptual-1.5.1b.tar.gz # Downloading union - git clone https://github.com/SPEAR-UIC/Union + git clone https://github.com/helq/Union --branch=total-iterations-communication fi ##### COMPILING ##### From 667dc2847306a327ec7280715ece78250e620b7e Mon Sep 17 00:00:00 2001 From: helq Date: Thu, 26 Jun 2025 20:09:11 -0400 Subject: [PATCH 101/110] Saving to file when an iteration has been skipped by the surrogate --- src/network-workloads/model-net-mpi-replay.c | 21 ++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 27c0ba87..58a27098 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ 
-427,7 +427,7 @@ struct nw_message int resume_at_iter; } fwd; - // A different struct for each type of MPI_NW_EVENTS + // A different struct for each type of MPI_NW_EVENTS (it can be used for the commit or the reverse handler) union { // For CLI_BCKGND_GEN struct { @@ -472,6 +472,7 @@ struct nw_message // CODES_WK_END and CODES_WK_MARK struct { double saved_marker_time; + bool was_skipped; } mark; }; } mpi_next; @@ -489,6 +490,11 @@ struct nw_message struct { int64_t saved_num_bytes; } mpi_ack; + + // For SURR_SKIP_ITERATION + struct { + double saved_marker_time; + } surr_skip; } rc; }; @@ -1216,6 +1222,7 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * { struct codes_workload_op mpi_op; int resume_at_iter = m->fwd.resume_at_iter; + m->rc.surr_skip.saved_marker_time = tw_now(lp); // consuming all events until indicated iteration is reached bool reached_end = false; @@ -1242,6 +1249,7 @@ static void skip_to_iteration(nw_state * s, tw_lp * lp, tw_bf * bf, nw_message * tw_event *e = tw_event_new(lp->gid, 0.0, lp); nw_message* msg = (nw_message*) tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; + msg->rc.mpi_next.mark.was_skipped = true; tw_event_send(e); } @@ -1761,6 +1769,7 @@ static void codes_issue_next_event(tw_lp* lp) msg = (nw_message*)tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; + msg->rc.mpi_next.mark.was_skipped = false; tw_event_send(e); } @@ -1799,6 +1808,7 @@ static void codes_exec_comp_delay( e = tw_event_new( lp->gid, ts , lp ); msg = (nw_message*)tw_event_data(e); msg->msg_type = MPI_OP_GET_NEXT; + msg->rc.mpi_next.mark.was_skipped = false; tw_event_send(e); } @@ -3346,9 +3356,11 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp break; case CODES_WK_MARK: - fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time); - if (iter_predictor) { - 
iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time); + if (! m->rc.mpi_next.mark.was_skipped) { + fprintf(iteration_log, "ITERATION %d node %llu job %d rank %d time %lf\n", m->mpi_op->u.send.tag, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.mpi_next.mark.saved_marker_time); + if (iter_predictor) { + iter_predictor->model.feed(lp, s->nw_id_in_pe, m->mpi_op->u.send.tag, m->rc.mpi_next.mark.saved_marker_time); + } } if (OUTPUT_MARKS) @@ -3374,6 +3386,7 @@ void nw_test_event_handler_commit(nw_state* s, tw_bf * bf, nw_message * m, tw_lp free(m->mpi_op); break; case SURR_SKIP_ITERATION: + fprintf(iteration_log, "SKIPPED TO ITERATION %d node %llu job %d rank %d time %lf\n", m->fwd.resume_at_iter, LLU(s->nw_id), s->app_id, s->local_rank, m->rc.surr_skip.saved_marker_time); break; case CLI_BCKGND_CHANGE: From 789c4693170dd2aa648210dcd39c0e8e24e8a78f Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 29 Jun 2025 18:37:16 -0400 Subject: [PATCH 102/110] Updating compilation instructions --- CODES-compile-instructions.sh | 4 ++-- README.md | 42 +++++++++++++++++------------------ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh index 694f0c95..0856d597 100644 --- a/CODES-compile-instructions.sh +++ b/CODES-compile-instructions.sh @@ -21,8 +21,8 @@ CUR_DIR="$PWD" ##### Downloading everything ##### -git clone https://github.com/codes-org/codes --branch=director-app-automatic -git clone https://github.com/ross-org/ross --depth=100 --branch=gvt-hook-util +git clone https://github.com/codes-org/codes --branch=develop +git clone https://github.com/ross-org/ross --depth=100 --branch=develop if [ $swm_enable = 1 ]; then git clone https://github.com/pmodels/argobots --depth=1 diff --git a/README.md b/README.md index a86424be..7740a222 100644 --- a/README.md +++ b/README.md @@ -21,27 +21,6 @@ The easiest way to build CODES is using our automated compilation 
script that ha The script will create a new directory with all dependencies and CODES compiled and ready to use. -## Features - -CODES provides comprehensive simulation capabilities for: - -### Network Topologies -- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date) -- **Torus**: Multi-dimensional torus networks -- **Fat-tree**: Hierarchical tree topologies -- **Express Mesh**: Enhanced mesh networks -- **Simple P2P**: Point-to-point networks - -### Workload Generation -- **SWM and UNION**: Workload generation -- **MPI trace replay**: Support for DUMPI traces -- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns - -### Multi-fidelity Simulation -- **Network surrogate models**: Switch between high-fidelity and surrogate modes -- **Application surrogate models**: Accelerate application-level simulation -- **Adaptive directors**: Intelligent switching between simulation modes - ## Prerequisites - **MPI**: OpenMPI or MPICH for parallel execution @@ -127,6 +106,27 @@ bash run-experiment.sh path-to-experiment/script.sh A folder will be created under `path-to-experiment/results` containing the result of running the experiment. 
+## Features + +CODES provides comprehensive simulation capabilities for: + +### Network Topologies +- **Dragonfly**: High-radix interconnect with adaptive routing (most up to date) +- **Torus**: Multi-dimensional torus networks +- **Fat-tree**: Hierarchical tree topologies +- **Express Mesh**: Enhanced mesh networks +- **Simple P2P**: Point-to-point networks + +### Workload Generation +- **SWM and UNION**: Workload generation +- **MPI trace replay**: Support for DUMPI traces +- **Synthetic patterns**: Uniform random, nearest neighbor, and custom patterns + +### Multi-fidelity Simulation +- **Network surrogate models**: Switch between high-fidelity and surrogate modes +- **Application surrogate models**: Accelerate application-level simulation +- **Adaptive directors**: Intelligent switching between simulation modes + ## Contributing Before contributing please run the full test suite. Some tests verify our determinism guarantees (every simulation should be reproducible), i.e, the number of net events processed between two runs in parallel mode should be the same. We want to keep our determinism guarantees forever. Non-deterministic simulations are often the result of faulty reverse handlers, which have caused serious bug failures and hundreds of hours of debugging. 
From 45453ad0766070ced950e3c4fa634c708b8880c2 Mon Sep 17 00:00:00 2001 From: helq Date: Sun, 29 Jun 2025 18:38:05 -0400 Subject: [PATCH 103/110] Max iteration per app should be computed across all MPI ranks --- src/network-workloads/model-net-mpi-replay.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 58a27098..018c4337 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -4387,6 +4387,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) double total_avg_send_time, total_max_send_time; double total_avg_wait_time, total_max_wait_time; double total_avg_recv_time, total_max_recv_time; + double g_max_elapsed_time_per_job[MAX_JOBS]; double g_total_syn_data = 0; MPI_Reduce(&num_bytes_sent, &total_bytes_sent, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); @@ -4403,6 +4404,7 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) MPI_Reduce(&avg_wait_time, &total_avg_wait_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce(&avg_send_time, &total_avg_send_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); MPI_Reduce(&total_syn_data, &g_total_syn_data, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(max_elapsed_time_per_job, g_max_elapsed_time_per_job, num_total_jobs, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); assert(num_net_traces); @@ -4421,19 +4423,20 @@ int modelnet_mpi_replay(MPI_Comm comm, int* argc, char*** argv ) printf("Per App Max Elapsed Times:\n"); for(int i = 0; i < num_total_jobs; i++) { - printf("\tApp %d: %.4f\n",i,max_elapsed_time_per_job[i]); + printf("\tApp %d: %.4f\n",i,g_max_elapsed_time_per_job[i]); } printf("----------\n"); if(synthetic_pattern == PERMUTATION) printf("\n Threshold for random permutation %ld ", perm_switch_thresh); + + if(is_synthetic) + printf("\n Synthetic traffic stats: data received per proc %lf bytes \n", 
g_total_syn_data/num_syn_clients); } if (do_lp_io){ int ret = lp_io_flush(io_handle, MPI_COMM_CODES); assert(ret == 0 || !"lp_io_flush failure"); } - if(is_synthetic) - printf("\n PE%d: Synthetic traffic stats: data received per proc %lf bytes \n",rank, g_total_syn_data/num_syn_clients); model_net_report_stats(net_id); From 242707e42e32d52ca9f5cc48c2558d61f2752948 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 15 Jul 2025 15:58:43 -0400 Subject: [PATCH 104/110] Updating compilation instructions --- CODES-compile-instructions.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh index 0856d597..76d4c6a1 100644 --- a/CODES-compile-instructions.sh +++ b/CODES-compile-instructions.sh @@ -35,7 +35,7 @@ if [ $union_enable = 1 ]; then curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz tar xvf conceptual-1.5.1b.tar.gz # Downloading union - git clone https://github.com/helq/Union --branch=total-iterations-communication + git clone https://github.com/helq/Union --branch=master fi ##### COMPILING ##### @@ -85,7 +85,7 @@ if [ $union_enable = 1 ]; then pushd Union ./prepare.sh - ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx + PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx make -j4 && make install err=$? 
[[ $err -ne 0 ]] && exit $err From 34275e3a947e6862c28313ed5ae73961f4d1a56f Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 22 Jul 2025 12:04:11 -0400 Subject: [PATCH 105/110] Removing support for Autoconf Autoconf is now far too outdated and keeping it on synch with the changes made in the CMakefile --- LICENSE.md | 22 - Make.rules | 40 - Makefile.am | 103 - configure.ac | 262 -- m4/ax_check_compile_flag.m4 | 75 - m4/ax_compare_version.m4 | 177 - m4/ax_cxx_compile_stdcxx.m4 | 972 ---- m4/ax_prog_bison.m4 | 68 - m4/ax_prog_bison_clfeatures.m4 | 137 - m4/ax_prog_flex.m4 | 62 - m4/libtool.m4 | 7986 -------------------------------- m4/ltoptions.m4 | 384 -- m4/ltsugar.m4 | 123 - m4/ltversion.m4 | 23 - m4/lt~obsolete.m4 | 98 - m4/m4_ax_boost_base.m4 | 301 -- m4/m4_ax_boost_filesystem.m4 | 118 - m4/m4_ax_boost_system.m4 | 121 - m4/pkg.m4 | 233 - maint/codes-net.pc.in | 12 - maint/codes.pc.in | 32 - prepare.sh | 4 - 22 files changed, 11353 deletions(-) delete mode 100644 LICENSE.md delete mode 100644 Make.rules delete mode 100644 Makefile.am delete mode 100755 configure.ac delete mode 100644 m4/ax_check_compile_flag.m4 delete mode 100644 m4/ax_compare_version.m4 delete mode 100644 m4/ax_cxx_compile_stdcxx.m4 delete mode 100755 m4/ax_prog_bison.m4 delete mode 100755 m4/ax_prog_bison_clfeatures.m4 delete mode 100755 m4/ax_prog_flex.m4 delete mode 100644 m4/libtool.m4 delete mode 100644 m4/ltoptions.m4 delete mode 100644 m4/ltsugar.m4 delete mode 100644 m4/ltversion.m4 delete mode 100644 m4/lt~obsolete.m4 delete mode 100644 m4/m4_ax_boost_base.m4 delete mode 100644 m4/m4_ax_boost_filesystem.m4 delete mode 100644 m4/m4_ax_boost_system.m4 delete mode 100644 m4/pkg.m4 delete mode 100644 maint/codes-net.pc.in delete mode 100644 maint/codes.pc.in delete mode 100755 prepare.sh diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index a6de0500..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,22 +0,0 @@ -************** Copyright © 2019, UChicago Argonne, LLC *************** - 
-All Rights Reserved - -Software Name: CO-Design of Exascale Storage and Network Architectures (CODES) - -By: Argonne National Laboratory, Rensselaer Polytechnic Institute, Lawrence Livermore National Laboratory, and Illinois Institute of Technology - -OPEN SOURCE LICENSE - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - - -****************************************************************************************************** -DISCLAIMER - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************************************************** diff --git a/Make.rules b/Make.rules deleted file mode 100644 index aedcde1e..00000000 --- a/Make.rules +++ /dev/null @@ -1,40 +0,0 @@ -# flex & bison deps -# -%.c %.h: %.l - $(AM_V_GEN)$(LEX) --header-file=$(@:.c=.h) -o $(@:.h=.c) $< \ - || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) - -# -# specific rule for codesparser generation; we want the header to land in -# the codes/ directory because it will be installed for use by other repos -#src/iokernellang/codesparser.c codes/codesparser.h: src/iokernellang/codesparser.y -# mkdir -p codes -# @test "x$(bison_ok)" != "yes" || echo "*** WARNING *** Bison version might be too old" -# $(AM_V_GEN)$(YACC) --defines=codes/codesparser.h -o src/iokernellang/codesparser.c $< \ -# || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) - - -%.c %.h: %.y - @test "x$(bison_ok)" != "yes" || echo "*** WARNING *** Bison version might be too old" - $(AM_V_GEN)$(YACC) --defines=$(@:.c=.h) -o $(@:.h=.c) $< \ - || ( $(RM) $(basename $@).h $(basename $@).c ; exit 1) - - - -# %.y: %.y.in Makefile -# $(AM_V_GEN)$(SED) -e 's,[@]CODES_PURE_PARSER_DEFINES[@],$(CODES_PURE_PARSER_DEFINES),g' \ -# -e 's,[@]CODES_PUSH_PARSER_DEFINES[@],$(CODES_PUSH_PARSER_DEFINES),g' \ -# < src/common/iokernellang/codesparser.y.in > src/common/iokernellang/codesparser.y - -# -# Output dist version -# -.phony: distversion -distversion: - @echo $(VERSION) - -# -# Easy way to build unit tests without running them -# -.phony: tests -tests: $(check_PROGRAMS) diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index f18d8d63..00000000 --- a/Makefile.am +++ /dev/null @@ -1,103 +0,0 @@ -AUTOMAKE_OPTIONS = foreign -ACLOCAL_AMFLAGS = -I m4 - -bin_PROGRAMS = -bin_SCRIPTS = -noinst_LIBRARIES = -noinst_PROGRAMS = -lib_LTLIBRARIES = -noinst_HEADERS = -TESTS = -check_PROGRAMS = -EXTRA_PROGRAMS = -CLEANFILES = $(bin_SCRIPTS) -EXTRA_DIST = -BUILT_SOURCES = 
-AM_LDFLAGS = - - -# pkgconfig files -pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = maint/codes.pc -$(pkgconfig_DATA): config.status - -EXTRA_DIST += \ - prepare.sh LICENSE.md configure.ac uc-codes.cfg reformat.sh \ - misc/README misc/ptrn_loggp-2.4.6.patch CONTRIBUTORS.md \ - README.md - -AM_CPPFLAGS = -I$(top_srcdir)/src ${ROSS_CFLAGS} - -AM_CFLAGS = - -AM_CXXFLAGS = $(AM_CFLAGS) - -LDADD = $(lib_LTLIBRARIES) $(ROSS_LIBS) - -include Make.rules - -include $(top_srcdir)/scripts/Makefile.subdir -include $(top_srcdir)/src/Makefile.subdir -include $(top_srcdir)/tests/Makefile.subdir -include $(top_srcdir)/doc/Makefile.subdir - -if USE_DEBUG -AM_CPPFLAGS += -g -AM_CFLAGS += -g -AM_CXXFLAGS += -g -endif - -if USE_DARSHAN -AM_CPPFLAGS += ${DARSHAN_CFLAGS} -DUSE_DARSHAN=1 -src_libcodes_la_SOURCES += src/workload/methods/codes-darshan3-io-wrkld.c -LDADD += ${DARSHAN_LIBS} -TESTS += tests/workload/darshan-dump.sh -endif - -if USE_RECORDER -AM_CPPFLAGS += ${RECORDER_CPPFLAGS} -src_libcodes_la_SOURCES += src/workload/methods/codes-recorder-io-wrkld.c -endif - -if USE_ONLINE -AM_CPPFLAGS += ${ARGOBOTS_CFLAGS} -DUSE_ONLINE=1 -LDADD += ${ARGOBOTS_LIBS} -if USE_SWM -AM_CPPFLAGS += ${SWM_CFLAGS} -DUSE_SWM=1 -LDADD += ${SWM_LIBS} -src_libcodes_la_SOURCES += src/workload/methods/codes-online-comm-wrkld.C -endif -if USE_UNION -src_libcodes_la_SOURCES += src/workload/methods/codes-conc-online-comm-wrkld.C -AM_CPPFLAGS += ${UNION_CFLAGS} ${SWM_CFLAGS} -DUSE_UNION=1 -LDADD += ${UNION_LIBS} ${SWM_LIBS} -endif -endif - -if USE_DUMPI -AM_CPPFLAGS += ${DUMPI_CFLAGS} -DUSE_DUMPI=1 -src_libcodes_la_SOURCES += src/workload/methods/codes-dumpi-trace-nw-wrkld.c -TESTS += tests/modelnet-test-dragonfly-traces.sh \ - tests/modelnet-test-dragonfly-custom-traces.sh \ - tests/modelnet-test-slimfly-traces.sh \ - tests/modelnet-test-torus-traces.sh -check_PROGRAMS += src/network-workloads/model-net-mpi-replay -if USE_CORTEX -if USE_PYTHON -if USE_CORTEX_PYTHON -AM_CPPFLAGS += 
${CORTEX_PYTHON_CFLAGS} -DENABLE_CORTEX_PYTHON=1 -LDADD += ${CORTEX_PYTHON_LIBS} -AM_CPPFLAGS += ${PYTHON_CFLAGS} -LDADD += ${PYTHON_LIBS} -endif -endif -AM_CPPFLAGS += ${CORTEX_CFLAGS} -DENABLE_CORTEX=1 -LDADD += ${CORTEX_LIBS} -endif -LDADD += ${DUMPI_LIBS} -endif - -if USE_RDAMARIS -AM_CPPFLAGS += ${ROSS_Damaris_CFLAGS} -DUSE_RDAMARIS=1 -LDADD += ${ROSS_Damaris_LIBS} -endif diff --git a/configure.ac b/configure.ac deleted file mode 100755 index 2c4b7fea..00000000 --- a/configure.ac +++ /dev/null @@ -1,262 +0,0 @@ -# -*- Autoconf -*- -# Process this file with autoconf to produce a configure script. - -AC_PREREQ([2.67]) -AC_INIT([codes], [1.4.2], [http://trac.mcs.anl.gov/projects/codes/newticket],[],[http://www.mcs.anl.gov/projects/codes/]) -LT_INIT - -AC_CANONICAL_TARGET -AC_CANONICAL_SYSTEM -AC_CANONICAL_HOST - -AM_INIT_AUTOMAKE([foreign subdir-objects -Wall]) - -m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) - -AC_CONFIG_SRCDIR([doc/BUILD_STEPS]) -AC_CONFIG_HEADERS([codes_config.h]) - -AX_PROG_BISON([],[AC_MSG_ERROR([could not find required package bison])]) -AX_PROG_FLEX([],[AC_MSG_ERROR([could not find required package flex])]) -AC_SUBST([BISON]) -AC_SUBST([FLEX]) - -# Checks for programs. -AC_PROG_CC -AM_PROG_CC_C_O -AC_PROG_CXX -AC_PROG_CXXCPP -AC_PROG_RANLIB - -PKG_PROG_PKG_CONFIG - -if test -z "$PKG_CONFIG" ; then - AC_MSG_ERROR([pkg-config is required. Please install the pkg-config program on your PATH or set the PKG_CONFIG environment variable to the appropriate package.]) -fi - -# Check for C99 -AC_PROG_CC_C99 - -AC_REQUIRE_CPP - -# Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS([stdlib.h string.h unistd.h execinfo.h pthread.h malloc.h]) - -# Checks for typedefs, structures, and compiler characteristics. 
-AC_C_CONST -AC_C_INLINE -AC_TYPE_INT8_T -AC_TYPE_INT16_T -AC_TYPE_INT32_T -AC_TYPE_INT64_T -AC_TYPE_UINT8_T -AC_TYPE_UINT16_T -AC_TYPE_UINT32_T -AC_TYPE_UINT64_T -AC_TYPE_SIZE_T -AC_TYPE_SSIZE_T - -# Add warning flags by default -AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall"]) -AX_CHECK_COMPILE_FLAG([-Wextra], [CFLAGS="$CFLAGS -Wextra"]) -AX_CHECK_COMPILE_FLAG([-Wshadow], [CFLAGS="$CFLAGS -Wshadow"]) - -# Checks for library functions. -AC_CHECK_FUNCS([memset]) -AC_CHECK_LIB([pthread],[pthread_create],,[AC_MSG_ERROR([Could not find pthread_create!])]) -AC_CHECK_LIB([m],[sqrt],,[AC_MSG_ERROR([Could not find sqrt!])]) - - -AX_PROG_BISON_CLFEATURES([],[AC_MSG_WARN([Could not find bison])], -[bison_ok="yes"], [bison_ok="no"]) -AC_SUBST([bison_ok]) - -dnl Check to see if CC is an MPI compiler -AC_MSG_CHECKING(whether the mpicc compiler works) -AC_TRY_COMPILE([#include ], [int ret = MPI_Init(0, (void*)0)], - AC_MSG_RESULT(yes), - AC_MSG_RESULT(no) - AC_MSG_ERROR(CC doesnt appear to be a valid MPI compiler. See INSTALL document or try adding CC=mpicc to your configure command line.) 
-) - -# check for ROSS -PKG_CHECK_MODULES_STATIC([ROSS], [ross], [], - [AC_MSG_ERROR([Could not find working ross installation via pkg-config])]) - -#check for Damaris -AC_ARG_WITH([damaris],[AS_HELP_STRING([--with-damaris], - [build with ROSS-Damaris in situ analysis support])], - [use_rdamaris=yes],[use_rdamaris=no]) -if test "x${use_rdamaris}" = xyes ; then - PKG_CHECK_MODULES_STATIC([ROSS_Damaris], [ross-damaris], [], - [AC_MSG_ERROR([Could not find working ROSS-Damaris installation via pkg-config])]) -fi -AM_CONDITIONAL(USE_RDAMARIS, [test "x${use_rdamaris}" = xyes]) - -# check for enable-g -AC_ARG_ENABLE([g],[AS_HELP_STRING([--enable-g], - [Build with GDB symbols])], - [use_debug=yes],[use_debug=no]) -AM_CONDITIONAL(USE_DEBUG, [test "x${use_debug}" = xyes]) - -# check for Darshan -AC_ARG_WITH([darshan],[AS_HELP_STRING([--with-darshan], - [Build with the darshan workload support])], - [use_darshan=yes],[use_darshan=no]) -if test "x${use_darshan}" = xyes ; then - PKG_CHECK_MODULES_STATIC([DARSHAN], [darshan-util], [], - [AC_MSG_ERROR([Could not find working darshan installation via pkg-config])]) - DARSHAN_VER=`pkg-config --modversion darshan-util` - AX_COMPARE_VERSION([$DARSHAN_VER],[ge],[2.3],[], - [AC_MSG_ERROR([Found Darshan $DARSHAN_VER but 2.3 or greater is needed])]) -fi -AM_CONDITIONAL(USE_DARSHAN, [test "x${use_darshan}" = xyes]) - -# check for Argobots -AC_ARG_WITH([online],[AS_HELP_STRING([--with-online@<:@=DIR@:>@], - [Build with the online workloads and argobots support])]) -if test "x${with_online}" != "x" ; then - AM_CONDITIONAL(USE_ONLINE, true) - AX_BOOST_BASE([1.66]) - AX_CXX_COMPILE_STDCXX(11, noext, mandatory) - PKG_CHECK_MODULES_STATIC([ARGOBOTS], [argobots], [], - [AC_MSG_ERROR([Could not find working argobots installation via pkg-config])]) -else - AM_CONDITIONAL(USE_ONLINE, false) -fi - -#check for SWM -AC_ARG_WITH([swm],[AS_HELP_STRING([--with-swm@<:@=DIR@:>@], - [location of SWM installation])]) -if test "x${with_swm}" != "x" ; then - 
AM_CONDITIONAL(USE_SWM, true) - PKG_CHECK_MODULES_STATIC([SWM], [swm], [], - [AC_MSG_ERROR([Could not find working swm installation via pkg-config])]) - PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [], - [AC_MSG_ERROR[Could not find shared directory in SWM]]) - AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json - data files]) -else - AM_CONDITIONAL(USE_SWM, false) -fi - -#check for UNION -AC_ARG_WITH([union],[AS_HELP_STRING([--with-union@<:@=DIR@:>@], - [location of Union installation])]) -if test "x${with_union}" != "x" ; then - AM_CONDITIONAL(USE_UNION, true) - PKG_CHECK_MODULES_STATIC([UNION], [union], [], - [AC_MSG_ERROR([Could not find working Union installation via pkg-config])]) - PKG_CHECK_VAR([UNION_DATADIR], [union], [datarootdir], [], - [AC_MSG_ERROR[Could not find shared directory in UNION]]) - AC_DEFINE_UNQUOTED([UNION_DATADIR], ["$UNION_DATADIR"], [if using json data files]) - PKG_CHECK_MODULES_STATIC([SWM], [swm], [], - [AC_MSG_ERROR([Could not find working swm installation via pkg-config])]) - PKG_CHECK_VAR([SWM_DATAROOTDIR], [swm], [datarootdir], [], - [AC_MSG_ERROR[Could not find shared directory in SWM]]) - AC_DEFINE_UNQUOTED([SWM_DATAROOTDIR], ["$SWM_DATAROOTDIR"], [if using json - data files]) -else - AM_CONDITIONAL(USE_UNION, false) -fi - - -# check for Recorder -AM_CONDITIONAL(USE_RECORDER, true) -RECORDER_CPPFLAGS="-DUSE_RECORDER=1" -AC_SUBST(RECORDER_CPPFLAGS) - -#check for Dumpi -AC_ARG_WITH([dumpi],[AS_HELP_STRING([--with-dumpi@<:@=DIR@:>@], - [location of Dumpi installation])]) -if test "x${with_dumpi}" != "x" ; then - CFLAGS="-I${with_dumpi}/include" - LIBS="-L${with_dumpi}/lib/ -lundumpi" - AC_CHECK_LIB([undumpi], - [undumpi_open], [], [AC_MSG_ERROR(Could not find dumpi)]) - AM_CONDITIONAL(USE_DUMPI, true) - DUMPI_CFLAGS="-I${with_dumpi}/include" - DUMPI_LIBS="-L${with_dumpi}/lib/ -lundumpi" - AC_SUBST(DUMPI_LIBS) - AC_SUBST(DUMPI_CFLAGS) -else - AM_CONDITIONAL(USE_DUMPI, false) -fi - -# check for 
Cortex -AC_ARG_WITH([cortex],[AS_HELP_STRING([--with-cortex@<:@=DIR@:>@], - [location of Cortex installation])]) - -# check for Python -AC_ARG_WITH([python],[AS_HELP_STRING([--with-python@<:@=DIR@:>@], - [location of Python 2.7 installation])]) - -# check for Boost Python -AC_ARG_WITH([boost],[AS_HELP_STRING([--with-boost@<:@=DIR@:>@], - [location of Boost Python installation])]) - -if [ test "x${with_python}" != "x" -a "x${with_boost}" != "x"] ; then - AC_CHECK_FILES([${with_python}/lib/libpython2.7.so ${with_boost}/lib/libboost_python.a], - AM_CONDITIONAL(USE_PYTHON, true), - AC_MSG_ERROR(Could not find Python and/or Boost-Python libraries)) - PYTHON_CFLAGS="-I${with_python}/include -I${with_boost}/include" - PYTHON_LIBS="-L${with_boost}/lib -lboost_python -L${with_python}/lib/ -lpython2.7" - AC_SUBST(PYTHON_LIBS) - AC_SUBST(PYTHON_CFLAGS) -else - AM_CONDITIONAL(USE_PYTHON, false) -fi - -if test "x${with_cortex}" != "x" ; then - AC_CHECK_FILES([${with_cortex}/lib/libcortex.a ${with_cortex}/lib/libcortex-mpich.a], - AM_CONDITIONAL(USE_CORTEX, true), - AC_MSG_ERROR(Could not find Cortex libraries libcortex.a and/or libcortex-mpich.a)) - CORTEX_CFLAGS="-I${with_cortex}/include" - CORTEX_LIBS="-L${with_cortex}/lib/ -lcortex-mpich -lcortex -lstdc++" - AC_SUBST(CORTEX_LIBS) - AC_SUBST(CORTEX_CFLAGS) -else - AM_CONDITIONAL(USE_CORTEX, false) -fi - -if [ test "x${with_cortex}" != "x" -a "x${with_python}" != "x" -a "x${with_boost}" != "x"] ; then - AC_CHECK_FILE([${with_cortex}/lib/libcortex-python.a], - AM_CONDITIONAL(USE_CORTEX_PYTHON, true), - AC_MSG_ERROR(Could not find library libcortex-python.a)) - CORTEX_PYTHON_CFLAGS="-I${with_cortex}/include" - CORTEX_PYTHON_LIBS="-L${with_cortex}/lib/ -lcortex-python" - AC_SUBST(CORTEX_PYTHON_LIBS) - AC_SUBST(CORTEX_PYTHON_CFLAGS) -else - AM_CONDITIONAL(USE_CORTEX_PYTHON, false) -fi - -dnl ====================================================================== -dnl Try harder to be valgrind safe -dnl 
====================================================================== -AC_ARG_ENABLE(valgrind-clean, - [AS_HELP_STRING( - [--enable-valgrind-clean], - [Try harder to avoid valgrind warnings]) - ]) - -AS_IF([test "x$enable_valgrind_clean" = "xyes"], [ - AC_DEFINE([VALGRIND], [1], [If enabling valgrind-clean build]) -]) - - -dnl AC_CONFIG_FILES([src/iokernellang/codesparser.y]) -if test "x$srcdir" != "x."; then - AC_CONFIG_LINKS([tests/conf:$srcdir/tests/conf]) -fi - -AC_CONFIG_FILES([Makefile]) - -AC_OUTPUT([maint/codes.pc]) -AC_OUTPUT([src/network-workloads/conf/dragonfly-custom/modelnet-test-dragonfly-1728-nodes.conf]) -AC_OUTPUT([src/network-workloads/conf/dragonfly-plus/modelnet-test-dragonfly-plus.conf]) -AC_OUTPUT([src/network-workloads/conf/dragonfly-dally/modelnet-test-dragonfly-dally.conf]) -AC_OUTPUT([doc/example/tutorial-ping-pong.conf]) - - diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 deleted file mode 100644 index a7680d72..00000000 --- a/m4/ax_check_compile_flag.m4 +++ /dev/null @@ -1,75 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) -# -# DESCRIPTION -# -# Check whether the given FLAG works with the current language's compiler -# or gives an error. (Warnings, however, are ignored) -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# INPUT gives an alternative input source to AC_COMPILE_IFELSE. 
-# -# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this -# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. Draheim -# Copyright (c) 2011 Maarten Bosmans -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 4 - -AC_DEFUN([AX_CHECK_COMPILE_FLAG], -[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF -AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl -AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ - ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" - AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[yes])], - [AS_VAR_SET(CACHEVAR,[no])]) - _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) -AS_VAR_IF(CACHEVAR,yes, - [m4_default([$2], :)], - [m4_default([$3], :)]) -AS_VAR_POPDEF([CACHEVAR])dnl -])dnl AX_CHECK_COMPILE_FLAGS - diff --git a/m4/ax_compare_version.m4 b/m4/ax_compare_version.m4 deleted file mode 100644 index 74dc0fdd..00000000 --- a/m4/ax_compare_version.m4 +++ /dev/null @@ -1,177 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_compare_version.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) -# -# DESCRIPTION -# -# This macro compares two version strings. Due to the various number of -# minor-version numbers that can exist, and the fact that string -# comparisons are not compatible with numeric comparisons, this is not -# necessarily trivial to do in a autoconf script. This macro makes doing -# these comparisons easy. -# -# The six basic comparisons are available, as well as checking equality -# limited to a certain number of minor-version levels. 
-# -# The operator OP determines what type of comparison to do, and can be one -# of: -# -# eq - equal (test A == B) -# ne - not equal (test A != B) -# le - less than or equal (test A <= B) -# ge - greater than or equal (test A >= B) -# lt - less than (test A < B) -# gt - greater than (test A > B) -# -# Additionally, the eq and ne operator can have a number after it to limit -# the test to that number of minor versions. -# -# eq0 - equal up to the length of the shorter version -# ne0 - not equal up to the length of the shorter version -# eqN - equal up to N sub-version levels -# neN - not equal up to N sub-version levels -# -# When the condition is true, shell commands ACTION-IF-TRUE are run, -# otherwise shell commands ACTION-IF-FALSE are run. The environment -# variable 'ax_compare_version' is always set to either 'true' or 'false' -# as well. -# -# Examples: -# -# AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8]) -# AX_COMPARE_VERSION([3.15],[lt],[3.15.8]) -# -# would both be true. -# -# AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8]) -# AX_COMPARE_VERSION([3.15],[gt],[3.15.8]) -# -# would both be false. -# -# AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8]) -# -# would be true because it is only comparing two minor versions. -# -# AX_COMPARE_VERSION([3.15.7],[eq0],[3.15]) -# -# would be true because it is only comparing the lesser number of minor -# versions of the two values. -# -# Note: The characters that separate the version numbers do not matter. An -# empty string is the same as version 0. OP is evaluated by autoconf, not -# configure, so must be a string, not a variable. -# -# The author would like to acknowledge Guido Draheim whose advice about -# the m4_case and m4_ifvaln functions make this macro only include the -# portions necessary to perform the specific comparison specified by the -# OP argument in the final configure script. 
-# -# LICENSE -# -# Copyright (c) 2008 Tim Toolan -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 11 - -dnl ######################################################################### -AC_DEFUN([AX_COMPARE_VERSION], [ - AC_REQUIRE([AC_PROG_AWK]) - - # Used to indicate true or false condition - ax_compare_version=false - - # Convert the two version strings to be compared into a format that - # allows a simple string comparison. The end result is that a version - # string of the form 1.12.5-r617 will be converted to the form - # 0001001200050617. In other words, each number is zero padded to four - # digits, and non digits are removed. - AS_VAR_PUSHDEF([A],[ax_compare_version_A]) - A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ - -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/[[^0-9]]//g'` - - AS_VAR_PUSHDEF([B],[ax_compare_version_B]) - B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ - -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ - -e 's/[[^0-9]]//g'` - - dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary - dnl # then the first line is used to determine if the condition is true. - dnl # The sed right after the echo is to remove any indented white space. 
- m4_case(m4_tolower($2), - [lt],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"` - ], - [gt],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"` - ], - [le],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"` - ], - [ge],[ - ax_compare_version=`echo "x$A -x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"` - ],[ - dnl Split the operator from the subversion count if present. - m4_bmatch(m4_substr($2,2), - [0],[ - # A count of zero means use the length of the shorter version. - # Determine the number of characters in A and B. - ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'` - ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'` - - # Set A to no more than B's length and B to no more than A's length. - A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"` - B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"` - ], - [[0-9]+],[ - # A count greater than zero means use only that many subversions - A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` - B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` - ], - [.+],[ - AC_WARNING( - [illegal OP numeric parameter: $2]) - ],[]) - - # Pad zeros at end of numbers to make same length. - ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`" - B="$B`echo $A | sed 's/./0/g'`" - A="$ax_compare_version_tmp_A" - - # Check for equality or inequality as necessary. - m4_case(m4_tolower(m4_substr($2,0,2)), - [eq],[ - test "x$A" = "x$B" && ax_compare_version=true - ], - [ne],[ - test "x$A" != "x$B" && ax_compare_version=true - ],[ - AC_WARNING([illegal OP parameter: $2]) - ]) - ]) - - AS_VAR_POPDEF([A])dnl - AS_VAR_POPDEF([B])dnl - - dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE. 
- if test "$ax_compare_version" = "true" ; then - m4_ifvaln([$4],[$4],[:])dnl - m4_ifvaln([$5],[else $5])dnl - fi -]) dnl AX_COMPARE_VERSION diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 deleted file mode 100644 index 0b6cb3a7..00000000 --- a/m4/ax_cxx_compile_stdcxx.m4 +++ /dev/null @@ -1,972 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) -# -# DESCRIPTION -# -# Check for baseline language coverage in the compiler for the specified -# version of the C++ standard. If necessary, add switches to CXX and -# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) -# or '14' (for the C++14 standard). -# -# The second argument, if specified, indicates whether you insist on an -# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. -# -std=c++11). If neither is specified, you get whatever works, with -# preference for an extended mode. -# -# The third argument, if specified 'mandatory' or if left unspecified, -# indicates that baseline support for the specified C++ standard is -# required and that the macro should error out if no mode with that -# support is found. If specified 'optional', then configuration proceeds -# regardless, after defining HAVE_CXX${VERSION} if and only if a -# supporting mode is found. 
-# -# LICENSE -# -# Copyright (c) 2008 Benjamin Kosnik -# Copyright (c) 2012 Zack Weinberg -# Copyright (c) 2013 Roy Stogner -# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov -# Copyright (c) 2015 Paul Norman -# Copyright (c) 2015 Moritz Klammler -# Copyright (c) 2016, 2018 Krzesimir Nowak -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 9 - -dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro -dnl (serial version number 13). - -AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl - m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], - [$1], [14], [ax_cxx_compile_alternatives="14 1y"], - [$1], [17], [ax_cxx_compile_alternatives="17 1z"], - [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl - m4_if([$2], [], [], - [$2], [ext], [], - [$2], [noext], [], - [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl - m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], - [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], - [$3], [optional], [ax_cxx_compile_cxx$1_required=false], - [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) - AC_LANG_PUSH([C++])dnl - ac_success=no - - m4_if([$2], [noext], [], [dnl - if test x$ac_success = xno; then - for alternative in ${ax_cxx_compile_alternatives}; do - switch="-std=gnu++${alternative}" - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXX="$CXX" - CXX="$CXX $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXX="$ac_save_CXX"]) - if eval test x\$$cachevar = xyes; then - CXX="$CXX $switch" - if test -n "$CXXCPP" ; then - CXXCPP="$CXXCPP $switch" - fi - 
ac_success=yes - break - fi - done - fi]) - - m4_if([$2], [ext], [], [dnl - if test x$ac_success = xno; then - dnl HP's aCC needs +std=c++11 according to: - dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf - dnl Cray's crayCC needs "-h std=c++11" - for alternative in ${ax_cxx_compile_alternatives}; do - for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXX="$CXX" - CXX="$CXX $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXX="$ac_save_CXX"]) - if eval test x\$$cachevar = xyes; then - CXX="$CXX $switch" - if test -n "$CXXCPP" ; then - CXXCPP="$CXXCPP $switch" - fi - ac_success=yes - break - fi - done - if test x$ac_success = xyes; then - break - fi - done - fi]) - AC_LANG_POP([C++]) - if test x$ax_cxx_compile_cxx$1_required = xtrue; then - if test x$ac_success = xno; then - AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) - fi - fi - if test x$ac_success = xno; then - HAVE_CXX$1=0 - AC_MSG_NOTICE([No compiler with C++$1 support was found]) - else - HAVE_CXX$1=1 - AC_DEFINE(HAVE_CXX$1,1, - [define if the compiler supports basic C++$1 syntax]) - fi - AC_SUBST(HAVE_CXX$1) -]) - - -dnl Test body for checking C++11 support - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 -) - - -dnl Test body for checking C++14 support - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 -) - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 -) - -dnl Tests for new features in 
C++11 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ - -// If the compiler admits that it is not ready for C++11, why torture it? -// Hopefully, this will speed up the test. - -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus < 201103L - -#error "This is not a C++11 compiler" - -#else - -namespace cxx11 -{ - - namespace test_static_assert - { - - template - struct check - { - static_assert(sizeof(int) <= sizeof(T), "not big enough"); - }; - - } - - namespace test_final_override - { - - struct Base - { - virtual void f() {} - }; - - struct Derived : public Base - { - virtual void f() override {} - }; - - } - - namespace test_double_right_angle_brackets - { - - template < typename T > - struct check {}; - - typedef check single_type; - typedef check> double_type; - typedef check>> triple_type; - typedef check>>> quadruple_type; - - } - - namespace test_decltype - { - - int - f() - { - int a = 1; - decltype(a) b = 2; - return a + b; - } - - } - - namespace test_type_deduction - { - - template < typename T1, typename T2 > - struct is_same - { - static const bool value = false; - }; - - template < typename T > - struct is_same - { - static const bool value = true; - }; - - template < typename T1, typename T2 > - auto - add(T1 a1, T2 a2) -> decltype(a1 + a2) - { - return a1 + a2; - } - - int - test(const int c, volatile int v) - { - static_assert(is_same::value == true, ""); - static_assert(is_same::value == false, ""); - static_assert(is_same::value == false, ""); - auto ac = c; - auto av = v; - auto sumi = ac + av + 'x'; - auto sumf = ac + av + 1.0; - static_assert(is_same::value == true, ""); - static_assert(is_same::value == true, ""); - static_assert(is_same::value == true, ""); - static_assert(is_same::value == false, ""); - static_assert(is_same::value == true, ""); - return (sumf > 0.0) ? 
sumi : add(c, v); - } - - } - - namespace test_noexcept - { - - int f() { return 0; } - int g() noexcept { return 0; } - - static_assert(noexcept(f()) == false, ""); - static_assert(noexcept(g()) == true, ""); - - } - - namespace test_constexpr - { - - template < typename CharT > - unsigned long constexpr - strlen_c_r(const CharT *const s, const unsigned long acc) noexcept - { - return *s ? strlen_c_r(s + 1, acc + 1) : acc; - } - - template < typename CharT > - unsigned long constexpr - strlen_c(const CharT *const s) noexcept - { - return strlen_c_r(s, 0UL); - } - - static_assert(strlen_c("") == 0UL, ""); - static_assert(strlen_c("1") == 1UL, ""); - static_assert(strlen_c("example") == 7UL, ""); - static_assert(strlen_c("another\0example") == 7UL, ""); - - } - - namespace test_rvalue_references - { - - template < int N > - struct answer - { - static constexpr int value = N; - }; - - answer<1> f(int&) { return answer<1>(); } - answer<2> f(const int&) { return answer<2>(); } - answer<3> f(int&&) { return answer<3>(); } - - void - test() - { - int i = 0; - const int c = 0; - static_assert(decltype(f(i))::value == 1, ""); - static_assert(decltype(f(c))::value == 2, ""); - static_assert(decltype(f(0))::value == 3, ""); - } - - } - - namespace test_uniform_initialization - { - - struct test - { - static const int zero {}; - static const int one {1}; - }; - - static_assert(test::zero == 0, ""); - static_assert(test::one == 1, ""); - - } - - namespace test_lambdas - { - - void - test1() - { - auto lambda1 = [](){}; - auto lambda2 = lambda1; - lambda1(); - lambda2(); - } - - int - test2() - { - auto a = [](int i, int j){ return i + j; }(1, 2); - auto b = []() -> int { return '0'; }(); - auto c = [=](){ return a + b; }(); - auto d = [&](){ return c; }(); - auto e = [a, &b](int x) mutable { - const auto identity = [](int y){ return y; }; - for (auto i = 0; i < a; ++i) - a += b--; - return x + identity(a + b); - }(0); - return a + b + c + d + e; - } - - int - test3() - { - 
const auto nullary = [](){ return 0; }; - const auto unary = [](int x){ return x; }; - using nullary_t = decltype(nullary); - using unary_t = decltype(unary); - const auto higher1st = [](nullary_t f){ return f(); }; - const auto higher2nd = [unary](nullary_t f1){ - return [unary, f1](unary_t f2){ return f2(unary(f1())); }; - }; - return higher1st(nullary) + higher2nd(nullary)(unary); - } - - } - - namespace test_variadic_templates - { - - template - struct sum; - - template - struct sum - { - static constexpr auto value = N0 + sum::value; - }; - - template <> - struct sum<> - { - static constexpr auto value = 0; - }; - - static_assert(sum<>::value == 0, ""); - static_assert(sum<1>::value == 1, ""); - static_assert(sum<23>::value == 23, ""); - static_assert(sum<1, 2>::value == 3, ""); - static_assert(sum<5, 5, 11>::value == 21, ""); - static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); - - } - - // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae - // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function - // because of this. - namespace test_template_alias_sfinae - { - - struct foo {}; - - template - using member = typename T::member_type; - - template - void func(...) {} - - template - void func(member*) {} - - void test(); - - void test() { func(0); } - - } - -} // namespace cxx11 - -#endif // __cplusplus >= 201103L - -]]) - - -dnl Tests for new features in C++14 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ - -// If the compiler admits that it is not ready for C++14, why torture it? -// Hopefully, this will speed up the test. - -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus < 201402L - -#error "This is not a C++14 compiler" - -#else - -namespace cxx14 -{ - - namespace test_polymorphic_lambdas - { - - int - test() - { - const auto lambda = [](auto&&... args){ - const auto istiny = [](auto x){ - return (sizeof(x) == 1UL) ? 
1 : 0; - }; - const int aretiny[] = { istiny(args)... }; - return aretiny[0]; - }; - return lambda(1, 1L, 1.0f, '1'); - } - - } - - namespace test_binary_literals - { - - constexpr auto ivii = 0b0000000000101010; - static_assert(ivii == 42, "wrong value"); - - } - - namespace test_generalized_constexpr - { - - template < typename CharT > - constexpr unsigned long - strlen_c(const CharT *const s) noexcept - { - auto length = 0UL; - for (auto p = s; *p; ++p) - ++length; - return length; - } - - static_assert(strlen_c("") == 0UL, ""); - static_assert(strlen_c("x") == 1UL, ""); - static_assert(strlen_c("test") == 4UL, ""); - static_assert(strlen_c("another\0test") == 7UL, ""); - - } - - namespace test_lambda_init_capture - { - - int - test() - { - auto x = 0; - const auto lambda1 = [a = x](int b){ return a + b; }; - const auto lambda2 = [a = lambda1(x)](){ return a; }; - return lambda2(); - } - - } - - namespace test_digit_separators - { - - constexpr auto ten_million = 100'000'000; - static_assert(ten_million == 100000000, ""); - - } - - namespace test_return_type_deduction - { - - auto f(int& x) { return x; } - decltype(auto) g(int& x) { return x; } - - template < typename T1, typename T2 > - struct is_same - { - static constexpr auto value = false; - }; - - template < typename T > - struct is_same - { - static constexpr auto value = true; - }; - - int - test() - { - auto x = 0; - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - return x; - } - - } - -} // namespace cxx14 - -#endif // __cplusplus >= 201402L - -]]) - - -dnl Tests for new features in C++17 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ - -// If the compiler admits that it is not ready for C++17, why torture it? -// Hopefully, this will speed up the test. 
- -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus <= 201402L - -#error "This is not a C++17 compiler" - -#else - -#if defined(__clang__) - #define REALLY_CLANG -#else - #if defined(__GNUC__) - #define REALLY_GCC - #endif -#endif - -#include -#include -#include - -namespace cxx17 -{ - -#if !defined(REALLY_CLANG) - namespace test_constexpr_lambdas - { - - // TODO: test it with clang++ from git - - constexpr int foo = [](){return 42;}(); - - } -#endif // !defined(REALLY_CLANG) - - namespace test::nested_namespace::definitions - { - - } - - namespace test_fold_expression - { - - template - int multiply(Args... args) - { - return (args * ... * 1); - } - - template - bool all(Args... args) - { - return (args && ...); - } - - } - - namespace test_extended_static_assert - { - - static_assert (true); - - } - - namespace test_auto_brace_init_list - { - - auto foo = {5}; - auto bar {5}; - - static_assert(std::is_same, decltype(foo)>::value); - static_assert(std::is_same::value); - } - - namespace test_typename_in_template_template_parameter - { - - template typename X> struct D; - - } - - namespace test_fallthrough_nodiscard_maybe_unused_attributes - { - - int f1() - { - return 42; - } - - [[nodiscard]] int f2() - { - [[maybe_unused]] auto unused = f1(); - - switch (f1()) - { - case 17: - f1(); - [[fallthrough]]; - case 42: - f1(); - } - return f1(); - } - - } - - namespace test_extended_aggregate_initialization - { - - struct base1 - { - int b1, b2 = 42; - }; - - struct base2 - { - base2() { - b3 = 42; - } - int b3; - }; - - struct derived : base1, base2 - { - int d; - }; - - derived d1 {{1, 2}, {}, 4}; // full initialization - derived d2 {{}, {}, 4}; // value-initialized bases - - } - - namespace test_general_range_based_for_loop - { - - struct iter - { - int i; - - int& operator* () - { - return i; - } - - const int& operator* () const - { - return i; - } - - iter& operator++() - { - ++i; - return *this; - } - }; - - struct sentinel - { - 
int i; - }; - - bool operator== (const iter& i, const sentinel& s) - { - return i.i == s.i; - } - - bool operator!= (const iter& i, const sentinel& s) - { - return !(i == s); - } - - struct range - { - iter begin() const - { - return {0}; - } - - sentinel end() const - { - return {5}; - } - }; - - void f() - { - range r {}; - - for (auto i : r) - { - [[maybe_unused]] auto v = i; - } - } - - } - - namespace test_lambda_capture_asterisk_this_by_value - { - - struct t - { - int i; - int foo() - { - return [*this]() - { - return i; - }(); - } - }; - - } - - namespace test_enum_class_construction - { - - enum class byte : unsigned char - {}; - - byte foo {42}; - - } - - namespace test_constexpr_if - { - - template - int f () - { - if constexpr(cond) - { - return 13; - } - else - { - return 42; - } - } - - } - - namespace test_selection_statement_with_initializer - { - - int f() - { - return 13; - } - - int f2() - { - if (auto i = f(); i > 0) - { - return 3; - } - - switch (auto i = f(); i + 4) - { - case 17: - return 2; - - default: - return 1; - } - } - - } - -#if !defined(REALLY_CLANG) - namespace test_template_argument_deduction_for_class_templates - { - - // TODO: test it with clang++ from git - - template - struct pair - { - pair (T1 p1, T2 p2) - : m1 {p1}, - m2 {p2} - {} - - T1 m1; - T2 m2; - }; - - void f() - { - [[maybe_unused]] auto p = pair{13, 42u}; - } - - } -#endif // !defined(REALLY_CLANG) - - namespace test_non_type_auto_template_parameters - { - - template - struct B - {}; - - B<5> b1; - B<'a'> b2; - - } - -#if !defined(REALLY_CLANG) - namespace test_structured_bindings - { - - // TODO: test it with clang++ from git - - int arr[2] = { 1, 2 }; - std::pair pr = { 1, 2 }; - - auto f1() -> int(&)[2] - { - return arr; - } - - auto f2() -> std::pair& - { - return pr; - } - - struct S - { - int x1 : 2; - volatile double y1; - }; - - S f3() - { - return {}; - } - - auto [ x1, y1 ] = f1(); - auto& [ xr1, yr1 ] = f1(); - auto [ x2, y2 ] = f2(); - auto& [ xr2, yr2 
] = f2(); - const auto [ x3, y3 ] = f3(); - - } -#endif // !defined(REALLY_CLANG) - -#if !defined(REALLY_CLANG) - namespace test_exception_spec_type_system - { - - // TODO: test it with clang++ from git - - struct Good {}; - struct Bad {}; - - void g1() noexcept; - void g2(); - - template - Bad - f(T*, T*); - - template - Good - f(T1*, T2*); - - static_assert (std::is_same_v); - - } -#endif // !defined(REALLY_CLANG) - - namespace test_inline_variables - { - - template void f(T) - {} - - template inline T g(T) - { - return T{}; - } - - template<> inline void f<>(int) - {} - - template<> int g<>(int) - { - return 5; - } - - } - -} // namespace cxx17 - -#endif // __cplusplus <= 201402L - -]]) diff --git a/m4/ax_prog_bison.m4 b/m4/ax_prog_bison.m4 deleted file mode 100755 index aa3bb112..00000000 --- a/m4/ax_prog_bison.m4 +++ /dev/null @@ -1,68 +0,0 @@ -# =========================================================================== -# http://www.nongnu.org/autoconf-archive/ax_prog_bison.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_PROG_BISON(ACTION-IF-TRUE,ACTION-IF-FALSE) -# -# DESCRIPTION -# -# Check whether bison is the parser generator. Run ACTION-IF-TRUE if -# successful, ACTION-IF-FALSE otherwise -# -# LICENSE -# -# Copyright (c) 2009 Francesco Salvestrini -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . 
-# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -AC_DEFUN([AX_PROG_BISON], [ - AC_REQUIRE([AC_PROG_YACC]) - AC_REQUIRE([AC_PROG_SED]) - - AC_CACHE_CHECK([if bison is the parser generator],[ax_cv_prog_bison],[ - AS_IF([test "`echo \"$YACC\" | $SED 's,^.*\(bison\).*$,\1,'`" = "bison" ],[ - ax_cv_prog_bison=yes - ],[ - ax_cv_prog_bison=no - ]) - ]) - - AC_DEFINE([HAVE_YACC_OLD_PUSH],[0],[If old-style push parser syntax is supported by ${YACC}]) - AM_CONDITIONAL([HAVE_YACC_OLD_PUSH],[test "x${HAVE_YACC_OLD_PUSH}" == "x1"]) - AC_DEFINE([HAVE_YACC_OLD_PURE],[0],[If old-style pure reentrant parser syntax is supported by ${YACC}]) - AM_CONDITIONAL([HAVE_YACC_OLD_PURE],[test "x${HAVE_YACC_OLD_PURE}" == "x1"]) - - AS_IF([test "$ax_cv_prog_bison" = yes],[ - : - $1 - ],[ - : - $2 - ]) -]) diff --git a/m4/ax_prog_bison_clfeatures.m4 b/m4/ax_prog_bison_clfeatures.m4 deleted file mode 100755 index 46e82665..00000000 --- a/m4/ax_prog_bison_clfeatures.m4 +++ /dev/null @@ -1,137 +0,0 @@ -AC_DEFUN([AX_PROG_BISON_CLFEATURES], [ - AC_REQUIRE([AC_PROG_YACC]) - AC_REQUIRE([AC_PROG_SED]) - - AC_CACHE_CHECK([if bison is the parser generator],[ax_cv_prog_bison],[ - AS_IF([test "`echo \"$YACC\" | $SED 's,^.*\(bison\).*$,\1,'`" = "bison" ],[ - 
ax_cv_prog_bison=yes - ],[ - ax_cv_prog_bison=no - ]) - ]) - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" -then - AC_SUBST([CODES_PURE_PARSER_DEFINES], ["%pure-parser"]) - AC_MSG_RESULT([old-style]) - $3 -else - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" - then - AC_SUBST([CODES_PURE_PARSER_DEFINES], ["%define api.pure"]) - AC_MSG_RESULT([new-style]) - $3 - else - AC_MSG_RESULT([feature not supported]) - BVER=`${YACC} --version | head -n 1` - AC_MSG_WARN([${BVER} does not support pure / reentrant parser generation]) - $4 - fi -fi - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" -then - AC_SUBST([CODES_PUSH_PARSER_DEFINES], ["%define api.push_pull \"push\""]) - AC_MSG_RESULT([old-style]) - $3 -else - -cat > conftest.y < /dev/null 2>&1 && eval "$ac_compile_yacc" - then - AC_SUBST([CODES_PUSH_PARSER_DEFINES], ["%define api.push-pull push"]) - AC_MSG_RESULT([new-style]) - $3 - else - AC_MSG_RESULT([feature not supported]) - BVER=`${YACC} --version | head -n 1` - AC_MSG_WARN([${BVER} does not support push parser generation]) - $4 - fi -fi - - AS_IF([test "$ax_cv_prog_bison" = yes],[ - : - $1 - ],[ - : - $2 - ]) - - # cleanup bison / yacc tmp files - rm -rf y.output y.tab.h y.tab.c y.tab.o -]) diff --git a/m4/ax_prog_flex.m4 b/m4/ax_prog_flex.m4 deleted file mode 100755 index 6f8c6107..00000000 --- a/m4/ax_prog_flex.m4 +++ /dev/null @@ -1,62 +0,0 @@ -# =========================================================================== -# http://www.nongnu.org/autoconf-archive/ax_prog_flex.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_PROG_FLEX(ACTION-IF-TRUE,ACTION-IF-FALSE) -# -# DESCRIPTION -# -# Check whether flex is the scanner generator. 
Run ACTION-IF-TRUE if -# successful, ACTION-IF-FALSE otherwise -# -# LICENSE -# -# Copyright (c) 2009 Francesco Salvestrini -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -AC_DEFUN([AX_PROG_FLEX], [ - AC_REQUIRE([AC_PROG_LEX]) - AC_REQUIRE([AC_PROG_SED]) - - AC_CACHE_CHECK([if flex is the lexer generator],[ax_cv_prog_flex],[ - AS_IF([test "`echo \"$LEX\" | $SED 's,^.*\(flex\).*$,\1,'`" = "flex"],[ - ax_cv_prog_flex=yes - ],[ - ax_cv_prog_flex=no - ]) - ]) - AS_IF([test "$ax_cv_prog_flex" = yes],[ - : - $1 - ],[ - : - $2 - ]) -]) diff --git a/m4/libtool.m4 b/m4/libtool.m4 deleted file mode 100644 index 56666f0e..00000000 --- a/m4/libtool.m4 +++ /dev/null @@ -1,7986 +0,0 @@ -# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- -# -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, -# 2006, 2007, 2008, 2009, 2010, 2011 Free Software -# Foundation, Inc. -# Written by Gordon Matzigkeit, 1996 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -m4_define([_LT_COPYING], [dnl -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, -# 2006, 2007, 2008, 2009, 2010, 2011 Free Software -# Foundation, Inc. -# Written by Gordon Matzigkeit, 1996 -# -# This file is part of GNU Libtool. -# -# GNU Libtool is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of -# the License, or (at your option) any later version. -# -# As a special exception to the GNU General Public License, -# if you distribute this file as part of a program or library that -# is built using GNU Libtool, you may include this file under the -# same distribution terms that you use for the rest of that program. -# -# GNU Libtool is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with GNU Libtool; see the file COPYING. If not, a copy -# can be downloaded from http://www.gnu.org/licenses/gpl.html, or -# obtained by writing to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -]) - -# serial 57 LT_INIT - - -# LT_PREREQ(VERSION) -# ------------------ -# Complain and exit if this libtool version is less that VERSION. -m4_defun([LT_PREREQ], -[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, - [m4_default([$3], - [m4_fatal([Libtool version $1 or higher is required], - 63)])], - [$2])]) - - -# _LT_CHECK_BUILDDIR -# ------------------ -# Complain if the absolute build directory name contains unusual characters -m4_defun([_LT_CHECK_BUILDDIR], -[case `pwd` in - *\ * | *\ *) - AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; -esac -]) - - -# LT_INIT([OPTIONS]) -# ------------------ -AC_DEFUN([LT_INIT], -[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT -AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl -AC_BEFORE([$0], [LT_LANG])dnl -AC_BEFORE([$0], [LT_OUTPUT])dnl -AC_BEFORE([$0], [LTDL_INIT])dnl -m4_require([_LT_CHECK_BUILDDIR])dnl - -dnl Autoconf doesn't catch unexpanded LT_ macros by default: -m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl -m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl -dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 -dnl unless we require an AC_DEFUNed macro: -AC_REQUIRE([LTOPTIONS_VERSION])dnl -AC_REQUIRE([LTSUGAR_VERSION])dnl -AC_REQUIRE([LTVERSION_VERSION])dnl -AC_REQUIRE([LTOBSOLETE_VERSION])dnl -m4_require([_LT_PROG_LTMAIN])dnl - -_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) - -dnl Parse OPTIONS -_LT_SET_OPTIONS([$0], [$1]) - -# This can be used to rebuild libtool when needed -LIBTOOL_DEPS="$ltmain" - -# Always use our own libtool. 
-LIBTOOL='$(SHELL) $(top_builddir)/libtool' -AC_SUBST(LIBTOOL)dnl - -_LT_SETUP - -# Only expand once: -m4_define([LT_INIT]) -])# LT_INIT - -# Old names: -AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) -AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_PROG_LIBTOOL], []) -dnl AC_DEFUN([AM_PROG_LIBTOOL], []) - - -# _LT_CC_BASENAME(CC) -# ------------------- -# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. -m4_defun([_LT_CC_BASENAME], -[for cc_temp in $1""; do - case $cc_temp in - compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; - distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; - \-*) ;; - *) break;; - esac -done -cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` -]) - - -# _LT_FILEUTILS_DEFAULTS -# ---------------------- -# It is okay to use these file commands and assume they have been set -# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'. -m4_defun([_LT_FILEUTILS_DEFAULTS], -[: ${CP="cp -f"} -: ${MV="mv -f"} -: ${RM="rm -f"} -])# _LT_FILEUTILS_DEFAULTS - - -# _LT_SETUP -# --------- -m4_defun([_LT_SETUP], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl -AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl - -_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl -dnl -_LT_DECL([], [host_alias], [0], [The host system])dnl -_LT_DECL([], [host], [0])dnl -_LT_DECL([], [host_os], [0])dnl -dnl -_LT_DECL([], [build_alias], [0], [The build system])dnl -_LT_DECL([], [build], [0])dnl -_LT_DECL([], [build_os], [0])dnl -dnl -AC_REQUIRE([AC_PROG_CC])dnl -AC_REQUIRE([LT_PATH_LD])dnl -AC_REQUIRE([LT_PATH_NM])dnl -dnl -AC_REQUIRE([AC_PROG_LN_S])dnl -test -z "$LN_S" && LN_S="ln -s" -_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl -dnl -AC_REQUIRE([LT_CMD_MAX_LEN])dnl -_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl -_LT_DECL([], [exeext], [0], 
[Executable file suffix (normally "")])dnl -dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_CHECK_SHELL_FEATURES])dnl -m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl -m4_require([_LT_CMD_RELOAD])dnl -m4_require([_LT_CHECK_MAGIC_METHOD])dnl -m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl -m4_require([_LT_CMD_OLD_ARCHIVE])dnl -m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl -m4_require([_LT_WITH_SYSROOT])dnl - -_LT_CONFIG_LIBTOOL_INIT([ -# See if we are running on zsh, and set the options which allow our -# commands through without removal of \ escapes INIT. -if test -n "\${ZSH_VERSION+set}" ; then - setopt NO_GLOB_SUBST -fi -]) -if test -n "${ZSH_VERSION+set}" ; then - setopt NO_GLOB_SUBST -fi - -_LT_CHECK_OBJDIR - -m4_require([_LT_TAG_COMPILER])dnl - -case $host_os in -aix3*) - # AIX sometimes has problems with the GCC collect2 program. For some - # reason, if we set the COLLECT_NAMES environment variable, the problems - # vanish in a puff of smoke. - if test "X${COLLECT_NAMES+set}" != Xset; then - COLLECT_NAMES= - export COLLECT_NAMES - fi - ;; -esac - -# Global variables: -ofile=libtool -can_build_shared=yes - -# All known linkers require a `.a' archive for static linking (except MSVC, -# which needs '.lib'). 
-libext=a - -with_gnu_ld="$lt_cv_prog_gnu_ld" - -old_CC="$CC" -old_CFLAGS="$CFLAGS" - -# Set sane defaults for various variables -test -z "$CC" && CC=cc -test -z "$LTCC" && LTCC=$CC -test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS -test -z "$LD" && LD=ld -test -z "$ac_objext" && ac_objext=o - -_LT_CC_BASENAME([$compiler]) - -# Only perform the check for file, if the check method requires it -test -z "$MAGIC_CMD" && MAGIC_CMD=file -case $deplibs_check_method in -file_magic*) - if test "$file_magic_cmd" = '$MAGIC_CMD'; then - _LT_PATH_MAGIC - fi - ;; -esac - -# Use C for the default configuration in the libtool script -LT_SUPPORTED_TAG([CC]) -_LT_LANG_C_CONFIG -_LT_LANG_DEFAULT_CONFIG -_LT_CONFIG_COMMANDS -])# _LT_SETUP - - -# _LT_PREPARE_SED_QUOTE_VARS -# -------------------------- -# Define a few sed substitution that help us do robust quoting. -m4_defun([_LT_PREPARE_SED_QUOTE_VARS], -[# Backslashify metacharacters that are still active within -# double-quoted strings. -sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' - -# Same as above, but do not quote variable references. -double_quote_subst='s/\([["`\\]]\)/\\\1/g' - -# Sed substitution to delay expansion of an escaped shell variable in a -# double_quote_subst'ed string. -delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' - -# Sed substitution to delay expansion of an escaped single quote. -delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' - -# Sed substitution to avoid accidental globbing in evaled expressions -no_glob_subst='s/\*/\\\*/g' -]) - -# _LT_PROG_LTMAIN -# --------------- -# Note that this code is called both from `configure', and `config.status' -# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, -# `config.status' has no value for ac_aux_dir unless we are using Automake, -# so we pass a copy along to make sure it has a sensible value anyway. 
-m4_defun([_LT_PROG_LTMAIN], -[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl -_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) -ltmain="$ac_aux_dir/ltmain.sh" -])# _LT_PROG_LTMAIN - - -## ------------------------------------- ## -## Accumulate code for creating libtool. ## -## ------------------------------------- ## - -# So that we can recreate a full libtool script including additional -# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS -# in macros and then make a single call at the end using the `libtool' -# label. - - -# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) -# ---------------------------------------- -# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. -m4_define([_LT_CONFIG_LIBTOOL_INIT], -[m4_ifval([$1], - [m4_append([_LT_OUTPUT_LIBTOOL_INIT], - [$1 -])])]) - -# Initialize. -m4_define([_LT_OUTPUT_LIBTOOL_INIT]) - - -# _LT_CONFIG_LIBTOOL([COMMANDS]) -# ------------------------------ -# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. -m4_define([_LT_CONFIG_LIBTOOL], -[m4_ifval([$1], - [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], - [$1 -])])]) - -# Initialize. -m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) - - -# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) -# ----------------------------------------------------- -m4_defun([_LT_CONFIG_SAVE_COMMANDS], -[_LT_CONFIG_LIBTOOL([$1]) -_LT_CONFIG_LIBTOOL_INIT([$2]) -]) - - -# _LT_FORMAT_COMMENT([COMMENT]) -# ----------------------------- -# Add leading comment marks to the start of each line, and a trailing -# full-stop to the whole comment if one is not present already. 
-m4_define([_LT_FORMAT_COMMENT], -[m4_ifval([$1], [ -m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], - [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) -)]) - - - -## ------------------------ ## -## FIXME: Eliminate VARNAME ## -## ------------------------ ## - - -# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) -# ------------------------------------------------------------------- -# CONFIGNAME is the name given to the value in the libtool script. -# VARNAME is the (base) name used in the configure script. -# VALUE may be 0, 1 or 2 for a computed quote escaped value based on -# VARNAME. Any other value will be used directly. -m4_define([_LT_DECL], -[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], - [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], - [m4_ifval([$1], [$1], [$2])]) - lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) - m4_ifval([$4], - [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) - lt_dict_add_subkey([lt_decl_dict], [$2], - [tagged?], [m4_ifval([$5], [yes], [no])])]) -]) - - -# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) -# -------------------------------------------------------- -m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) - - -# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) -# ------------------------------------------------ -m4_define([lt_decl_tag_varnames], -[_lt_decl_filter([tagged?], [yes], $@)]) - - -# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) -# --------------------------------------------------------- -m4_define([_lt_decl_filter], -[m4_case([$#], - [0], [m4_fatal([$0: too few arguments: $#])], - [1], [m4_fatal([$0: too few arguments: $#: $1])], - [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], - [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], - [lt_dict_filter([lt_decl_dict], $@)])[]dnl -]) - - -# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) -# 
-------------------------------------------------- -m4_define([lt_decl_quote_varnames], -[_lt_decl_filter([value], [1], $@)]) - - -# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) -# --------------------------------------------------- -m4_define([lt_decl_dquote_varnames], -[_lt_decl_filter([value], [2], $@)]) - - -# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) -# --------------------------------------------------- -m4_define([lt_decl_varnames_tagged], -[m4_assert([$# <= 2])dnl -_$0(m4_quote(m4_default([$1], [[, ]])), - m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), - m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) -m4_define([_lt_decl_varnames_tagged], -[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) - - -# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) -# ------------------------------------------------ -m4_define([lt_decl_all_varnames], -[_$0(m4_quote(m4_default([$1], [[, ]])), - m4_if([$2], [], - m4_quote(lt_decl_varnames), - m4_quote(m4_shift($@))))[]dnl -]) -m4_define([_lt_decl_all_varnames], -[lt_join($@, lt_decl_varnames_tagged([$1], - lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl -]) - - -# _LT_CONFIG_STATUS_DECLARE([VARNAME]) -# ------------------------------------ -# Quote a variable value, and forward it to `config.status' so that its -# declaration there will have the same value as in `configure'. VARNAME -# must have a single quote delimited value for this to work. -m4_define([_LT_CONFIG_STATUS_DECLARE], -[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) - - -# _LT_CONFIG_STATUS_DECLARATIONS -# ------------------------------ -# We delimit libtool config variables with single quotes, so when -# we write them to config.status, we have to be sure to quote all -# embedded single quotes properly. 
In configure, this macro expands -# each variable declared with _LT_DECL (and _LT_TAGDECL) into: -# -# ='`$ECHO "$" | $SED "$delay_single_quote_subst"`' -m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], -[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), - [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) - - -# _LT_LIBTOOL_TAGS -# ---------------- -# Output comment and list of tags supported by the script -m4_defun([_LT_LIBTOOL_TAGS], -[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl -available_tags="_LT_TAGS"dnl -]) - - -# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) -# ----------------------------------- -# Extract the dictionary values for VARNAME (optionally with TAG) and -# expand to a commented shell variable setting: -# -# # Some comment about what VAR is for. -# visible_name=$lt_internal_name -m4_define([_LT_LIBTOOL_DECLARE], -[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], - [description])))[]dnl -m4_pushdef([_libtool_name], - m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl -m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), - [0], [_libtool_name=[$]$1], - [1], [_libtool_name=$lt_[]$1], - [2], [_libtool_name=$lt_[]$1], - [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl -m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl -]) - - -# _LT_LIBTOOL_CONFIG_VARS -# ----------------------- -# Produce commented declarations of non-tagged libtool config variables -# suitable for insertion in the LIBTOOL CONFIG section of the `libtool' -# script. Tagged libtool config variables (even for the LIBTOOL CONFIG -# section) are produced by _LT_LIBTOOL_TAG_VARS. 
-m4_defun([_LT_LIBTOOL_CONFIG_VARS], -[m4_foreach([_lt_var], - m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), - [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) - - -# _LT_LIBTOOL_TAG_VARS(TAG) -# ------------------------- -m4_define([_LT_LIBTOOL_TAG_VARS], -[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), - [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) - - -# _LT_TAGVAR(VARNAME, [TAGNAME]) -# ------------------------------ -m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) - - -# _LT_CONFIG_COMMANDS -# ------------------- -# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of -# variables for single and double quote escaping we saved from calls -# to _LT_DECL, we can put quote escaped variables declarations -# into `config.status', and then the shell code to quote escape them in -# for loops in `config.status'. Finally, any additional code accumulated -# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. -m4_defun([_LT_CONFIG_COMMANDS], -[AC_PROVIDE_IFELSE([LT_OUTPUT], - dnl If the libtool generation code has been placed in $CONFIG_LT, - dnl instead of duplicating it all over again into config.status, - dnl then we will have config.status run $CONFIG_LT later, so it - dnl needs to know what name is stored there: - [AC_CONFIG_COMMANDS([libtool], - [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], - dnl If the libtool generation code is destined for config.status, - dnl expand the accumulated commands and init code now: - [AC_CONFIG_COMMANDS([libtool], - [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) -])#_LT_CONFIG_COMMANDS - - -# Initialize. -m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], -[ - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. 
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -sed_quote_subst='$sed_quote_subst' -double_quote_subst='$double_quote_subst' -delay_variable_subst='$delay_variable_subst' -_LT_CONFIG_STATUS_DECLARATIONS -LTCC='$LTCC' -LTCFLAGS='$LTCFLAGS' -compiler='$compiler_DEFAULT' - -# A function that is used when there is no print builtin or printf. -func_fallback_echo () -{ - eval 'cat <<_LTECHO_EOF -\$[]1 -_LTECHO_EOF' -} - -# Quote evaled strings. -for var in lt_decl_all_varnames([[ \ -]], lt_decl_quote_varnames); do - case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in - *[[\\\\\\\`\\"\\\$]]*) - eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" - ;; - *) - eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" - ;; - esac -done - -# Double-quote double-evaled strings. -for var in lt_decl_all_varnames([[ \ -]], lt_decl_dquote_varnames); do - case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in - *[[\\\\\\\`\\"\\\$]]*) - eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" - ;; - *) - eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" - ;; - esac -done - -_LT_OUTPUT_LIBTOOL_INIT -]) - -# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) -# ------------------------------------ -# Generate a child script FILE with all initialization necessary to -# reuse the environment learned by the parent script, and make the -# file executable. If COMMENT is supplied, it is inserted after the -# `#!' sequence but before initialization text begins. After this -# macro, additional text can be appended to FILE to form the body of -# the child script. The macro ends with non-zero status if the -# file could not be fully written (such as if the disk is full). 
-m4_ifdef([AS_INIT_GENERATED], -[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], -[m4_defun([_LT_GENERATED_FILE_INIT], -[m4_require([AS_PREPARE])]dnl -[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl -[lt_write_fail=0 -cat >$1 <<_ASEOF || lt_write_fail=1 -#! $SHELL -# Generated by $as_me. -$2 -SHELL=\${CONFIG_SHELL-$SHELL} -export SHELL -_ASEOF -cat >>$1 <<\_ASEOF || lt_write_fail=1 -AS_SHELL_SANITIZE -_AS_PREPARE -exec AS_MESSAGE_FD>&1 -_ASEOF -test $lt_write_fail = 0 && chmod +x $1[]dnl -m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT - -# LT_OUTPUT -# --------- -# This macro allows early generation of the libtool script (before -# AC_OUTPUT is called), incase it is used in configure for compilation -# tests. -AC_DEFUN([LT_OUTPUT], -[: ${CONFIG_LT=./config.lt} -AC_MSG_NOTICE([creating $CONFIG_LT]) -_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], -[# Run this file to recreate a libtool stub with the current configuration.]) - -cat >>"$CONFIG_LT" <<\_LTEOF -lt_cl_silent=false -exec AS_MESSAGE_LOG_FD>>config.log -{ - echo - AS_BOX([Running $as_me.]) -} >&AS_MESSAGE_LOG_FD - -lt_cl_help="\ -\`$as_me' creates a local libtool stub from the current configuration, -for use in further configure time tests before the real libtool is -generated. - -Usage: $[0] [[OPTIONS]] - - -h, --help print this help, then exit - -V, --version print version number, then exit - -q, --quiet do not print progress messages - -d, --debug don't remove temporary files - -Report bugs to ." - -lt_cl_version="\ -m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl -m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) -configured by $[0], generated by m4_PACKAGE_STRING. - -Copyright (C) 2011 Free Software Foundation, Inc. -This config.lt script is free software; the Free Software Foundation -gives unlimited permision to copy, distribute and modify it." 
- -while test $[#] != 0 -do - case $[1] in - --version | --v* | -V ) - echo "$lt_cl_version"; exit 0 ;; - --help | --h* | -h ) - echo "$lt_cl_help"; exit 0 ;; - --debug | --d* | -d ) - debug=: ;; - --quiet | --q* | --silent | --s* | -q ) - lt_cl_silent=: ;; - - -*) AC_MSG_ERROR([unrecognized option: $[1] -Try \`$[0] --help' for more information.]) ;; - - *) AC_MSG_ERROR([unrecognized argument: $[1] -Try \`$[0] --help' for more information.]) ;; - esac - shift -done - -if $lt_cl_silent; then - exec AS_MESSAGE_FD>/dev/null -fi -_LTEOF - -cat >>"$CONFIG_LT" <<_LTEOF -_LT_OUTPUT_LIBTOOL_COMMANDS_INIT -_LTEOF - -cat >>"$CONFIG_LT" <<\_LTEOF -AC_MSG_NOTICE([creating $ofile]) -_LT_OUTPUT_LIBTOOL_COMMANDS -AS_EXIT(0) -_LTEOF -chmod +x "$CONFIG_LT" - -# configure is writing to config.log, but config.lt does its own redirection, -# appending to config.log, which fails on DOS, as config.log is still kept -# open by configure. Here we exec the FD to /dev/null, effectively closing -# config.log, so it can be properly (re)opened and appended to by config.lt. -lt_cl_success=: -test "$silent" = yes && - lt_config_lt_args="$lt_config_lt_args --quiet" -exec AS_MESSAGE_LOG_FD>/dev/null -$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false -exec AS_MESSAGE_LOG_FD>>config.log -$lt_cl_success || AS_EXIT(1) -])# LT_OUTPUT - - -# _LT_CONFIG(TAG) -# --------------- -# If TAG is the built-in tag, create an initial libtool script with a -# default configuration from the untagged config vars. Otherwise add code -# to config.status for appending the configuration named by TAG from the -# matching tagged config vars. -m4_defun([_LT_CONFIG], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -_LT_CONFIG_SAVE_COMMANDS([ - m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl - m4_if(_LT_TAG, [C], [ - # See if we are running on zsh, and set the options which allow our - # commands through without removal of \ escapes. 
- if test -n "${ZSH_VERSION+set}" ; then - setopt NO_GLOB_SUBST - fi - - cfgfile="${ofile}T" - trap "$RM \"$cfgfile\"; exit 1" 1 2 15 - $RM "$cfgfile" - - cat <<_LT_EOF >> "$cfgfile" -#! $SHELL - -# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. -# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION -# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: -# NOTE: Changes made to this file will be lost: look at ltmain.sh. -# -_LT_COPYING -_LT_LIBTOOL_TAGS - -# ### BEGIN LIBTOOL CONFIG -_LT_LIBTOOL_CONFIG_VARS -_LT_LIBTOOL_TAG_VARS -# ### END LIBTOOL CONFIG - -_LT_EOF - - case $host_os in - aix3*) - cat <<\_LT_EOF >> "$cfgfile" -# AIX sometimes has problems with the GCC collect2 program. For some -# reason, if we set the COLLECT_NAMES environment variable, the problems -# vanish in a puff of smoke. -if test "X${COLLECT_NAMES+set}" != Xset; then - COLLECT_NAMES= - export COLLECT_NAMES -fi -_LT_EOF - ;; - esac - - _LT_PROG_LTMAIN - - # We use sed instead of cat because bash on DJGPP gets confused if - # if finds mixed CR/LF and LF-only lines. Since sed operates in - # text mode, it properly converts lines to CR/LF. This bash problem - # is reportedly fixed, but why not run on old versions too? - sed '$q' "$ltmain" >> "$cfgfile" \ - || (rm -f "$cfgfile"; exit 1) - - _LT_PROG_REPLACE_SHELLFNS - - mv -f "$cfgfile" "$ofile" || - (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") - chmod +x "$ofile" -], -[cat <<_LT_EOF >> "$ofile" - -dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded -dnl in a comment (ie after a #). 
-# ### BEGIN LIBTOOL TAG CONFIG: $1 -_LT_LIBTOOL_TAG_VARS(_LT_TAG) -# ### END LIBTOOL TAG CONFIG: $1 -_LT_EOF -])dnl /m4_if -], -[m4_if([$1], [], [ - PACKAGE='$PACKAGE' - VERSION='$VERSION' - TIMESTAMP='$TIMESTAMP' - RM='$RM' - ofile='$ofile'], []) -])dnl /_LT_CONFIG_SAVE_COMMANDS -])# _LT_CONFIG - - -# LT_SUPPORTED_TAG(TAG) -# --------------------- -# Trace this macro to discover what tags are supported by the libtool -# --tag option, using: -# autoconf --trace 'LT_SUPPORTED_TAG:$1' -AC_DEFUN([LT_SUPPORTED_TAG], []) - - -# C support is built-in for now -m4_define([_LT_LANG_C_enabled], []) -m4_define([_LT_TAGS], []) - - -# LT_LANG(LANG) -# ------------- -# Enable libtool support for the given language if not already enabled. -AC_DEFUN([LT_LANG], -[AC_BEFORE([$0], [LT_OUTPUT])dnl -m4_case([$1], - [C], [_LT_LANG(C)], - [C++], [_LT_LANG(CXX)], - [Go], [_LT_LANG(GO)], - [Java], [_LT_LANG(GCJ)], - [Fortran 77], [_LT_LANG(F77)], - [Fortran], [_LT_LANG(FC)], - [Windows Resource], [_LT_LANG(RC)], - [m4_ifdef([_LT_LANG_]$1[_CONFIG], - [_LT_LANG($1)], - [m4_fatal([$0: unsupported language: "$1"])])])dnl -])# LT_LANG - - -# _LT_LANG(LANGNAME) -# ------------------ -m4_defun([_LT_LANG], -[m4_ifdef([_LT_LANG_]$1[_enabled], [], - [LT_SUPPORTED_TAG([$1])dnl - m4_append([_LT_TAGS], [$1 ])dnl - m4_define([_LT_LANG_]$1[_enabled], [])dnl - _LT_LANG_$1_CONFIG($1)])dnl -])# _LT_LANG - - -m4_ifndef([AC_PROG_GO], [ -############################################################ -# NOTE: This macro has been submitted for inclusion into # -# GNU Autoconf as AC_PROG_GO. When it is available in # -# a released version of Autoconf we should remove this # -# macro and use it instead. 
# -############################################################ -m4_defun([AC_PROG_GO], -[AC_LANG_PUSH(Go)dnl -AC_ARG_VAR([GOC], [Go compiler command])dnl -AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl -_AC_ARG_VAR_LDFLAGS()dnl -AC_CHECK_TOOL(GOC, gccgo) -if test -z "$GOC"; then - if test -n "$ac_tool_prefix"; then - AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) - fi -fi -if test -z "$GOC"; then - AC_CHECK_PROG(GOC, gccgo, gccgo, false) -fi -])#m4_defun -])#m4_ifndef - - -# _LT_LANG_DEFAULT_CONFIG -# ----------------------- -m4_defun([_LT_LANG_DEFAULT_CONFIG], -[AC_PROVIDE_IFELSE([AC_PROG_CXX], - [LT_LANG(CXX)], - [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) - -AC_PROVIDE_IFELSE([AC_PROG_F77], - [LT_LANG(F77)], - [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [LT_LANG(FC)], - [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) - -dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal -dnl pulling things in needlessly. 
-AC_PROVIDE_IFELSE([AC_PROG_GCJ], - [LT_LANG(GCJ)], - [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], - [LT_LANG(GCJ)], - [AC_PROVIDE_IFELSE([LT_PROG_GCJ], - [LT_LANG(GCJ)], - [m4_ifdef([AC_PROG_GCJ], - [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) - m4_ifdef([A][M_PROG_GCJ], - [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) - m4_ifdef([LT_PROG_GCJ], - [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) - -AC_PROVIDE_IFELSE([AC_PROG_GO], - [LT_LANG(GO)], - [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) - -AC_PROVIDE_IFELSE([LT_PROG_RC], - [LT_LANG(RC)], - [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) -])# _LT_LANG_DEFAULT_CONFIG - -# Obsolete macros: -AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) -AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) -AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) -AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) -AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_CXX], []) -dnl AC_DEFUN([AC_LIBTOOL_F77], []) -dnl AC_DEFUN([AC_LIBTOOL_FC], []) -dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) -dnl AC_DEFUN([AC_LIBTOOL_RC], []) - - -# _LT_TAG_COMPILER -# ---------------- -m4_defun([_LT_TAG_COMPILER], -[AC_REQUIRE([AC_PROG_CC])dnl - -_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl -_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl -_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl -_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl - -# If no C compiler was specified, use CC. -LTCC=${LTCC-"$CC"} - -# If no C compiler flags were specified, use CFLAGS. -LTCFLAGS=${LTCFLAGS-"$CFLAGS"} - -# Allow CC to be a program name with arguments. -compiler=$CC -])# _LT_TAG_COMPILER - - -# _LT_COMPILER_BOILERPLATE -# ------------------------ -# Check for compiler boilerplate output or warnings with -# the simple compiler test code. 
-m4_defun([_LT_COMPILER_BOILERPLATE], -[m4_require([_LT_DECL_SED])dnl -ac_outfile=conftest.$ac_objext -echo "$lt_simple_compile_test_code" >conftest.$ac_ext -eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err -_lt_compiler_boilerplate=`cat conftest.err` -$RM conftest* -])# _LT_COMPILER_BOILERPLATE - - -# _LT_LINKER_BOILERPLATE -# ---------------------- -# Check for linker boilerplate output or warnings with -# the simple link test code. -m4_defun([_LT_LINKER_BOILERPLATE], -[m4_require([_LT_DECL_SED])dnl -ac_outfile=conftest.$ac_objext -echo "$lt_simple_link_test_code" >conftest.$ac_ext -eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err -_lt_linker_boilerplate=`cat conftest.err` -$RM -r conftest* -])# _LT_LINKER_BOILERPLATE - -# _LT_REQUIRED_DARWIN_CHECKS -# ------------------------- -m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ - case $host_os in - rhapsody* | darwin*) - AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) - AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) - AC_CHECK_TOOL([LIPO], [lipo], [:]) - AC_CHECK_TOOL([OTOOL], [otool], [:]) - AC_CHECK_TOOL([OTOOL64], [otool64], [:]) - _LT_DECL([], [DSYMUTIL], [1], - [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) - _LT_DECL([], [NMEDIT], [1], - [Tool to change global to local symbols on Mac OS X]) - _LT_DECL([], [LIPO], [1], - [Tool to manipulate fat objects and archives on Mac OS X]) - _LT_DECL([], [OTOOL], [1], - [ldd/readelf like tool for Mach-O binaries on Mac OS X]) - _LT_DECL([], [OTOOL64], [1], - [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) - - AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], - [lt_cv_apple_cc_single_mod=no - if test -z "${LT_MULTI_MODULE}"; then - # By default we will add the -single_module flag. You can override - # by either setting the environment variable LT_MULTI_MODULE - # non-empty at configure time, or by adding -multi_module to the - # link flags. 
- rm -rf libconftest.dylib* - echo "int foo(void){return 1;}" > conftest.c - echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ --dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD - $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ - -dynamiclib -Wl,-single_module conftest.c 2>conftest.err - _lt_result=$? - # If there is a non-empty error log, and "single_module" - # appears in it, assume the flag caused a linker warning - if test -s conftest.err && $GREP single_module conftest.err; then - cat conftest.err >&AS_MESSAGE_LOG_FD - # Otherwise, if the output was created with a 0 exit code from - # the compiler, it worked. - elif test -f libconftest.dylib && test $_lt_result -eq 0; then - lt_cv_apple_cc_single_mod=yes - else - cat conftest.err >&AS_MESSAGE_LOG_FD - fi - rm -rf libconftest.dylib* - rm -f conftest.* - fi]) - - AC_CACHE_CHECK([for -exported_symbols_list linker flag], - [lt_cv_ld_exported_symbols_list], - [lt_cv_ld_exported_symbols_list=no - save_LDFLAGS=$LDFLAGS - echo "_main" > conftest.sym - LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" - AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], - [lt_cv_ld_exported_symbols_list=yes], - [lt_cv_ld_exported_symbols_list=no]) - LDFLAGS="$save_LDFLAGS" - ]) - - AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], - [lt_cv_ld_force_load=no - cat > conftest.c << _LT_EOF -int forced_loaded() { return 2;} -_LT_EOF - echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD - $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD - echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD - $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD - echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD - $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD - cat > conftest.c << _LT_EOF -int main() { return 0;} -_LT_EOF - echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD - $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c 
-Wl,-force_load,./libconftest.a 2>conftest.err - _lt_result=$? - if test -s conftest.err && $GREP force_load conftest.err; then - cat conftest.err >&AS_MESSAGE_LOG_FD - elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then - lt_cv_ld_force_load=yes - else - cat conftest.err >&AS_MESSAGE_LOG_FD - fi - rm -f conftest.err libconftest.a conftest conftest.c - rm -rf conftest.dSYM - ]) - case $host_os in - rhapsody* | darwin1.[[012]]) - _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; - darwin1.*) - _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; - darwin*) # darwin 5.x on - # if running on 10.5 or later, the deployment target defaults - # to the OS version, if on x86, and 10.4, the deployment - # target defaults to 10.4. Don't you love it? - case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in - 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) - _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; - 10.[[012]]*) - _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; - 10.*) - _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; - esac - ;; - esac - if test "$lt_cv_apple_cc_single_mod" = "yes"; then - _lt_dar_single_mod='$single_module' - fi - if test "$lt_cv_ld_exported_symbols_list" = "yes"; then - _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' - else - _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' - fi - if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then - _lt_dsymutil='~$DSYMUTIL $lib || :' - else - _lt_dsymutil= - fi - ;; - esac -]) - - -# _LT_DARWIN_LINKER_FEATURES([TAG]) -# --------------------------------- -# Checks for linker and compiler features on darwin -m4_defun([_LT_DARWIN_LINKER_FEATURES], -[ - m4_require([_LT_REQUIRED_DARWIN_CHECKS]) - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_direct, $1)=no - 
_LT_TAGVAR(hardcode_automatic, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported - if test "$lt_cv_ld_force_load" = "yes"; then - _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' - m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], - [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) - else - _LT_TAGVAR(whole_archive_flag_spec, $1)='' - fi - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined" - case $cc_basename in - ifort*) _lt_dar_can_shared=yes ;; - *) _lt_dar_can_shared=$GCC ;; - esac - if test "$_lt_dar_can_shared" = "yes"; then - output_verbose_link_cmd=func_echo_all - _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" - _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" - _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" - m4_if([$1], [CXX], -[ if test "$lt_cv_apple_cc_single_mod" != "yes"; then - _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname 
\$verstring${_lt_dsymutil}" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" - fi -],[]) - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi -]) - -# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) -# ---------------------------------- -# Links a minimal program and checks the executable -# for the system default hardcoded library path. In most cases, -# this is /usr/lib:/lib, but when the MPI compilers are used -# the location of the communication and MPI libs are included too. -# If we don't find anything, use the default library path according -# to the aix ld manual. -# Store the results from the different compilers for each TAGNAME. -# Allow to override them for all tags through lt_cv_aix_libpath. -m4_defun([_LT_SYS_MODULE_PATH_AIX], -[m4_require([_LT_DECL_SED])dnl -if test "${lt_cv_aix_libpath+set}" = set; then - aix_libpath=$lt_cv_aix_libpath -else - AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], - [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ - lt_aix_libpath_sed='[ - /Import File Strings/,/^$/ { - /^0/ { - s/^0 *\([^ ]*\) *$/\1/ - p - } - }]' - _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` - # Check for a 64-bit object if we didn't find anything. 
- if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then - _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` - fi],[]) - if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then - _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib" - fi - ]) - aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) -fi -])# _LT_SYS_MODULE_PATH_AIX - - -# _LT_SHELL_INIT(ARG) -# ------------------- -m4_define([_LT_SHELL_INIT], -[m4_divert_text([M4SH-INIT], [$1 -])])# _LT_SHELL_INIT - - - -# _LT_PROG_ECHO_BACKSLASH -# ----------------------- -# Find how we can fake an echo command that does not interpret backslash. -# In particular, with Autoconf 2.60 or later we add some code to the start -# of the generated configure script which will find a shell with a builtin -# printf (which we can use as an echo command). -m4_defun([_LT_PROG_ECHO_BACKSLASH], -[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO -ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO - -AC_MSG_CHECKING([how to print strings]) -# Test print first, because it will be a builtin if present. -if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ - test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then - ECHO='print -r --' -elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then - ECHO='printf %s\n' -else - # Use this function as a fallback that always works. - func_fallback_echo () - { - eval 'cat <<_LTECHO_EOF -$[]1 -_LTECHO_EOF' - } - ECHO='func_fallback_echo' -fi - -# func_echo_all arg... -# Invoke $ECHO with all args, space-separated. 
-func_echo_all () -{ - $ECHO "$*" -} - -case "$ECHO" in - printf*) AC_MSG_RESULT([printf]) ;; - print*) AC_MSG_RESULT([print -r]) ;; - *) AC_MSG_RESULT([cat]) ;; -esac - -m4_ifdef([_AS_DETECT_SUGGESTED], -[_AS_DETECT_SUGGESTED([ - test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( - ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' - ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO - ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO - PATH=/empty FPATH=/empty; export PATH FPATH - test "X`printf %s $ECHO`" = "X$ECHO" \ - || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) - -_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) -_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) -])# _LT_PROG_ECHO_BACKSLASH - - -# _LT_WITH_SYSROOT -# ---------------- -AC_DEFUN([_LT_WITH_SYSROOT], -[AC_MSG_CHECKING([for sysroot]) -AC_ARG_WITH([sysroot], -[ --with-sysroot[=DIR] Search for dependent libraries within DIR - (or the compiler's sysroot if not specified).], -[], [with_sysroot=no]) - -dnl lt_sysroot will always be passed unquoted. We quote it here -dnl in case the user passed a directory name. 
-lt_sysroot= -case ${with_sysroot} in #( - yes) - if test "$GCC" = yes; then - lt_sysroot=`$CC --print-sysroot 2>/dev/null` - fi - ;; #( - /*) - lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` - ;; #( - no|'') - ;; #( - *) - AC_MSG_RESULT([${with_sysroot}]) - AC_MSG_ERROR([The sysroot must be an absolute path.]) - ;; -esac - - AC_MSG_RESULT([${lt_sysroot:-no}]) -_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl -[dependent libraries, and in which our libraries should be installed.])]) - -# _LT_ENABLE_LOCK -# --------------- -m4_defun([_LT_ENABLE_LOCK], -[AC_ARG_ENABLE([libtool-lock], - [AS_HELP_STRING([--disable-libtool-lock], - [avoid locking (might break parallel builds)])]) -test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes - -# Some flags need to be propagated to the compiler or linker for good -# libtool support. -case $host in -ia64-*-hpux*) - # Find out which ABI we are using. - echo 'int i;' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.$ac_objext` in - *ELF-32*) - HPUX_IA64_MODE="32" - ;; - *ELF-64*) - HPUX_IA64_MODE="64" - ;; - esac - fi - rm -rf conftest* - ;; -*-*-irix6*) - # Find out which ABI we are using. - echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - if test "$lt_cv_prog_gnu_ld" = yes; then - case `/usr/bin/file conftest.$ac_objext` in - *32-bit*) - LD="${LD-ld} -melf32bsmip" - ;; - *N32*) - LD="${LD-ld} -melf32bmipn32" - ;; - *64-bit*) - LD="${LD-ld} -melf64bmip" - ;; - esac - else - case `/usr/bin/file conftest.$ac_objext` in - *32-bit*) - LD="${LD-ld} -32" - ;; - *N32*) - LD="${LD-ld} -n32" - ;; - *64-bit*) - LD="${LD-ld} -64" - ;; - esac - fi - fi - rm -rf conftest* - ;; - -x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ -s390*-*linux*|s390*-*tpf*|sparc*-*linux*) - # Find out which ABI we are using. 
- echo 'int i;' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in - *32-bit*) - case $host in - x86_64-*kfreebsd*-gnu) - LD="${LD-ld} -m elf_i386_fbsd" - ;; - x86_64-*linux*) - LD="${LD-ld} -m elf_i386" - ;; - ppc64-*linux*|powerpc64-*linux*) - LD="${LD-ld} -m elf32ppclinux" - ;; - s390x-*linux*) - LD="${LD-ld} -m elf_s390" - ;; - sparc64-*linux*) - LD="${LD-ld} -m elf32_sparc" - ;; - esac - ;; - *64-bit*) - case $host in - x86_64-*kfreebsd*-gnu) - LD="${LD-ld} -m elf_x86_64_fbsd" - ;; - x86_64-*linux*) - LD="${LD-ld} -m elf_x86_64" - ;; - ppc*-*linux*|powerpc*-*linux*) - LD="${LD-ld} -m elf64ppc" - ;; - s390*-*linux*|s390*-*tpf*) - LD="${LD-ld} -m elf64_s390" - ;; - sparc*-*linux*) - LD="${LD-ld} -m elf64_sparc" - ;; - esac - ;; - esac - fi - rm -rf conftest* - ;; - -*-*-sco3.2v5*) - # On SCO OpenServer 5, we need -belf to get full-featured binaries. - SAVE_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -belf" - AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, - [AC_LANG_PUSH(C) - AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) - AC_LANG_POP]) - if test x"$lt_cv_cc_needs_belf" != x"yes"; then - # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf - CFLAGS="$SAVE_CFLAGS" - fi - ;; -*-*solaris*) - # Find out which ABI we are using. - echo 'int i;' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in - *64-bit*) - case $lt_cv_prog_gnu_ld in - yes*) - case $host in - i?86-*-solaris*) - LD="${LD-ld} -m elf_x86_64" - ;; - sparc*-*-solaris*) - LD="${LD-ld} -m elf64_sparc" - ;; - esac - # GNU ld 2.21 introduced _sol2 emulations. Use them if available. 
- if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then - LD="${LD-ld}_sol2" - fi - ;; - *) - if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then - LD="${LD-ld} -64" - fi - ;; - esac - ;; - esac - fi - rm -rf conftest* - ;; -esac - -need_locks="$enable_libtool_lock" -])# _LT_ENABLE_LOCK - - -# _LT_PROG_AR -# ----------- -m4_defun([_LT_PROG_AR], -[AC_CHECK_TOOLS(AR, [ar], false) -: ${AR=ar} -: ${AR_FLAGS=cru} -_LT_DECL([], [AR], [1], [The archiver]) -_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive]) - -AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], - [lt_cv_ar_at_file=no - AC_COMPILE_IFELSE([AC_LANG_PROGRAM], - [echo conftest.$ac_objext > conftest.lst - lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' - AC_TRY_EVAL([lt_ar_try]) - if test "$ac_status" -eq 0; then - # Ensure the archiver fails upon bogus file names. - rm -f conftest.$ac_objext libconftest.a - AC_TRY_EVAL([lt_ar_try]) - if test "$ac_status" -ne 0; then - lt_cv_ar_at_file=@ - fi - fi - rm -f conftest.* libconftest.a - ]) - ]) - -if test "x$lt_cv_ar_at_file" = xno; then - archiver_list_spec= -else - archiver_list_spec=$lt_cv_ar_at_file -fi -_LT_DECL([], [archiver_list_spec], [1], - [How to feed a file listing to the archiver]) -])# _LT_PROG_AR - - -# _LT_CMD_OLD_ARCHIVE -# ------------------- -m4_defun([_LT_CMD_OLD_ARCHIVE], -[_LT_PROG_AR - -AC_CHECK_TOOL(STRIP, strip, :) -test -z "$STRIP" && STRIP=: -_LT_DECL([], [STRIP], [1], [A symbol stripping program]) - -AC_CHECK_TOOL(RANLIB, ranlib, :) -test -z "$RANLIB" && RANLIB=: -_LT_DECL([], [RANLIB], [1], - [Commands used to install an old-style archive]) - -# Determine commands to create old-style static archives. 
-old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' -old_postinstall_cmds='chmod 644 $oldlib' -old_postuninstall_cmds= - -if test -n "$RANLIB"; then - case $host_os in - openbsd*) - old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" - ;; - *) - old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" - ;; - esac - old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" -fi - -case $host_os in - darwin*) - lock_old_archive_extraction=yes ;; - *) - lock_old_archive_extraction=no ;; -esac -_LT_DECL([], [old_postinstall_cmds], [2]) -_LT_DECL([], [old_postuninstall_cmds], [2]) -_LT_TAGDECL([], [old_archive_cmds], [2], - [Commands used to build an old-style archive]) -_LT_DECL([], [lock_old_archive_extraction], [0], - [Whether to use a lock for old archive extraction]) -])# _LT_CMD_OLD_ARCHIVE - - -# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, -# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) -# ---------------------------------------------------------------- -# Check whether the given compiler option works -AC_DEFUN([_LT_COMPILER_OPTION], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_SED])dnl -AC_CACHE_CHECK([$1], [$2], - [$2=no - m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) - echo "$lt_simple_compile_test_code" > conftest.$ac_ext - lt_compiler_flag="$3" - # Insert the option either (1) after the last *FLAGS variable, or - # (2) before a word containing "conftest.", or (3) at the end. - # Note that $ac_compile itself does not contain backslashes and begins - # with a dollar sign (not a hyphen), so the echo should work correctly. - # The option is referenced via a variable to avoid confusing sed. 
- lt_compile=`echo "$ac_compile" | $SED \ - -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ - -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ - -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) - (eval "$lt_compile" 2>conftest.err) - ac_status=$? - cat conftest.err >&AS_MESSAGE_LOG_FD - echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD - if (exit $ac_status) && test -s "$ac_outfile"; then - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings other than the usual output. - $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp - $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 - if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then - $2=yes - fi - fi - $RM conftest* -]) - -if test x"[$]$2" = xyes; then - m4_if([$5], , :, [$5]) -else - m4_if([$6], , :, [$6]) -fi -])# _LT_COMPILER_OPTION - -# Old name: -AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) - - -# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, -# [ACTION-SUCCESS], [ACTION-FAILURE]) -# ---------------------------------------------------- -# Check whether the given linker option works -AC_DEFUN([_LT_LINKER_OPTION], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_SED])dnl -AC_CACHE_CHECK([$1], [$2], - [$2=no - save_LDFLAGS="$LDFLAGS" - LDFLAGS="$LDFLAGS $3" - echo "$lt_simple_link_test_code" > conftest.$ac_ext - if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then - # The linker can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s conftest.err; then - # Append any errors to the config.log. 
- cat conftest.err 1>&AS_MESSAGE_LOG_FD - $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp - $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 - if diff conftest.exp conftest.er2 >/dev/null; then - $2=yes - fi - else - $2=yes - fi - fi - $RM -r conftest* - LDFLAGS="$save_LDFLAGS" -]) - -if test x"[$]$2" = xyes; then - m4_if([$4], , :, [$4]) -else - m4_if([$5], , :, [$5]) -fi -])# _LT_LINKER_OPTION - -# Old name: -AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) - - -# LT_CMD_MAX_LEN -#--------------- -AC_DEFUN([LT_CMD_MAX_LEN], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -# find the maximum length of command line arguments -AC_MSG_CHECKING([the maximum length of command line arguments]) -AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl - i=0 - teststring="ABCD" - - case $build_os in - msdosdjgpp*) - # On DJGPP, this test can blow up pretty badly due to problems in libc - # (any single argument exceeding 2000 bytes causes a buffer overrun - # during glob expansion). Even if it were fixed, the result of this - # check would be larger than it should be. - lt_cv_sys_max_cmd_len=12288; # 12K is about right - ;; - - gnu*) - # Under GNU Hurd, this test is not required because there is - # no limit to the length of command line arguments. - # Libtool will interpret -1 as no limit whatsoever - lt_cv_sys_max_cmd_len=-1; - ;; - - cygwin* | mingw* | cegcc*) - # On Win9x/ME, this test blows up -- it succeeds, but takes - # about 5 minutes as the teststring grows exponentially. - # Worse, since 9x/ME are not pre-emptively multitasking, - # you end up with a "frozen" computer, even though with patience - # the test eventually succeeds (with a max line length of 256k). - # Instead, let's just punt: use the minimum linelength reported by - # all of the supported platforms: 8192 (on NT/2K/XP). 
- lt_cv_sys_max_cmd_len=8192; - ;; - - mint*) - # On MiNT this can take a long time and run out of memory. - lt_cv_sys_max_cmd_len=8192; - ;; - - amigaos*) - # On AmigaOS with pdksh, this test takes hours, literally. - # So we just punt and use a minimum line length of 8192. - lt_cv_sys_max_cmd_len=8192; - ;; - - netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) - # This has been around since 386BSD, at least. Likely further. - if test -x /sbin/sysctl; then - lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` - elif test -x /usr/sbin/sysctl; then - lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` - else - lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs - fi - # And add a safety zone - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` - ;; - - interix*) - # We know the value 262144 and hardcode it with a safety zone (like BSD) - lt_cv_sys_max_cmd_len=196608 - ;; - - os2*) - # The test takes a long time on OS/2. - lt_cv_sys_max_cmd_len=8192 - ;; - - osf*) - # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure - # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not - # nice to cause kernel panics so lets avoid the loop below. - # First set a reasonable default. 
- lt_cv_sys_max_cmd_len=16384 - # - if test -x /sbin/sysconfig; then - case `/sbin/sysconfig -q proc exec_disable_arg_limit` in - *1*) lt_cv_sys_max_cmd_len=-1 ;; - esac - fi - ;; - sco3.2v5*) - lt_cv_sys_max_cmd_len=102400 - ;; - sysv5* | sco5v6* | sysv4.2uw2*) - kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` - if test -n "$kargmax"; then - lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` - else - lt_cv_sys_max_cmd_len=32768 - fi - ;; - *) - lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` - if test -n "$lt_cv_sys_max_cmd_len"; then - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` - lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` - else - # Make teststring a little bigger before we do anything with it. - # a 1K string should be a reasonable start. - for i in 1 2 3 4 5 6 7 8 ; do - teststring=$teststring$teststring - done - SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} - # If test is not a shell built-in, we'll probably end up computing a - # maximum length that is only half of the actual maximum length, but - # we can't tell. - while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ - = "X$teststring$teststring"; } >/dev/null 2>&1 && - test $i != 17 # 1/2 MB should be enough - do - i=`expr $i + 1` - teststring=$teststring$teststring - done - # Only check the string length outside the loop. - lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` - teststring= - # Add a significant safety factor because C++ compilers can tack on - # massive amounts of additional arguments before passing them to the - # linker. It appears as though 1/2 is a usable value. 
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` - fi - ;; - esac -]) -if test -n $lt_cv_sys_max_cmd_len ; then - AC_MSG_RESULT($lt_cv_sys_max_cmd_len) -else - AC_MSG_RESULT(none) -fi -max_cmd_len=$lt_cv_sys_max_cmd_len -_LT_DECL([], [max_cmd_len], [0], - [What is the maximum length of a command?]) -])# LT_CMD_MAX_LEN - -# Old name: -AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) - - -# _LT_HEADER_DLFCN -# ---------------- -m4_defun([_LT_HEADER_DLFCN], -[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl -])# _LT_HEADER_DLFCN - - -# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, -# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) -# ---------------------------------------------------------------- -m4_defun([_LT_TRY_DLOPEN_SELF], -[m4_require([_LT_HEADER_DLFCN])dnl -if test "$cross_compiling" = yes; then : - [$4] -else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF -[#line $LINENO "configure" -#include "confdefs.h" - -#if HAVE_DLFCN_H -#include -#endif - -#include - -#ifdef RTLD_GLOBAL -# define LT_DLGLOBAL RTLD_GLOBAL -#else -# ifdef DL_GLOBAL -# define LT_DLGLOBAL DL_GLOBAL -# else -# define LT_DLGLOBAL 0 -# endif -#endif - -/* We may have to define LT_DLLAZY_OR_NOW in the command line if we - find out it does not work in some platform. */ -#ifndef LT_DLLAZY_OR_NOW -# ifdef RTLD_LAZY -# define LT_DLLAZY_OR_NOW RTLD_LAZY -# else -# ifdef DL_LAZY -# define LT_DLLAZY_OR_NOW DL_LAZY -# else -# ifdef RTLD_NOW -# define LT_DLLAZY_OR_NOW RTLD_NOW -# else -# ifdef DL_NOW -# define LT_DLLAZY_OR_NOW DL_NOW -# else -# define LT_DLLAZY_OR_NOW 0 -# endif -# endif -# endif -# endif -#endif - -/* When -fvisbility=hidden is used, assume the code has been annotated - correspondingly for the symbols needed. 
*/ -#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) -int fnord () __attribute__((visibility("default"))); -#endif - -int fnord () { return 42; } -int main () -{ - void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); - int status = $lt_dlunknown; - - if (self) - { - if (dlsym (self,"fnord")) status = $lt_dlno_uscore; - else - { - if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; - else puts (dlerror ()); - } - /* dlclose (self); */ - } - else - puts (dlerror ()); - - return status; -}] -_LT_EOF - if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then - (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null - lt_status=$? - case x$lt_status in - x$lt_dlno_uscore) $1 ;; - x$lt_dlneed_uscore) $2 ;; - x$lt_dlunknown|x*) $3 ;; - esac - else : - # compilation failed - $3 - fi -fi -rm -fr conftest* -])# _LT_TRY_DLOPEN_SELF - - -# LT_SYS_DLOPEN_SELF -# ------------------ -AC_DEFUN([LT_SYS_DLOPEN_SELF], -[m4_require([_LT_HEADER_DLFCN])dnl -if test "x$enable_dlopen" != xyes; then - enable_dlopen=unknown - enable_dlopen_self=unknown - enable_dlopen_self_static=unknown -else - lt_cv_dlopen=no - lt_cv_dlopen_libs= - - case $host_os in - beos*) - lt_cv_dlopen="load_add_on" - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ;; - - mingw* | pw32* | cegcc*) - lt_cv_dlopen="LoadLibrary" - lt_cv_dlopen_libs= - ;; - - cygwin*) - lt_cv_dlopen="dlopen" - lt_cv_dlopen_libs= - ;; - - darwin*) - # if libdl is installed we need to link against it - AC_CHECK_LIB([dl], [dlopen], - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ - lt_cv_dlopen="dyld" - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ]) - ;; - - *) - AC_CHECK_FUNC([shl_load], - [lt_cv_dlopen="shl_load"], - [AC_CHECK_LIB([dld], [shl_load], - [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], - [AC_CHECK_FUNC([dlopen], - [lt_cv_dlopen="dlopen"], - [AC_CHECK_LIB([dl], [dlopen], - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], - [AC_CHECK_LIB([svld], [dlopen], - 
[lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], - [AC_CHECK_LIB([dld], [dld_link], - [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) - ]) - ]) - ]) - ]) - ]) - ;; - esac - - if test "x$lt_cv_dlopen" != xno; then - enable_dlopen=yes - else - enable_dlopen=no - fi - - case $lt_cv_dlopen in - dlopen) - save_CPPFLAGS="$CPPFLAGS" - test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" - - save_LDFLAGS="$LDFLAGS" - wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" - - save_LIBS="$LIBS" - LIBS="$lt_cv_dlopen_libs $LIBS" - - AC_CACHE_CHECK([whether a program can dlopen itself], - lt_cv_dlopen_self, [dnl - _LT_TRY_DLOPEN_SELF( - lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, - lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) - ]) - - if test "x$lt_cv_dlopen_self" = xyes; then - wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" - AC_CACHE_CHECK([whether a statically linked program can dlopen itself], - lt_cv_dlopen_self_static, [dnl - _LT_TRY_DLOPEN_SELF( - lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, - lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) - ]) - fi - - CPPFLAGS="$save_CPPFLAGS" - LDFLAGS="$save_LDFLAGS" - LIBS="$save_LIBS" - ;; - esac - - case $lt_cv_dlopen_self in - yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; - *) enable_dlopen_self=unknown ;; - esac - - case $lt_cv_dlopen_self_static in - yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; - *) enable_dlopen_self_static=unknown ;; - esac -fi -_LT_DECL([dlopen_support], [enable_dlopen], [0], - [Whether dlopen is supported]) -_LT_DECL([dlopen_self], [enable_dlopen_self], [0], - [Whether dlopen of programs is supported]) -_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], - [Whether dlopen of statically linked programs is supported]) -])# LT_SYS_DLOPEN_SELF - -# Old name: -AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) -dnl aclocal-1.4 backwards compatibility: -dnl 
AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) - - -# _LT_COMPILER_C_O([TAGNAME]) -# --------------------------- -# Check to see if options -c and -o are simultaneously supported by compiler. -# This macro does not hard code the compiler like AC_PROG_CC_C_O. -m4_defun([_LT_COMPILER_C_O], -[m4_require([_LT_DECL_SED])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_TAG_COMPILER])dnl -AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], - [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], - [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no - $RM -r conftest 2>/dev/null - mkdir conftest - cd conftest - mkdir out - echo "$lt_simple_compile_test_code" > conftest.$ac_ext - - lt_compiler_flag="-o out/conftest2.$ac_objext" - # Insert the option either (1) after the last *FLAGS variable, or - # (2) before a word containing "conftest.", or (3) at the end. - # Note that $ac_compile itself does not contain backslashes and begins - # with a dollar sign (not a hyphen), so the echo should work correctly. - lt_compile=`echo "$ac_compile" | $SED \ - -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ - -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ - -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) - (eval "$lt_compile" 2>out/conftest.err) - ac_status=$? - cat out/conftest.err >&AS_MESSAGE_LOG_FD - echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD - if (exit $ac_status) && test -s out/conftest2.$ac_objext - then - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp - $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 - if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then - _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes - fi - fi - chmod u+w . 
2>&AS_MESSAGE_LOG_FD - $RM conftest* - # SGI C++ compiler will create directory out/ii_files/ for - # template instantiation - test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files - $RM out/* && rmdir out - cd .. - $RM -r conftest - $RM conftest* -]) -_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], - [Does compiler simultaneously support -c and -o options?]) -])# _LT_COMPILER_C_O - - -# _LT_COMPILER_FILE_LOCKS([TAGNAME]) -# ---------------------------------- -# Check to see if we can do hard links to lock some files if needed -m4_defun([_LT_COMPILER_FILE_LOCKS], -[m4_require([_LT_ENABLE_LOCK])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -_LT_COMPILER_C_O([$1]) - -hard_links="nottested" -if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then - # do not overwrite the value of need_locks provided by the user - AC_MSG_CHECKING([if we can lock with hard links]) - hard_links=yes - $RM conftest* - ln conftest.a conftest.b 2>/dev/null && hard_links=no - touch conftest.a - ln conftest.a conftest.b 2>&5 || hard_links=no - ln conftest.a conftest.b 2>/dev/null && hard_links=no - AC_MSG_RESULT([$hard_links]) - if test "$hard_links" = no; then - AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) - need_locks=warn - fi -else - need_locks=no -fi -_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) -])# _LT_COMPILER_FILE_LOCKS - - -# _LT_CHECK_OBJDIR -# ---------------- -m4_defun([_LT_CHECK_OBJDIR], -[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], -[rm -f .libs 2>/dev/null -mkdir .libs 2>/dev/null -if test -d .libs; then - lt_cv_objdir=.libs -else - # MS-DOS does not allow filenames that begin with a dot. 
- lt_cv_objdir=_libs -fi -rmdir .libs 2>/dev/null]) -objdir=$lt_cv_objdir -_LT_DECL([], [objdir], [0], - [The name of the directory that contains temporary libtool files])dnl -m4_pattern_allow([LT_OBJDIR])dnl -AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/", - [Define to the sub-directory in which libtool stores uninstalled libraries.]) -])# _LT_CHECK_OBJDIR - - -# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) -# -------------------------------------- -# Check hardcoding attributes. -m4_defun([_LT_LINKER_HARDCODE_LIBPATH], -[AC_MSG_CHECKING([how to hardcode library paths into programs]) -_LT_TAGVAR(hardcode_action, $1)= -if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || - test -n "$_LT_TAGVAR(runpath_var, $1)" || - test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then - - # We can hardcode non-existent directories. - if test "$_LT_TAGVAR(hardcode_direct, $1)" != no && - # If the only mechanism to avoid hardcoding is shlibpath_var, we - # have to relink, otherwise we might link with an installed library - # when we should be linking with a yet-to-be-installed one - ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no && - test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then - # Linking always hardcodes the temporary library directory. - _LT_TAGVAR(hardcode_action, $1)=relink - else - # We can link without hardcoding, and we can hardcode nonexisting dirs. - _LT_TAGVAR(hardcode_action, $1)=immediate - fi -else - # We cannot hardcode anything, or else we can only hardcode existing - # directories. 
- _LT_TAGVAR(hardcode_action, $1)=unsupported -fi -AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) - -if test "$_LT_TAGVAR(hardcode_action, $1)" = relink || - test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then - # Fast installation is not supported - enable_fast_install=no -elif test "$shlibpath_overrides_runpath" = yes || - test "$enable_shared" = no; then - # Fast installation is not necessary - enable_fast_install=needless -fi -_LT_TAGDECL([], [hardcode_action], [0], - [How to hardcode a shared library path into an executable]) -])# _LT_LINKER_HARDCODE_LIBPATH - - -# _LT_CMD_STRIPLIB -# ---------------- -m4_defun([_LT_CMD_STRIPLIB], -[m4_require([_LT_DECL_EGREP]) -striplib= -old_striplib= -AC_MSG_CHECKING([whether stripping libraries is possible]) -if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then - test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" - test -z "$striplib" && striplib="$STRIP --strip-unneeded" - AC_MSG_RESULT([yes]) -else -# FIXME - insert some real tests, host_os isn't really good enough - case $host_os in - darwin*) - if test -n "$STRIP" ; then - striplib="$STRIP -x" - old_striplib="$STRIP -S" - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - ;; - *) - AC_MSG_RESULT([no]) - ;; - esac -fi -_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) -_LT_DECL([], [striplib], [1]) -])# _LT_CMD_STRIPLIB - - -# _LT_SYS_DYNAMIC_LINKER([TAG]) -# ----------------------------- -# PORTME Fill in your ld.so characteristics -m4_defun([_LT_SYS_DYNAMIC_LINKER], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_OBJDUMP])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_CHECK_SHELL_FEATURES])dnl -AC_MSG_CHECKING([dynamic linker characteristics]) -m4_if([$1], - [], [ -if test "$GCC" = yes; then - case $host_os in - darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; - *) lt_awk_arg="/^libraries:/" ;; - esac - case $host_os in - mingw* 
| cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;; - *) lt_sed_strip_eq="s,=/,/,g" ;; - esac - lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` - case $lt_search_path_spec in - *\;*) - # if the path contains ";" then we assume it to be the separator - # otherwise default to the standard path separator (i.e. ":") - it is - # assumed that no part of a normal pathname contains ";" but that should - # okay in the real world where ";" in dirpaths is itself problematic. - lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` - ;; - *) - lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` - ;; - esac - # Ok, now we have the path, separated by spaces, we can step through it - # and add multilib dir if necessary. - lt_tmp_lt_search_path_spec= - lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` - for lt_sys_path in $lt_search_path_spec; do - if test -d "$lt_sys_path/$lt_multi_os_dir"; then - lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" - else - test -d "$lt_sys_path" && \ - lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" - fi - done - lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' -BEGIN {RS=" "; FS="/|\n";} { - lt_foo=""; - lt_count=0; - for (lt_i = NF; lt_i > 0; lt_i--) { - if ($lt_i != "" && $lt_i != ".") { - if ($lt_i == "..") { - lt_count++; - } else { - if (lt_count == 0) { - lt_foo="/" $lt_i lt_foo; - } else { - lt_count--; - } - } - } - } - if (lt_foo != "") { lt_freq[[lt_foo]]++; } - if (lt_freq[[lt_foo]] == 1) { print lt_foo; } -}'` - # AWK program above erroneously prepends '/' to C:/dos/paths - # for these hosts. 
- case $host_os in - mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ - $SED 's,/\([[A-Za-z]]:\),\1,g'` ;; - esac - sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` -else - sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" -fi]) -library_names_spec= -libname_spec='lib$name' -soname_spec= -shrext_cmds=".so" -postinstall_cmds= -postuninstall_cmds= -finish_cmds= -finish_eval= -shlibpath_var= -shlibpath_overrides_runpath=unknown -version_type=none -dynamic_linker="$host_os ld.so" -sys_lib_dlsearch_path_spec="/lib /usr/lib" -need_lib_prefix=unknown -hardcode_into_libs=no - -# when you set need_version to no, make sure it does not cause -set_version -# flags to be left without arguments -need_version=unknown - -case $host_os in -aix3*) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' - shlibpath_var=LIBPATH - - # AIX 3 has no versioning support, so we append a major version to the name. - soname_spec='${libname}${release}${shared_ext}$major' - ;; - -aix[[4-9]]*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - hardcode_into_libs=yes - if test "$host_cpu" = ia64; then - # AIX 5 supports IA64 - library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' - shlibpath_var=LD_LIBRARY_PATH - else - # With GCC up to 2.95.x, collect2 would create an import file - # for dependence libraries. The import file would start with - # the line `#! .'. This would cause the generated library to - # depend on `.', always an invalid library. This was fixed in - # development snapshots of GCC prior to 3.0. 
- case $host_os in - aix4 | aix4.[[01]] | aix4.[[01]].*) - if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' - echo ' yes ' - echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then - : - else - can_build_shared=no - fi - ;; - esac - # AIX (on Power*) has no versioning support, so currently we can not hardcode correct - # soname into executable. Probably we can add versioning support to - # collect2, so additional links can be useful in future. - if test "$aix_use_runtimelinking" = yes; then - # If using run time linking (on AIX 4.2 or later) use lib.so - # instead of lib.a to let people know that these are not - # typical AIX shared libraries. - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - else - # We preserve .a as extension for shared libraries through AIX4.2 - # and later when we are not doing run time linking. - library_names_spec='${libname}${release}.a $libname.a' - soname_spec='${libname}${release}${shared_ext}$major' - fi - shlibpath_var=LIBPATH - fi - ;; - -amigaos*) - case $host_cpu in - powerpc) - # Since July 2007 AmigaOS4 officially supports .so libraries. - # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - ;; - m68k) - library_names_spec='$libname.ixlibrary $libname.a' - # Create ${libname}_ixlibrary.a entries in /sys/libs. 
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' - ;; - esac - ;; - -beos*) - library_names_spec='${libname}${shared_ext}' - dynamic_linker="$host_os ld.so" - shlibpath_var=LIBRARY_PATH - ;; - -bsdi[[45]]*) - version_type=linux # correct to gnu/linux during the next big refactor - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" - sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" - # the default ld.so.conf also contains /usr/contrib/lib and - # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow - # libtool to hard-code these into programs - ;; - -cygwin* | mingw* | pw32* | cegcc*) - version_type=windows - shrext_cmds=".dll" - need_version=no - need_lib_prefix=no - - case $GCC,$cc_basename in - yes,*) - # gcc - library_names_spec='$libname.dll.a' - # DLL is installed to $(libdir)/../bin by postinstall_cmds - postinstall_cmds='base_file=`basename \${file}`~ - dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ - dldir=$destdir/`dirname \$dlpath`~ - test -d \$dldir || mkdir -p \$dldir~ - $install_prog $dir/$dlname \$dldir/$dlname~ - chmod a+x \$dldir/$dlname~ - if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then - eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; - fi' - postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ - dlpath=$dir/\$dldll~ - $RM \$dlpath' - shlibpath_overrides_runpath=yes - - case $host_os in - cygwin*) - # Cygwin DLLs use 'cyg' prefix rather than 'lib' - soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' -m4_if([$1], [],[ - sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) - ;; - mingw* | cegcc*) - # MinGW DLLs use traditional 'lib' prefix - soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' - ;; - pw32*) - # pw32 DLLs use 'pw' prefix rather than 'lib' - library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' - ;; - esac - dynamic_linker='Win32 ld.exe' - ;; - - *,cl*) - # Native MSVC - libname_spec='$name' - soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' - library_names_spec='${libname}.dll.lib' - - case $build_os in - mingw*) - sys_lib_search_path_spec= - lt_save_ifs=$IFS - IFS=';' - for lt_path in $LIB - do - IFS=$lt_save_ifs - # Let DOS variable expansion print the short 8.3 style file name. - lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` - sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" - done - IFS=$lt_save_ifs - # Convert to MSYS style. - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` - ;; - cygwin*) - # Convert to unix form, then to dos form, then back to unix form - # but this time dos style (no spaces!) so that the unix form looks - # like /cygdrive/c/PROGRA~1:/cygdr... 
- sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` - sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` - sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` - ;; - *) - sys_lib_search_path_spec="$LIB" - if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then - # It is most probably a Windows format PATH. - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` - else - sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` - fi - # FIXME: find the short name or the path components, as spaces are - # common. (e.g. "Program Files" -> "PROGRA~1") - ;; - esac - - # DLL is installed to $(libdir)/../bin by postinstall_cmds - postinstall_cmds='base_file=`basename \${file}`~ - dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ - dldir=$destdir/`dirname \$dlpath`~ - test -d \$dldir || mkdir -p \$dldir~ - $install_prog $dir/$dlname \$dldir/$dlname' - postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ - dlpath=$dir/\$dldll~ - $RM \$dlpath' - shlibpath_overrides_runpath=yes - dynamic_linker='Win32 link.exe' - ;; - - *) - # Assume MSVC wrapper - library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' - dynamic_linker='Win32 ld.exe' - ;; - esac - # FIXME: first we should search . 
and the directory the executable is in - shlibpath_var=PATH - ;; - -darwin* | rhapsody*) - dynamic_linker="$host_os dyld" - version_type=darwin - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' - soname_spec='${libname}${release}${major}$shared_ext' - shlibpath_overrides_runpath=yes - shlibpath_var=DYLD_LIBRARY_PATH - shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' -m4_if([$1], [],[ - sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) - sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' - ;; - -dgux*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -freebsd* | dragonfly*) - # DragonFly does not have aout. When/if they implement a new - # versioning mechanism, adjust this. 
- if test -x /usr/bin/objformat; then - objformat=`/usr/bin/objformat` - else - case $host_os in - freebsd[[23]].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi - version_type=freebsd-$objformat - case $version_type in - freebsd-elf*) - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' - need_version=no - need_lib_prefix=no - ;; - freebsd-*) - library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' - need_version=yes - ;; - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in - freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[[01]]* | freebsdelf3.[[01]]*) - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ - freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - *) # from 4.6 on, and DragonFly - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - esac - ;; - -gnu*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - -haiku*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - dynamic_linker="$host_os runtime_loader" - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LIBRARY_PATH - shlibpath_overrides_runpath=yes - sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' - hardcode_into_libs=yes - ;; - 
-hpux9* | hpux10* | hpux11*) - # Give a soname corresponding to the major version so that dld.sl refuses to - # link against other versions. - version_type=sunos - need_lib_prefix=no - need_version=no - case $host_cpu in - ia64*) - shrext_cmds='.so' - hardcode_into_libs=yes - dynamic_linker="$host_os dld.so" - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - if test "X$HPUX_IA64_MODE" = X32; then - sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" - else - sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" - fi - sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec - ;; - hppa*64*) - shrext_cmds='.sl' - hardcode_into_libs=yes - dynamic_linker="$host_os dld.sl" - shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH - shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" - sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec - ;; - *) - shrext_cmds='.sl' - dynamic_linker="$host_os dld.sl" - shlibpath_var=SHLIB_PATH - shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - ;; - esac - # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
- postinstall_cmds='chmod 555 $lib' - # or fails outright, so override atomically: - install_override_mode=555 - ;; - -interix[[3-9]]*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - -irix5* | irix6* | nonstopux*) - case $host_os in - nonstopux*) version_type=nonstopux ;; - *) - if test "$lt_cv_prog_gnu_ld" = yes; then - version_type=linux # correct to gnu/linux during the next big refactor - else - version_type=irix - fi ;; - esac - need_lib_prefix=no - need_version=no - soname_spec='${libname}${release}${shared_ext}$major' - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' - case $host_os in - irix5* | nonstopux*) - libsuff= shlibsuff= - ;; - *) - case $LD in # libtool.m4 will add one of these switches to LD - *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") - libsuff= shlibsuff= libmagic=32-bit;; - *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") - libsuff=32 shlibsuff=N32 libmagic=N32;; - *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") - libsuff=64 shlibsuff=64 libmagic=64-bit;; - *) libsuff= shlibsuff= libmagic=never-match;; - esac - ;; - esac - shlibpath_var=LD_LIBRARY${shlibsuff}_PATH - shlibpath_overrides_runpath=no - sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" - sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" - hardcode_into_libs=yes - ;; - -# No shared lib support for Linux oldld, aout, or coff. -linux*oldld* | linux*aout* | linux*coff*) - dynamic_linker=no - ;; - -# This must be glibc/ELF. 
-linux* | k*bsd*-gnu | kopensolaris*-gnu) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - - # Some binutils ld are patched to set DT_RUNPATH - AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], - [lt_cv_shlibpath_overrides_runpath=no - save_LDFLAGS=$LDFLAGS - save_libdir=$libdir - eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ - LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" - AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], - [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], - [lt_cv_shlibpath_overrides_runpath=yes])]) - LDFLAGS=$save_LDFLAGS - libdir=$save_libdir - ]) - shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath - - # This implies no fast_install, which is unacceptable. - # Some rework will be needed to allow for fast_install - # before this can be enabled. - hardcode_into_libs=yes - - # Add ABI-specific directories to the system library path. - sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" - - # Append ld.so.conf contents to the search path - if test -f /etc/ld.so.conf; then - lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` - sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" - - fi - - # We used to test for /lib/ld.so.1 and disable shared libraries on - # powerpc, because MkLinux only supported shared libraries with the - # GNU dynamic linker. 
Since this was broken with cross compilers, - # most powerpc-linux boxes support dynamic linking these days and - # people can always --disable-shared, the test was removed, and we - # assume the GNU/Linux dynamic linker is in use. - dynamic_linker='GNU/Linux ld.so' - ;; - -netbsd*) - version_type=sunos - need_lib_prefix=no - need_version=no - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - dynamic_linker='NetBSD (a.out) ld.so' - else - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - dynamic_linker='NetBSD ld.elf_so' - fi - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - -newsos6) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - ;; - -*nto* | *qnx*) - version_type=qnx - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - dynamic_linker='ldqnx.so' - ;; - -openbsd*) - version_type=sunos - sys_lib_dlsearch_path_spec="/usr/lib" - need_lib_prefix=no - # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
- case $host_os in - openbsd3.3 | openbsd3.3.*) need_version=yes ;; - *) need_version=no ;; - esac - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - shlibpath_var=LD_LIBRARY_PATH - if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then - case $host_os in - openbsd2.[[89]] | openbsd2.[[89]].*) - shlibpath_overrides_runpath=no - ;; - *) - shlibpath_overrides_runpath=yes - ;; - esac - else - shlibpath_overrides_runpath=yes - fi - ;; - -os2*) - libname_spec='$name' - shrext_cmds=".dll" - need_lib_prefix=no - library_names_spec='$libname${shared_ext} $libname.a' - dynamic_linker='OS/2 ld.exe' - shlibpath_var=LIBPATH - ;; - -osf3* | osf4* | osf5*) - version_type=osf - need_lib_prefix=no - need_version=no - soname_spec='${libname}${release}${shared_ext}$major' - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - shlibpath_var=LD_LIBRARY_PATH - sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" - sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" - ;; - -rdos*) - dynamic_linker=no - ;; - -solaris*) - version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - # ldd complains unless libraries are executable - postinstall_cmds='chmod +x $lib' - ;; - -sunos4*) - version_type=sunos - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' - finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' - 
shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - if test "$with_gnu_ld" = yes; then - need_lib_prefix=no - fi - need_version=yes - ;; - -sysv4 | sysv4.3*) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - case $host_vendor in - sni) - shlibpath_overrides_runpath=no - need_lib_prefix=no - runpath_var=LD_RUN_PATH - ;; - siemens) - need_lib_prefix=no - ;; - motorola) - need_lib_prefix=no - need_version=no - shlibpath_overrides_runpath=no - sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' - ;; - esac - ;; - -sysv4*MP*) - if test -d /usr/nec ;then - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' - soname_spec='$libname${shared_ext}.$major' - shlibpath_var=LD_LIBRARY_PATH - fi - ;; - -sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) - version_type=freebsd-elf - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - if test "$with_gnu_ld" = yes; then - sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' - else - sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' - case $host_os in - sco3.2v5*) - sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" - ;; - esac - fi - sys_lib_dlsearch_path_spec='/usr/lib' - ;; - -tpf*) - # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
- version_type=linux # correct to gnu/linux during the next big refactor - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - -uts4*) - version_type=linux # correct to gnu/linux during the next big refactor - library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' - soname_spec='${libname}${release}${shared_ext}$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -*) - dynamic_linker=no - ;; -esac -AC_MSG_RESULT([$dynamic_linker]) -test "$dynamic_linker" = no && can_build_shared=no - -variables_saved_for_relink="PATH $shlibpath_var $runpath_var" -if test "$GCC" = yes; then - variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" -fi - -if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then - sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" -fi -if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then - sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" -fi - -_LT_DECL([], [variables_saved_for_relink], [1], - [Variables whose values should be saved in libtool wrapper scripts and - restored at link time]) -_LT_DECL([], [need_lib_prefix], [0], - [Do we need the "lib" prefix for modules?]) -_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) -_LT_DECL([], [version_type], [0], [Library versioning type]) -_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) -_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) -_LT_DECL([], [shlibpath_overrides_runpath], [0], - [Is shlibpath searched before the hard-coded library search path?]) -_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) -_LT_DECL([], [library_names_spec], [1], - [[List of archive names. 
First name is the real one, the rest are links. - The last name is the one that the linker finds with -lNAME]]) -_LT_DECL([], [soname_spec], [1], - [[The coded name of the library, if different from the real name]]) -_LT_DECL([], [install_override_mode], [1], - [Permission mode override for installation of shared libraries]) -_LT_DECL([], [postinstall_cmds], [2], - [Command to use after installation of a shared archive]) -_LT_DECL([], [postuninstall_cmds], [2], - [Command to use after uninstallation of a shared archive]) -_LT_DECL([], [finish_cmds], [2], - [Commands used to finish a libtool library installation in a directory]) -_LT_DECL([], [finish_eval], [1], - [[As "finish_cmds", except a single script fragment to be evaled but - not shown]]) -_LT_DECL([], [hardcode_into_libs], [0], - [Whether we should hardcode library paths into libraries]) -_LT_DECL([], [sys_lib_search_path_spec], [2], - [Compile-time system search path for libraries]) -_LT_DECL([], [sys_lib_dlsearch_path_spec], [2], - [Run-time system search path for libraries]) -])# _LT_SYS_DYNAMIC_LINKER - - -# _LT_PATH_TOOL_PREFIX(TOOL) -# -------------------------- -# find a file program which can recognize shared library -AC_DEFUN([_LT_PATH_TOOL_PREFIX], -[m4_require([_LT_DECL_EGREP])dnl -AC_MSG_CHECKING([for $1]) -AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, -[case $MAGIC_CMD in -[[\\/*] | ?:[\\/]*]) - lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. - ;; -*) - lt_save_MAGIC_CMD="$MAGIC_CMD" - lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR -dnl $ac_dummy forces splitting on constant user-supplied paths. -dnl POSIX.2 word splitting is done only on the output of word expansions, -dnl not every word. This closes a longstanding sh security hole. - ac_dummy="m4_if([$2], , $PATH, [$2])" - for ac_dir in $ac_dummy; do - IFS="$lt_save_ifs" - test -z "$ac_dir" && ac_dir=. 
- if test -f $ac_dir/$1; then - lt_cv_path_MAGIC_CMD="$ac_dir/$1" - if test -n "$file_magic_test_file"; then - case $deplibs_check_method in - "file_magic "*) - file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` - MAGIC_CMD="$lt_cv_path_MAGIC_CMD" - if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | - $EGREP "$file_magic_regex" > /dev/null; then - : - else - cat <<_LT_EOF 1>&2 - -*** Warning: the command libtool uses to detect shared libraries, -*** $file_magic_cmd, produces output that libtool cannot recognize. -*** The result is that libtool may fail to recognize shared libraries -*** as such. This will affect the creation of libtool libraries that -*** depend on shared libraries, but programs linked with such libtool -*** libraries will work regardless of this problem. Nevertheless, you -*** may want to report the problem to your system manager and/or to -*** bug-libtool@gnu.org - -_LT_EOF - fi ;; - esac - fi - break - fi - done - IFS="$lt_save_ifs" - MAGIC_CMD="$lt_save_MAGIC_CMD" - ;; -esac]) -MAGIC_CMD="$lt_cv_path_MAGIC_CMD" -if test -n "$MAGIC_CMD"; then - AC_MSG_RESULT($MAGIC_CMD) -else - AC_MSG_RESULT(no) -fi -_LT_DECL([], [MAGIC_CMD], [0], - [Used to examine libraries when file_magic_cmd begins with "file"])dnl -])# _LT_PATH_TOOL_PREFIX - -# Old name: -AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) - - -# _LT_PATH_MAGIC -# -------------- -# find a file program which can recognize a shared library -m4_defun([_LT_PATH_MAGIC], -[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) -if test -z "$lt_cv_path_MAGIC_CMD"; then - if test -n "$ac_tool_prefix"; then - _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) - else - MAGIC_CMD=: - fi -fi -])# _LT_PATH_MAGIC - - -# LT_PATH_LD -# ---------- -# find the pathname to the GNU or non-GNU linker -AC_DEFUN([LT_PATH_LD], -[AC_REQUIRE([AC_PROG_CC])dnl 
-AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_PROG_ECHO_BACKSLASH])dnl - -AC_ARG_WITH([gnu-ld], - [AS_HELP_STRING([--with-gnu-ld], - [assume the C compiler uses GNU ld @<:@default=no@:>@])], - [test "$withval" = no || with_gnu_ld=yes], - [with_gnu_ld=no])dnl - -ac_prog=ld -if test "$GCC" = yes; then - # Check if gcc -print-prog-name=ld gives a path. - AC_MSG_CHECKING([for ld used by $CC]) - case $host in - *-*-mingw*) - # gcc leaves a trailing carriage return which upsets mingw - ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; - *) - ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; - esac - case $ac_prog in - # Accept absolute paths. - [[\\/]]* | ?:[[\\/]]*) - re_direlt='/[[^/]][[^/]]*/\.\./' - # Canonicalize the pathname of ld - ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` - while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do - ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` - done - test -z "$LD" && LD="$ac_prog" - ;; - "") - # If it fails, then pretend we aren't using GCC. - ac_prog=ld - ;; - *) - # If it is relative, then search for the first ld in PATH. - with_gnu_ld=unknown - ;; - esac -elif test "$with_gnu_ld" = yes; then - AC_MSG_CHECKING([for GNU ld]) -else - AC_MSG_CHECKING([for non-GNU ld]) -fi -AC_CACHE_VAL(lt_cv_path_LD, -[if test -z "$LD"; then - lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR - for ac_dir in $PATH; do - IFS="$lt_save_ifs" - test -z "$ac_dir" && ac_dir=. - if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then - lt_cv_path_LD="$ac_dir/$ac_prog" - # Check to see if the program is GNU ld. I'd rather use --version, - # but apparently some variants of GNU ld only accept -v. - # Break only if it was the GNU/non-GNU ld that we prefer. 
- case `"$lt_cv_path_LD" -v 2>&1 &1 /dev/null 2>&1; then - lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' - lt_cv_file_magic_cmd='func_win32_libid' - else - # Keep this pattern in sync with the one in func_win32_libid. - lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' - lt_cv_file_magic_cmd='$OBJDUMP -f' - fi - ;; - -cegcc*) - # use the weaker test based on 'objdump'. See mingw*. - lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' - lt_cv_file_magic_cmd='$OBJDUMP -f' - ;; - -darwin* | rhapsody*) - lt_cv_deplibs_check_method=pass_all - ;; - -freebsd* | dragonfly*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then - case $host_cpu in - i*86 ) - # Not sure whether the presence of OpenBSD here was a mistake. - # Let's accept both of them until this is cleared up. - lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` - ;; - esac - else - lt_cv_deplibs_check_method=pass_all - fi - ;; - -gnu*) - lt_cv_deplibs_check_method=pass_all - ;; - -haiku*) - lt_cv_deplibs_check_method=pass_all - ;; - -hpux10.20* | hpux11*) - lt_cv_file_magic_cmd=/usr/bin/file - case $host_cpu in - ia64*) - lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' - lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so - ;; - hppa*64*) - [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? 
shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] - lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl - ;; - *) - lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' - lt_cv_file_magic_test_file=/usr/lib/libc.sl - ;; - esac - ;; - -interix[[3-9]]*) - # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' - ;; - -irix5* | irix6* | nonstopux*) - case $LD in - *-32|*"-32 ") libmagic=32-bit;; - *-n32|*"-n32 ") libmagic=N32;; - *-64|*"-64 ") libmagic=64-bit;; - *) libmagic=never-match;; - esac - lt_cv_deplibs_check_method=pass_all - ;; - -# This must be glibc/ELF. -linux* | k*bsd*-gnu | kopensolaris*-gnu) - lt_cv_deplibs_check_method=pass_all - ;; - -netbsd*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' - else - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' - fi - ;; - -newos6*) - lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=/usr/lib/libnls.so - ;; - -*nto* | *qnx*) - lt_cv_deplibs_check_method=pass_all - ;; - -openbsd*) - if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' - else - lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' - fi - ;; - -osf3* | osf4* | osf5*) - lt_cv_deplibs_check_method=pass_all - ;; - -rdos*) - lt_cv_deplibs_check_method=pass_all - ;; - -solaris*) - lt_cv_deplibs_check_method=pass_all - ;; - -sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) - lt_cv_deplibs_check_method=pass_all - ;; - -sysv4 | sysv4.3*) - case $host_vendor in - motorola) - 
lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' - lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` - ;; - ncr) - lt_cv_deplibs_check_method=pass_all - ;; - sequent) - lt_cv_file_magic_cmd='/bin/file' - lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' - ;; - sni) - lt_cv_file_magic_cmd='/bin/file' - lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" - lt_cv_file_magic_test_file=/lib/libc.so - ;; - siemens) - lt_cv_deplibs_check_method=pass_all - ;; - pc) - lt_cv_deplibs_check_method=pass_all - ;; - esac - ;; - -tpf*) - lt_cv_deplibs_check_method=pass_all - ;; -esac -]) - -file_magic_glob= -want_nocaseglob=no -if test "$build" = "$host"; then - case $host_os in - mingw* | pw32*) - if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then - want_nocaseglob=yes - else - file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` - fi - ;; - esac -fi - -file_magic_cmd=$lt_cv_file_magic_cmd -deplibs_check_method=$lt_cv_deplibs_check_method -test -z "$deplibs_check_method" && deplibs_check_method=unknown - -_LT_DECL([], [deplibs_check_method], [1], - [Method to check whether dependent libraries are shared objects]) -_LT_DECL([], [file_magic_cmd], [1], - [Command to use when deplibs_check_method = "file_magic"]) -_LT_DECL([], [file_magic_glob], [1], - [How to find potential files when deplibs_check_method = "file_magic"]) -_LT_DECL([], [want_nocaseglob], [1], - [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) -])# _LT_CHECK_MAGIC_METHOD - - -# LT_PATH_NM -# ---------- -# find the pathname to a BSD- or MS-compatible name lister -AC_DEFUN([LT_PATH_NM], -[AC_REQUIRE([AC_PROG_CC])dnl -AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, -[if test -n "$NM"; then - # Let the user override the 
test. - lt_cv_path_NM="$NM" -else - lt_nm_to_check="${ac_tool_prefix}nm" - if test -n "$ac_tool_prefix" && test "$build" = "$host"; then - lt_nm_to_check="$lt_nm_to_check nm" - fi - for lt_tmp_nm in $lt_nm_to_check; do - lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR - for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do - IFS="$lt_save_ifs" - test -z "$ac_dir" && ac_dir=. - tmp_nm="$ac_dir/$lt_tmp_nm" - if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then - # Check to see if the nm accepts a BSD-compat flag. - # Adding the `sed 1q' prevents false positives on HP-UX, which says: - # nm: unknown option "B" ignored - # Tru64's nm complains that /dev/null is an invalid object file - case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in - */dev/null* | *'Invalid file or object type'*) - lt_cv_path_NM="$tmp_nm -B" - break - ;; - *) - case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in - */dev/null*) - lt_cv_path_NM="$tmp_nm -p" - break - ;; - *) - lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but - continue # so that we can try to find one that supports BSD flags - ;; - esac - ;; - esac - fi - done - IFS="$lt_save_ifs" - done - : ${lt_cv_path_NM=no} -fi]) -if test "$lt_cv_path_NM" != "no"; then - NM="$lt_cv_path_NM" -else - # Didn't find any BSD compatible name lister, look for dumpbin. - if test -n "$DUMPBIN"; then : - # Let the user override the test. 
- else - AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) - case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in - *COFF*) - DUMPBIN="$DUMPBIN -symbols" - ;; - *) - DUMPBIN=: - ;; - esac - fi - AC_SUBST([DUMPBIN]) - if test "$DUMPBIN" != ":"; then - NM="$DUMPBIN" - fi -fi -test -z "$NM" && NM=nm -AC_SUBST([NM]) -_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl - -AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], - [lt_cv_nm_interface="BSD nm" - echo "int some_variable = 0;" > conftest.$ac_ext - (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) - (eval "$ac_compile" 2>conftest.err) - cat conftest.err >&AS_MESSAGE_LOG_FD - (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) - (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) - cat conftest.err >&AS_MESSAGE_LOG_FD - (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) - cat conftest.out >&AS_MESSAGE_LOG_FD - if $GREP 'External.*some_variable' conftest.out > /dev/null; then - lt_cv_nm_interface="MS dumpbin" - fi - rm -f conftest*]) -])# LT_PATH_NM - -# Old names: -AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) -AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_PROG_NM], []) -dnl AC_DEFUN([AC_PROG_NM], []) - -# _LT_CHECK_SHAREDLIB_FROM_LINKLIB -# -------------------------------- -# how to determine the name of the shared library -# associated with a specific link library. 
-# -- PORTME fill in with the dynamic library characteristics -m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], -[m4_require([_LT_DECL_EGREP]) -m4_require([_LT_DECL_OBJDUMP]) -m4_require([_LT_DECL_DLLTOOL]) -AC_CACHE_CHECK([how to associate runtime and link libraries], -lt_cv_sharedlib_from_linklib_cmd, -[lt_cv_sharedlib_from_linklib_cmd='unknown' - -case $host_os in -cygwin* | mingw* | pw32* | cegcc*) - # two different shell functions defined in ltmain.sh - # decide which to use based on capabilities of $DLLTOOL - case `$DLLTOOL --help 2>&1` in - *--identify-strict*) - lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib - ;; - *) - lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback - ;; - esac - ;; -*) - # fallback: assume linklib IS sharedlib - lt_cv_sharedlib_from_linklib_cmd="$ECHO" - ;; -esac -]) -sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd -test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO - -_LT_DECL([], [sharedlib_from_linklib_cmd], [1], - [Command to associate shared and link libraries]) -])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB - - -# _LT_PATH_MANIFEST_TOOL -# ---------------------- -# locate the manifest tool -m4_defun([_LT_PATH_MANIFEST_TOOL], -[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) -test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt -AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], - [lt_cv_path_mainfest_tool=no - echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD - $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out - cat conftest.err >&AS_MESSAGE_LOG_FD - if $GREP 'Manifest Tool' conftest.out > /dev/null; then - lt_cv_path_mainfest_tool=yes - fi - rm -f conftest*]) -if test "x$lt_cv_path_mainfest_tool" != xyes; then - MANIFEST_TOOL=: -fi -_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl -])# _LT_PATH_MANIFEST_TOOL - - -# LT_LIB_M -# -------- -# check for math library -AC_DEFUN([LT_LIB_M], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -LIBM= -case $host in -*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) - # These system don't have libm, or don't need it - ;; -*-ncr-sysv4.3*) - AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") - AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") - ;; -*) - AC_CHECK_LIB(m, cos, LIBM="-lm") - ;; -esac -AC_SUBST([LIBM]) -])# LT_LIB_M - -# Old name: -AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_CHECK_LIBM], []) - - -# _LT_COMPILER_NO_RTTI([TAGNAME]) -# ------------------------------- -m4_defun([_LT_COMPILER_NO_RTTI], -[m4_require([_LT_TAG_COMPILER])dnl - -_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= - -if test "$GCC" = yes; then - case $cc_basename in - nvcc*) - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; - *) - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; - esac - - _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], - lt_cv_prog_compiler_rtti_exceptions, - [-fno-rtti -fno-exceptions], [], - [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) -fi -_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], - [Compiler flag to turn off builtin functions]) -])# _LT_COMPILER_NO_RTTI - - -# _LT_CMD_GLOBAL_SYMBOLS -# ---------------------- -m4_defun([_LT_CMD_GLOBAL_SYMBOLS], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_PROG_CC])dnl -AC_REQUIRE([AC_PROG_AWK])dnl 
-AC_REQUIRE([LT_PATH_NM])dnl -AC_REQUIRE([LT_PATH_LD])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_TAG_COMPILER])dnl - -# Check for command to grab the raw symbol name followed by C symbol from nm. -AC_MSG_CHECKING([command to parse $NM output from $compiler object]) -AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], -[ -# These are sane defaults that work on at least a few old systems. -# [They come from Ultrix. What could be older than Ultrix?!! ;)] - -# Character class describing NM global symbol codes. -symcode='[[BCDEGRST]]' - -# Regexp to match symbols that can be accessed directly from C. -sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' - -# Define system-specific variables. -case $host_os in -aix*) - symcode='[[BCDT]]' - ;; -cygwin* | mingw* | pw32* | cegcc*) - symcode='[[ABCDGISTW]]' - ;; -hpux*) - if test "$host_cpu" = ia64; then - symcode='[[ABCDEGRST]]' - fi - ;; -irix* | nonstopux*) - symcode='[[BCDEGRST]]' - ;; -osf*) - symcode='[[BCDEGQRST]]' - ;; -solaris*) - symcode='[[BDRT]]' - ;; -sco3.2v5*) - symcode='[[DT]]' - ;; -sysv4.2uw2*) - symcode='[[DT]]' - ;; -sysv5* | sco5v6* | unixware* | OpenUNIX*) - symcode='[[ABDT]]' - ;; -sysv4) - symcode='[[DFNSTU]]' - ;; -esac - -# If we're using GNU nm, then use its standard symbol codes. -case `$NM -V 2>&1` in -*GNU* | *'with BFD'*) - symcode='[[ABCDGIRSTW]]' ;; -esac - -# Transform an extracted symbol line into a proper C declaration. -# Some systems (esp. on ia64) link data and code symbols differently, -# so use this general approach. 
-lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" - -# Transform an extracted symbol line into symbol name and symbol address -lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'" -lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'" - -# Handle CRLF in mingw tool chain -opt_cr= -case $build_os in -mingw*) - opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp - ;; -esac - -# Try without a prefix underscore, then with it. -for ac_symprfx in "" "_"; do - - # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. - symxfrm="\\1 $ac_symprfx\\2 \\2" - - # Write the raw and C identifiers. - if test "$lt_cv_nm_interface" = "MS dumpbin"; then - # Fake it for dumpbin and say T for any non-static function - # and D for any global variable. - # Also find C++ and __fastcall symbols from MSVC++, - # which start with @ or ?. - lt_cv_sys_global_symbol_pipe="$AWK ['"\ -" {last_section=section; section=\$ 3};"\ -" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ -" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ -" \$ 0!~/External *\|/{next};"\ -" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ -" {if(hide[section]) next};"\ -" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? 
\"T \" : \"D \"};"\ -" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ -" s[1]~/^[@?]/{print s[1], s[1]; next};"\ -" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ -" ' prfx=^$ac_symprfx]" - else - lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" - fi - lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" - - # Check to see that the pipe works correctly. - pipe_works=no - - rm -f conftest* - cat > conftest.$ac_ext <<_LT_EOF -#ifdef __cplusplus -extern "C" { -#endif -char nm_test_var; -void nm_test_func(void); -void nm_test_func(void){} -#ifdef __cplusplus -} -#endif -int main(){nm_test_var='a';nm_test_func();return(0);} -_LT_EOF - - if AC_TRY_EVAL(ac_compile); then - # Now try to grab the symbols. - nlist=conftest.nm - if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then - # Try sorting and uniquifying the output. - if sort "$nlist" | uniq > "$nlist"T; then - mv -f "$nlist"T "$nlist" - else - rm -f "$nlist"T - fi - - # Make sure that we snagged all the symbols we need. - if $GREP ' nm_test_var$' "$nlist" >/dev/null; then - if $GREP ' nm_test_func$' "$nlist" >/dev/null; then - cat <<_LT_EOF > conftest.$ac_ext -/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ -#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) -/* DATA imports from DLLs on WIN32 con't be const, because runtime - relocations are performed -- see ld's documentation on pseudo-relocs. */ -# define LT@&t@_DLSYM_CONST -#elif defined(__osf__) -/* This system does not cope well with relocations in const data. */ -# define LT@&t@_DLSYM_CONST -#else -# define LT@&t@_DLSYM_CONST const -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -_LT_EOF - # Now generate the symbol file. 
- eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' - - cat <<_LT_EOF >> conftest.$ac_ext - -/* The mapping between symbol names and symbols. */ -LT@&t@_DLSYM_CONST struct { - const char *name; - void *address; -} -lt__PROGRAM__LTX_preloaded_symbols[[]] = -{ - { "@PROGRAM@", (void *) 0 }, -_LT_EOF - $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext - cat <<\_LT_EOF >> conftest.$ac_ext - {0, (void *) 0} -}; - -/* This works around a problem in FreeBSD linker */ -#ifdef FREEBSD_WORKAROUND -static const void *lt_preloaded_setup() { - return lt__PROGRAM__LTX_preloaded_symbols; -} -#endif - -#ifdef __cplusplus -} -#endif -_LT_EOF - # Now try linking the two files. - mv conftest.$ac_objext conftstm.$ac_objext - lt_globsym_save_LIBS=$LIBS - lt_globsym_save_CFLAGS=$CFLAGS - LIBS="conftstm.$ac_objext" - CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" - if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then - pipe_works=yes - fi - LIBS=$lt_globsym_save_LIBS - CFLAGS=$lt_globsym_save_CFLAGS - else - echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD - fi - else - echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD - fi - else - echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD - fi - else - echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD - cat conftest.$ac_ext >&5 - fi - rm -rf conftest* conftst* - - # Do not use the global_symbol_pipe unless it works. - if test "$pipe_works" = yes; then - break - else - lt_cv_sys_global_symbol_pipe= - fi -done -]) -if test -z "$lt_cv_sys_global_symbol_pipe"; then - lt_cv_sys_global_symbol_to_cdecl= -fi -if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then - AC_MSG_RESULT(failed) -else - AC_MSG_RESULT(ok) -fi - -# Response file support. 
-if test "$lt_cv_nm_interface" = "MS dumpbin"; then - nm_file_list_spec='@' -elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then - nm_file_list_spec='@' -fi - -_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], - [Take the output of nm and produce a listing of raw symbols and C names]) -_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], - [Transform the output of nm in a proper C declaration]) -_LT_DECL([global_symbol_to_c_name_address], - [lt_cv_sys_global_symbol_to_c_name_address], [1], - [Transform the output of nm in a C name address pair]) -_LT_DECL([global_symbol_to_c_name_address_lib_prefix], - [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], - [Transform the output of nm in a C name address pair when lib prefix is needed]) -_LT_DECL([], [nm_file_list_spec], [1], - [Specify filename containing input files for $NM]) -]) # _LT_CMD_GLOBAL_SYMBOLS - - -# _LT_COMPILER_PIC([TAGNAME]) -# --------------------------- -m4_defun([_LT_COMPILER_PIC], -[m4_require([_LT_TAG_COMPILER])dnl -_LT_TAGVAR(lt_prog_compiler_wl, $1)= -_LT_TAGVAR(lt_prog_compiler_pic, $1)= -_LT_TAGVAR(lt_prog_compiler_static, $1)= - -m4_if([$1], [CXX], [ - # C++ specific cases for pic, static, wl, etc. - if test "$GXX" = yes; then - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - - case $host_os in - aix*) - # All AIX code is PIC. - if test "$host_cpu" = ia64; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - fi - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - m68k) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the `-m68020' flag to GCC prevents building anything better, - # like `-m68040'. 
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' - ;; - esac - ;; - - beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) - # PIC is the default for these OSes. - ;; - mingw* | cygwin* | os2* | pw32* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - # Although the cygwin gcc ignores -fPIC, still need this for old-style - # (--disable-auto-import) libraries - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - ;; - darwin* | rhapsody*) - # PIC is the default on this platform - # Common symbols not allowed in MH_DYLIB files - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' - ;; - *djgpp*) - # DJGPP does not support shared libraries at all - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - ;; - haiku*) - # PIC is the default for Haiku. - # The "-static" flag exists, but is broken. - _LT_TAGVAR(lt_prog_compiler_static, $1)= - ;; - interix[[3-9]]*) - # Interix 3.x gcc -fpic/-fPIC options generate broken code. - # Instead, we relocate shared libraries at runtime. - ;; - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic - fi - ;; - hpux*) - # PIC is the default for 64-bit PA HP-UX, but not for 32-bit - # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag - # sets the default TLS model and affects inlining. - case $host_cpu in - hppa*64*) - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - ;; - *qnx* | *nto*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - else - case $host_os in - aix[[4-9]]*) - # All AIX code is PIC. 
- if test "$host_cpu" = ia64; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - else - _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' - fi - ;; - chorus*) - case $cc_basename in - cxch68*) - # Green Hills C++ Compiler - # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" - ;; - esac - ;; - mingw* | cygwin* | os2* | pw32* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - ;; - dgux*) - case $cc_basename in - ec++*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - ;; - ghcx*) - # Green Hills C++ Compiler - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - ;; - *) - ;; - esac - ;; - freebsd* | dragonfly*) - # FreeBSD uses GNU C++ - ;; - hpux9* | hpux10* | hpux11*) - case $cc_basename in - CC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' - if test "$host_cpu" != ia64; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' - fi - ;; - aCC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' - case $host_cpu in - hppa*64*|ia64*) - # +Z the default - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' - ;; - esac - ;; - *) - ;; - esac - ;; - interix*) - # This is c89, which is MS Visual C++ (no shared libs) - # Anyone wants to do a port? - ;; - irix5* | irix6* | nonstopux*) - case $cc_basename in - CC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - # CC pic flag -KPIC is the default. 
- ;; - *) - ;; - esac - ;; - linux* | k*bsd*-gnu | kopensolaris*-gnu) - case $cc_basename in - KCC*) - # KAI C++ Compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - ecpc* ) - # old Intel C++ for x86_64 which still supported -KPIC. - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - icpc* ) - # Intel C++, used to be incompatible with GCC. - # ICC 10 doesn't accept -KPIC any more. - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - pgCC* | pgcpp*) - # Portland Group C++ compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - cxx*) - # Compaq C++ - # Make sure the PIC flag is empty. It appears that all Alpha - # Linux and Compaq Tru64 Unix objects are PIC. - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) - # IBM XL 8.0, 9.0 on PPC and BlueGene - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' - ;; - *) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) - # Sun C++ 5.9 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - ;; - esac - ;; - esac - ;; - lynxos*) - ;; - m88k*) - ;; - mvs*) - case $cc_basename in - cxx*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' - ;; - *) - ;; - esac - ;; - netbsd*) - ;; - *qnx* | *nto*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. 
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - osf3* | osf4* | osf5*) - case $cc_basename in - KCC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' - ;; - RCC*) - # Rational C++ 2.4.1 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - ;; - cxx*) - # Digital/Compaq C++ - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # Make sure the PIC flag is empty. It appears that all Alpha - # Linux and Compaq Tru64 Unix objects are PIC. - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - *) - ;; - esac - ;; - psos*) - ;; - solaris*) - case $cc_basename in - CC* | sunCC*) - # Sun C++ 4.2, 5.x and Centerline C++ - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - ;; - gcx*) - # Green Hills C++ Compiler - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - ;; - *) - ;; - esac - ;; - sunos4*) - case $cc_basename in - CC*) - # Sun C++ 4.x - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - lcc*) - # Lucid - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - ;; - *) - ;; - esac - ;; - sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) - case $cc_basename in - CC*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - esac - ;; - tandem*) - case $cc_basename in - NCC*) - # NonStop-UX NCC 3.20 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - ;; - *) - ;; - esac - ;; - vxworks*) - ;; - *) - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - ;; - esac - fi -], -[ - if test "$GCC" = yes; then - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - - case $host_os in - aix*) - # All AIX code is PIC. 
- if test "$host_cpu" = ia64; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - fi - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - m68k) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the `-m68020' flag to GCC prevents building anything better, - # like `-m68040'. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' - ;; - esac - ;; - - beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) - # PIC is the default for these OSes. - ;; - - mingw* | cygwin* | pw32* | os2* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - # Although the cygwin gcc ignores -fPIC, still need this for old-style - # (--disable-auto-import) libraries - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - ;; - - darwin* | rhapsody*) - # PIC is the default on this platform - # Common symbols not allowed in MH_DYLIB files - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' - ;; - - haiku*) - # PIC is the default for Haiku. - # The "-static" flag exists, but is broken. - _LT_TAGVAR(lt_prog_compiler_static, $1)= - ;; - - hpux*) - # PIC is the default for 64-bit PA HP-UX, but not for 32-bit - # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag - # sets the default TLS model and affects inlining. - case $host_cpu in - hppa*64*) - # +Z the default - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - ;; - - interix[[3-9]]*) - # Interix 3.x gcc -fpic/-fPIC options generate broken code. - # Instead, we relocate shared libraries at runtime. - ;; - - msdosdjgpp*) - # Just because we use GCC doesn't mean we suddenly get shared libraries - # on systems that don't support them. 
- _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - enable_shared=no - ;; - - *nto* | *qnx*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic - fi - ;; - - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - ;; - esac - - case $cc_basename in - nvcc*) # Cuda Compiler Driver 2.2 - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' - if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then - _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" - fi - ;; - esac - else - # PORTME Check for flag to pass linker flags through the system compiler. - case $host_os in - aix*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - if test "$host_cpu" = ia64; then - # AIX 5 now supports IA64 processor - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - else - _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' - fi - ;; - - mingw* | cygwin* | pw32* | os2* | cegcc*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - m4_if([$1], [GCJ], [], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) - ;; - - hpux9* | hpux10* | hpux11*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but - # not for PA HP-UX. - case $host_cpu in - hppa*64*|ia64*) - # +Z the default - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' - ;; - esac - # Is there a better lt_prog_compiler_static that works with the bundled CC? - _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' - ;; - - irix5* | irix6* | nonstopux*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # PIC (with -KPIC) is the default. 
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - - linux* | k*bsd*-gnu | kopensolaris*-gnu) - case $cc_basename in - # old Intel for x86_64 which still supported -KPIC. - ecc*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - # icc used to be incompatible with GCC. - # ICC 10 doesn't accept -KPIC any more. - icc* | ifort*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - # Lahey Fortran 8.1. - lf95*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' - _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' - ;; - nagfor*) - # NAG Fortran compiler - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) - # Portland Group compilers (*not* the Pentium gcc compiler, - # which looks to be a dead project) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - ccc*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # All Alpha code is PIC. 
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - xl* | bgxl* | bgf* | mpixl*) - # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' - ;; - *) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) - # Sun Fortran 8.3 passes all unrecognized flags to the linker - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='' - ;; - *Sun\ F* | *Sun*Fortran*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - ;; - *Sun\ C*) - # Sun C 5.9 - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - ;; - *Intel*\ [[CF]]*Compiler*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' - ;; - *Portland\ Group*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - esac - ;; - esac - ;; - - newsos6) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - *nto* | *qnx*) - # QNX uses GNU C++, but need to define -shared option too, otherwise - # it will coredump. - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' - ;; - - osf3* | osf4* | osf5*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - # All OSF/1 code is PIC. 
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - - rdos*) - _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' - ;; - - solaris*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - case $cc_basename in - f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; - *) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; - esac - ;; - - sunos4*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - sysv4 | sysv4.2uw2* | sysv4.3*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - sysv4*MP*) - if test -d /usr/nec ;then - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - fi - ;; - - sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - unicos*) - _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - ;; - - uts4*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' - _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' - ;; - - *) - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no - ;; - esac - fi -]) -case $host_os in - # For platforms which do not support PIC, -DPIC is meaningless: - *djgpp*) - _LT_TAGVAR(lt_prog_compiler_pic, $1)= - ;; - *) - _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" - ;; -esac - -AC_CACHE_CHECK([for $compiler option to produce PIC], - [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], - [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) -_LT_TAGVAR(lt_prog_compiler_pic, 
$1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) - -# -# Check to make sure the PIC flag actually works. -# -if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then - _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], - [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], - [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], - [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in - "" | " "*) ;; - *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; - esac], - [_LT_TAGVAR(lt_prog_compiler_pic, $1)= - _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) -fi -_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], - [Additional compiler flags for building library objects]) - -_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], - [How to pass a linker flag through the compiler]) -# -# Check to make sure the static flag actually works. -# -wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" -_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], - _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), - $lt_tmp_static_flag, - [], - [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) -_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], - [Compiler flag to prevent dynamic linking]) -])# _LT_COMPILER_PIC - - -# _LT_LINKER_SHLIBS([TAGNAME]) -# ---------------------------- -# See if the linker supports building shared libraries. 
-m4_defun([_LT_LINKER_SHLIBS], -[AC_REQUIRE([LT_PATH_LD])dnl -AC_REQUIRE([LT_PATH_NM])dnl -m4_require([_LT_PATH_MANIFEST_TOOL])dnl -m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_DECL_SED])dnl -m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl -m4_require([_LT_TAG_COMPILER])dnl -AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) -m4_if([$1], [CXX], [ - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] - case $host_os in - aix[[4-9]]*) - # If we're using GNU nm, then we don't want the "-C" option. - # -C means demangle to AIX nm, but means don't demangle with GNU nm - # Also, AIX nm treats weak defined symbols like other global defined - # symbols, whereas GNU nm marks them as "W". - if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then - _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' - else - _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' - fi - ;; - pw32*) - _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" - ;; - cygwin* | mingw* | cegcc*) - case $cc_basename in - cl*) - _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' - ;; - *) - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(exclude_expsyms, 
$1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] - ;; - esac - ;; - *) - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' - ;; - esac -], [ - runpath_var= - _LT_TAGVAR(allow_undefined_flag, $1)= - _LT_TAGVAR(always_export_symbols, $1)=no - _LT_TAGVAR(archive_cmds, $1)= - _LT_TAGVAR(archive_expsym_cmds, $1)= - _LT_TAGVAR(compiler_needs_object, $1)=no - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - _LT_TAGVAR(export_dynamic_flag_spec, $1)= - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(hardcode_automatic, $1)=no - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= - _LT_TAGVAR(hardcode_libdir_separator, $1)= - _LT_TAGVAR(hardcode_minus_L, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported - _LT_TAGVAR(inherit_rpath, $1)=no - _LT_TAGVAR(link_all_deplibs, $1)=unknown - _LT_TAGVAR(module_cmds, $1)= - _LT_TAGVAR(module_expsym_cmds, $1)= - _LT_TAGVAR(old_archive_from_new_cmds, $1)= - _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= - _LT_TAGVAR(thread_safe_flag_spec, $1)= - _LT_TAGVAR(whole_archive_flag_spec, $1)= - # include_expsyms should be a list of space-separated symbols to be *always* - # included in the symbol list - _LT_TAGVAR(include_expsyms, $1)= - # exclude_expsyms can be an extended regexp of symbols to exclude - # it will be wrapped by ` (' and `)$', so one must not match beginning or - # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', - # as well as any symbol that contains `d'. 
- _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] - # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out - # platforms (ab)use it in PIC code, but their linkers get confused if - # the symbol is explicitly referenced. Since portable code cannot - # rely on this symbol name, it's probably fine to never include it in - # preloaded symbol tables. - # Exclude shared library initialization/finalization symbols. -dnl Note also adjust exclude_expsyms for C++ above. - extract_expsyms_cmds= - - case $host_os in - cygwin* | mingw* | pw32* | cegcc*) - # FIXME: the MSVC++ port hasn't been tested in a loooong time - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - if test "$GCC" != yes; then - with_gnu_ld=no - fi - ;; - interix*) - # we just hope/assume this is gcc and not c89 (= MSVC++) - with_gnu_ld=yes - ;; - openbsd*) - with_gnu_ld=no - ;; - esac - - _LT_TAGVAR(ld_shlibs, $1)=yes - - # On some targets, GNU ld is compatible enough with the native linker - # that we're better off using the native interface for both. - lt_use_gnu_ld_interface=no - if test "$with_gnu_ld" = yes; then - case $host_os in - aix*) - # The AIX port of GNU ld has always aspired to compatibility - # with the native linker. However, as the warning in the GNU ld - # block says, versions before 2.19.5* couldn't really create working - # shared libraries, regardless of the interface used. - case `$LD -v 2>&1` in - *\ \(GNU\ Binutils\)\ 2.19.5*) ;; - *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; - *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; - *) - lt_use_gnu_ld_interface=yes - ;; - esac - ;; - *) - lt_use_gnu_ld_interface=yes - ;; - esac - fi - - if test "$lt_use_gnu_ld_interface" = yes; then - # If archive_cmds runs LD, not CC, wlarc should be empty - wlarc='${wl}' - - # Set some defaults for GNU ld with shared library support. These - # are reset later if shared libraries are not supported. 
Putting them - # here allows them to be overridden if necessary. - runpath_var=LD_RUN_PATH - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' - # ancient GNU ld didn't support --whole-archive et. al. - if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then - _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' - else - _LT_TAGVAR(whole_archive_flag_spec, $1)= - fi - supports_anon_versioning=no - case `$LD -v 2>&1` in - *GNU\ gold*) supports_anon_versioning=yes ;; - *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 - *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... - *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... - *\ 2.11.*) ;; # other 2.11 versions - *) supports_anon_versioning=yes ;; - esac - - # See if GNU ld supports shared libraries. - case $host_os in - aix[[3-9]]*) - # On AIX/PPC, the GNU linker is very broken - if test "$host_cpu" != ia64; then - _LT_TAGVAR(ld_shlibs, $1)=no - cat <<_LT_EOF 1>&2 - -*** Warning: the GNU linker, at least up to release 2.19, is reported -*** to be unable to reliably create shared libraries on AIX. -*** Therefore, libtool is disabling shared libraries support. If you -*** really care for shared libraries, you may want to install binutils -*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. -*** You will then need to restart the configuration process. 
- -_LT_EOF - fi - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='' - ;; - m68k) - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - ;; - esac - ;; - - beos*) - if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - # Joseph Beckenbach says some releases of gcc - # support --undefined. This deserves some investigation. FIXME - _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - cygwin* | mingw* | pw32* | cegcc*) - # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, - # as there is no search path for DLLs. 
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=no - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' - _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] - - if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - # If the export-symbols file already is a .def file (1st line - # is EXPORTS), use it as is; otherwise, prepend... - _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then - cp $export_symbols $output_objdir/$soname.def; - else - echo EXPORTS > $output_objdir/$soname.def; - cat $export_symbols >> $output_objdir/$soname.def; - fi~ - $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - haiku*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - interix[[3-9]]*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. 
- # Instead, shared libraries are loaded at an image base (0x10000000 by - # default) and relocated if they conflict, which is a slow very memory - # consuming and fragmenting process. To avoid this, we pick a random, - # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link - # time. Moving up from 0x10000000 also allows more sbrk(2) space. - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - ;; - - gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) - tmp_diet=no - if test "$host_os" = linux-dietlibc; then - case $cc_basename in - diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) - esac - fi - if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ - && test "$tmp_diet" = no - then - tmp_addflag=' $pic_flag' - tmp_sharedflag='-shared' - case $cc_basename,$host_cpu in - pgcc*) # Portland Group C compiler - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' - tmp_addflag=' $pic_flag' - ;; - pgf77* | pgf90* | pgf95* | pgfortran*) - # Portland Group f77 and f90 compilers - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' - tmp_addflag=' $pic_flag -Mnomain' ;; - ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 - tmp_addflag=' 
-i_dynamic' ;; - efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 - tmp_addflag=' -i_dynamic -nofor_main' ;; - ifc* | ifort*) # Intel Fortran compiler - tmp_addflag=' -nofor_main' ;; - lf95*) # Lahey Fortran 8.1 - _LT_TAGVAR(whole_archive_flag_spec, $1)= - tmp_sharedflag='--shared' ;; - xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) - tmp_sharedflag='-qmkshrobj' - tmp_addflag= ;; - nvcc*) # Cuda Compiler Driver 2.2 - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' - _LT_TAGVAR(compiler_needs_object, $1)=yes - ;; - esac - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) # Sun C 5.9 - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' - _LT_TAGVAR(compiler_needs_object, $1)=yes - tmp_sharedflag='-G' ;; - *Sun\ F*) # Sun Fortran 8.3 - tmp_sharedflag='-G' ;; - esac - _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - - if test "x$supports_anon_versioning" = xyes; then - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ - echo "local: *; };" >> $output_objdir/$libname.ver~ - $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' - fi - - case $cc_basename in - xlf* | bgf* | bgxlf* | mpixlf*) - # IBM XL Fortran 10.1 on PPC cannot create shared libs itself - _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' - 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' - if test "x$supports_anon_versioning" = xyes; then - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ - echo "local: *; };" >> $output_objdir/$libname.ver~ - $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' - fi - ;; - esac - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' - wlarc= - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - fi - ;; - - solaris*) - if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then - _LT_TAGVAR(ld_shlibs, $1)=no - cat <<_LT_EOF 1>&2 - -*** Warning: The releases 2.8.* of the GNU linker cannot reliably -*** create shared libraries on Solaris systems. Therefore, libtool -*** is disabling shared libraries support. We urge you to upgrade GNU -*** binutils to release 2.9.1 or newer. Another option is to modify -*** your PATH or compiler configuration so that the native linker is -*** used, and then restart. 
- -_LT_EOF - elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) - case `$LD -v 2>&1` in - *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) - _LT_TAGVAR(ld_shlibs, $1)=no - cat <<_LT_EOF 1>&2 - -*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not -*** reliably create shared libraries on SCO systems. Therefore, libtool -*** is disabling shared libraries support. We urge you to upgrade GNU -*** binutils to release 2.16.91.0.3 or newer. Another option is to modify -*** your PATH or compiler configuration so that the native linker is -*** used, and then restart. - -_LT_EOF - ;; - *) - # For security reasons, it is highly recommended that you always - # use absolute paths for naming shared libraries, and exclude the - # DT_RUNPATH tag from executables and libraries. But doing so - # requires that you compile everything twice, which is a pain. 
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - sunos4*) - _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' - wlarc= - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - *) - if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - - if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then - runpath_var= - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= - _LT_TAGVAR(export_dynamic_flag_spec, $1)= - _LT_TAGVAR(whole_archive_flag_spec, $1)= - fi - else - # PORTME fill in a description of your system's linker (not GNU ld) - case $host_os in - aix3*) - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=yes - _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' - # Note: this linker hardcodes the directories in LIBPATH if there - # are no directories specified by -L. 
- _LT_TAGVAR(hardcode_minus_L, $1)=yes - if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then - # Neither direct hardcoding nor static linking is supported with a - # broken collect2. - _LT_TAGVAR(hardcode_direct, $1)=unsupported - fi - ;; - - aix[[4-9]]*) - if test "$host_cpu" = ia64; then - # On IA64, the linker does run time linking by default, so we don't - # have to do anything special. - aix_use_runtimelinking=no - exp_sym_flag='-Bexport' - no_entry_flag="" - else - # If we're using GNU nm, then we don't want the "-C" option. - # -C means demangle to AIX nm, but means don't demangle with GNU nm - # Also, AIX nm treats weak defined symbols like other global - # defined symbols, whereas GNU nm marks them as "W". - if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then - _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' - else - _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' - fi - aix_use_runtimelinking=no - - # Test if we are trying to use run time linking or normal - # AIX style linking. If -brtl is somewhere in LDFLAGS, we - # need to do runtime linking. - case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) - for ld_flag in $LDFLAGS; do - if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then - aix_use_runtimelinking=yes - break - fi - done - ;; - esac - - exp_sym_flag='-bexport' - no_entry_flag='-bnoentry' - fi - - # When large executables or shared objects are built, AIX ld can - # have problems creating the table of contents. If linking a library - # or program results in "error TOC overflow" add -mminimal-toc to - # CXXFLAGS/CFLAGS for g++/gcc. 
In the cases where that is not - # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. - - _LT_TAGVAR(archive_cmds, $1)='' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' - - if test "$GCC" = yes; then - case $host_os in aix4.[[012]]|aix4.[[012]].*) - # We only want to do this on AIX 4.2 and lower, the check - # below for broken collect2 doesn't work under 4.3+ - collect2name=`${CC} -print-prog-name=collect2` - if test -f "$collect2name" && - strings "$collect2name" | $GREP resolve_lib_name >/dev/null - then - # We have reworked collect2 - : - else - # We have old collect2 - _LT_TAGVAR(hardcode_direct, $1)=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. Setting hardcode_minus_L - # to unsupported forces relinking - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)= - fi - ;; - esac - shared_flag='-shared' - if test "$aix_use_runtimelinking" = yes; then - shared_flag="$shared_flag "'${wl}-G' - fi - else - # not using gcc - if test "$host_cpu" = ia64; then - # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release - # chokes on -Wl,-G. The following line is correct: - shared_flag='-G' - else - if test "$aix_use_runtimelinking" = yes; then - shared_flag='${wl}-G' - else - shared_flag='${wl}-bM:SRE' - fi - fi - fi - - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' - # It seems that -bexpall does not export symbols beginning with - # underscore (_), so it is better to generate a list of symbols to export. - _LT_TAGVAR(always_export_symbols, $1)=yes - if test "$aix_use_runtimelinking" = yes; then - # Warning - without using the other runtime loading flags (-brtl), - # -berok will link without error, but may produce a broken library. 
- _LT_TAGVAR(allow_undefined_flag, $1)='-berok' - # Determine the default libpath from the value encoded in an - # empty executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" - else - if test "$host_cpu" = ia64; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' - _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" - _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" - else - # Determine the default libpath from the value encoded in an - # empty executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" - # Warning - without using the other run time loading flags, - # -berok will link without error, but may produce a broken library. - _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' - _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' - if test "$with_gnu_ld" = yes; then - # We only use this code for GNU lds that support --whole-archive. - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' - else - # Exported symbols can be pulled into shared objects from archives - _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)=yes - # This is similar to how AIX traditionally builds its shared libraries. 
- _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' - fi - fi - ;; - - amigaos*) - case $host_cpu in - powerpc) - # see comment about AmigaOS4 .so support - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='' - ;; - m68k) - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - ;; - esac - ;; - - bsdi[[45]]*) - _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic - ;; - - cygwin* | mingw* | pw32* | cegcc*) - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - case $cc_basename in - cl*) - # Native MSVC - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='@' - # Tell ltmain to make .lib files, not .a files. - libext=lib - # Tell ltmain to make .dll files, not .so files. - shrext_cmds=".dll" - # FIXME: Setting linknames here is a bad hack. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' - _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then - sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; - else - sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; - fi~ - $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ - linknames=' - # The linker will not automatically build a static lib if we build a DLL. - # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' - _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' - # Don't use ranlib - _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' - _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ - lt_tool_outputfile="@TOOL_OUTPUT@"~ - case $lt_outputfile in - *.exe|*.EXE) ;; - *) - lt_outputfile="$lt_outputfile.exe" - lt_tool_outputfile="$lt_tool_outputfile.exe" - ;; - esac~ - if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then - $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; - $RM "$lt_outputfile.manifest"; - fi' - ;; - *) - # Assume MSVC wrapper - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - # Tell ltmain to make .lib files, not .a files. - libext=lib - # Tell ltmain to make .dll files, not .so files. - shrext_cmds=".dll" - # FIXME: Setting linknames here is a bad hack. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' - # The linker will automatically build a .lib file if we build a DLL. - _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' - # FIXME: Should let the user specify the lib program. - _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - ;; - esac - ;; - - darwin* | rhapsody*) - _LT_DARWIN_LINKER_FEATURES($1) - ;; - - dgux*) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor - # support. Future versions do this automatically, but an explicit c++rt0.o - # does not break anything, and helps significantly (at the cost of a little - # extra space). - freebsd2.2*) - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. - freebsd2.*) - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
- freebsd* | dragonfly*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - hpux9*) - if test "$GCC" = yes; then - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' - else - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(hardcode_direct, $1)=yes - - # hardcode_minus_L: Not really in the search PATH, - # but as the default location of the library. - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - ;; - - hpux10*) - if test "$GCC" = yes && test "$with_gnu_ld" = no; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' - fi - if test "$with_gnu_ld" = no; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - # hardcode_minus_L: Not really in the search PATH, - # but as the default location of the library. 
- _LT_TAGVAR(hardcode_minus_L, $1)=yes - fi - ;; - - hpux11*) - if test "$GCC" = yes && test "$with_gnu_ld" = no; then - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - else - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' - ;; - *) - m4_if($1, [], [ - # Older versions of the 11.00 compiler do not understand -b yet - # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) - _LT_LINKER_OPTION([if $CC understands -b], - _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], - [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], - [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], - [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) - ;; - esac - fi - if test "$with_gnu_ld" = no; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - case $host_cpu in - hppa*64*|ia64*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - *) - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - - # hardcode_minus_L: Not really in the search PATH, - 
# but as the default location of the library. - _LT_TAGVAR(hardcode_minus_L, $1)=yes - ;; - esac - fi - ;; - - irix5* | irix6* | nonstopux*) - if test "$GCC" = yes; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - # Try to use the -exported_symbol ld option, if it does not - # work, assume that -exports_file does not work either and - # implicitly export all symbols. - # This should be the same for all languages, so no per-tag cache variable. - AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], - [lt_cv_irix_exported_symbol], - [save_LDFLAGS="$LDFLAGS" - LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" - AC_LINK_IFELSE( - [AC_LANG_SOURCE( - [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], - [C++], [[int foo (void) { return 0; }]], - [Fortran 77], [[ - subroutine foo - end]], - [Fortran], [[ - subroutine foo - end]])])], - [lt_cv_irix_exported_symbol=yes], - [lt_cv_irix_exported_symbol=no]) - LDFLAGS="$save_LDFLAGS"]) - if test "$lt_cv_irix_exported_symbol" = yes; then - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' - fi - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry 
${output_objdir}/so_locations -exports_file $export_symbols -o $lib' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)='no' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(inherit_rpath, $1)=yes - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out - else - _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - newsos6) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - *nto* | *qnx*) - ;; - - openbsd*) - if test -f /usr/libexec/ld.so; then - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - else - case $host_os in - openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC 
-shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - ;; - esac - fi - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - os2*) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' - ;; - - osf3*) - if test "$GCC" = yes; then - _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - else - _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)='no' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - ;; - - osf4* | osf5*) # as osf3* with the addition of -msym flag - if test "$GCC" = yes; then - _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' - _LT_TAGVAR(archive_cmds, $1)='$CC 
-shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - else - _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ - $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' - - # Both c and cxx compiler support -rpath directly - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)='no' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - ;; - - solaris*) - _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' - if test "$GCC" = yes; then - wlarc='${wl}' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' - else - case `$CC -V 2>&1` in - *"Compilers 5.0"*) - wlarc='' - _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' - 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' - ;; - *) - wlarc='${wl}' - _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' - ;; - esac - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - case $host_os in - solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; - *) - # The compiler driver will combine and reorder linker options, - # but understands `-z linker_flag'. GCC discards it without `$wl', - # but is careful enough not to reorder. - # Supported since Solaris 2.6 (maybe 2.5.1?) - if test "$GCC" = yes; then - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' - else - _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' - fi - ;; - esac - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - sunos4*) - if test "x$host_vendor" = xsequent; then - # Use $CC to link under sequent, because it throws in some extra .o - # files that make .init and .fini sections work. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' - fi - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - sysv4) - case $host_vendor in - sni) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? - ;; - siemens) - ## LD is ld it makes a PLAMLIB - ## CC just makes a GrossModule. - _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' - _LT_TAGVAR(hardcode_direct, $1)=no - ;; - motorola) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie - ;; - esac - runpath_var='LD_RUN_PATH' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - sysv4.3*) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' - ;; - - sysv4*MP*) - if test -d /usr/nec; then - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - runpath_var=LD_RUN_PATH - hardcode_runpath_var=yes - _LT_TAGVAR(ld_shlibs, $1)=yes - fi - ;; - - sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) - _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - runpath_var='LD_RUN_PATH' - - if test "$GCC" = yes; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs 
$deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - fi - ;; - - sysv5* | sco3.2v5* | sco5v6*) - # Note: We can NOT use -z defs as we might desire, because we do not - # link with -lc, and that would cause any symbols used from libc to - # always be unresolved, which means just about no library would - # ever link correctly. If we're not using GNU ld we use -z text - # though, which does catch some bad symbols but isn't as heavy-handed - # as -z defs. - _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' - _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' - runpath_var='LD_RUN_PATH' - - if test "$GCC" = yes; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - fi - ;; - - uts4*) - _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - - *) - 
_LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - - if test x$host_vendor = xsni; then - case $host in - sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym' - ;; - esac - fi - fi -]) -AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) -test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no - -_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld - -_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl -_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl -_LT_DECL([], [extract_expsyms_cmds], [2], - [The commands to extract the exported symbol list from a shared archive]) - -# -# Do we need to explicitly link libc? -# -case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in -x|xyes) - # Assume -lc should be added - _LT_TAGVAR(archive_cmds_need_lc, $1)=yes - - if test "$enable_shared" = yes && test "$GCC" = yes; then - case $_LT_TAGVAR(archive_cmds, $1) in - *'~'*) - # FIXME: we may have to deal with multi-command sequences. - ;; - '$CC '*) - # Test whether the compiler implicitly links with -lc since on some - # systems, -lgcc has to come before -lc. If gcc already passes -lc - # to ld, don't add -lc before -lgcc. - AC_CACHE_CHECK([whether -lc should be explicitly linked in], - [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), - [$RM conftest* - echo "$lt_simple_compile_test_code" > conftest.$ac_ext - - if AC_TRY_EVAL(ac_compile) 2>conftest.err; then - soname=conftest - lib=conftest - libobjs=conftest.$ac_objext - deplibs= - wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) - pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) - compiler_flags=-v - linker_flags=-v - verstring= - output_objdir=. 
- libname=conftest - lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) - _LT_TAGVAR(allow_undefined_flag, $1)= - if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) - then - lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no - else - lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes - fi - _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag - else - cat conftest.err 1>&5 - fi - $RM conftest* - ]) - _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) - ;; - esac - fi - ;; -esac - -_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], - [Whether or not to add -lc for building shared libraries]) -_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], - [enable_shared_with_static_runtimes], [0], - [Whether or not to disallow shared libs when runtime libs are static]) -_LT_TAGDECL([], [export_dynamic_flag_spec], [1], - [Compiler flag to allow reflexive dlopens]) -_LT_TAGDECL([], [whole_archive_flag_spec], [1], - [Compiler flag to generate shared objects directly from archives]) -_LT_TAGDECL([], [compiler_needs_object], [1], - [Whether the compiler copes with passing no objects directly]) -_LT_TAGDECL([], [old_archive_from_new_cmds], [2], - [Create an old-style archive from a shared archive]) -_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], - [Create a temporary old-style archive to link instead of a shared archive]) -_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) -_LT_TAGDECL([], [archive_expsym_cmds], [2]) -_LT_TAGDECL([], [module_cmds], [2], - [Commands used to build a loadable module if different from building - a shared archive.]) -_LT_TAGDECL([], [module_expsym_cmds], [2]) -_LT_TAGDECL([], [with_gnu_ld], [1], - [Whether we are building with GNU ld or not]) -_LT_TAGDECL([], [allow_undefined_flag], [1], - [Flag that allows shared libraries with undefined symbols to be built]) -_LT_TAGDECL([], [no_undefined_flag], [1], - 
[Flag that enforces no undefined symbols]) -_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], - [Flag to hardcode $libdir into a binary during linking. - This must work even if $libdir does not exist]) -_LT_TAGDECL([], [hardcode_libdir_separator], [1], - [Whether we need a single "-rpath" flag with a separated argument]) -_LT_TAGDECL([], [hardcode_direct], [0], - [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes - DIR into the resulting binary]) -_LT_TAGDECL([], [hardcode_direct_absolute], [0], - [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes - DIR into the resulting binary and the resulting library dependency is - "absolute", i.e impossible to change by setting ${shlibpath_var} if the - library is relocated]) -_LT_TAGDECL([], [hardcode_minus_L], [0], - [Set to "yes" if using the -LDIR flag during linking hardcodes DIR - into the resulting binary]) -_LT_TAGDECL([], [hardcode_shlibpath_var], [0], - [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR - into the resulting binary]) -_LT_TAGDECL([], [hardcode_automatic], [0], - [Set to "yes" if building a shared library automatically hardcodes DIR - into the library and all subsequent libraries and executables linked - against it]) -_LT_TAGDECL([], [inherit_rpath], [0], - [Set to yes if linker adds runtime paths of dependent libraries - to runtime path list]) -_LT_TAGDECL([], [link_all_deplibs], [0], - [Whether libtool must link a program against all its dependency libraries]) -_LT_TAGDECL([], [always_export_symbols], [0], - [Set to "yes" if exported symbols are required]) -_LT_TAGDECL([], [export_symbols_cmds], [2], - [The commands to list exported symbols]) -_LT_TAGDECL([], [exclude_expsyms], [1], - [Symbols that should not be listed in the preloaded symbols]) -_LT_TAGDECL([], [include_expsyms], [1], - [Symbols that must always be exported]) -_LT_TAGDECL([], [prelink_cmds], [2], - [Commands necessary for linking programs (against libraries) with 
templates]) -_LT_TAGDECL([], [postlink_cmds], [2], - [Commands necessary for finishing linking programs]) -_LT_TAGDECL([], [file_list_spec], [1], - [Specify filename containing input files]) -dnl FIXME: Not yet implemented -dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], -dnl [Compiler flag to generate thread safe objects]) -])# _LT_LINKER_SHLIBS - - -# _LT_LANG_C_CONFIG([TAG]) -# ------------------------ -# Ensure that the configuration variables for a C compiler are suitably -# defined. These variables are subsequently used by _LT_CONFIG to write -# the compiler configuration to `libtool'. -m4_defun([_LT_LANG_C_CONFIG], -[m4_require([_LT_DECL_EGREP])dnl -lt_save_CC="$CC" -AC_LANG_PUSH(C) - -# Source file extension for C test sources. -ac_ext=c - -# Object file extension for compiled C test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code="int some_variable = 0;" - -# Code to be used in simple link tests -lt_simple_link_test_code='int main(){return(0);}' - -_LT_TAG_COMPILER -# Save the default compiler, since it gets overwritten when the other -# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. -compiler_DEFAULT=$CC - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -## CAVEAT EMPTOR: -## There is no encapsulation within the following macros, do not change -## the running order or otherwise move them around unless you know exactly -## what you are doing... 
-if test -n "$compiler"; then - _LT_COMPILER_NO_RTTI($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - LT_SYS_DLOPEN_SELF - _LT_CMD_STRIPLIB - - # Report which library types will actually be built - AC_MSG_CHECKING([if libtool supports shared libraries]) - AC_MSG_RESULT([$can_build_shared]) - - AC_MSG_CHECKING([whether to build shared libraries]) - test "$can_build_shared" = "no" && enable_shared=no - - # On AIX, shared libraries and static libraries use the same namespace, and - # are all built from PIC. - case $host_os in - aix3*) - test "$enable_shared" = yes && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - - aix[[4-9]]*) - if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then - test "$enable_shared" = yes && enable_static=no - fi - ;; - esac - AC_MSG_RESULT([$enable_shared]) - - AC_MSG_CHECKING([whether to build static libraries]) - # Make sure either enable_shared or enable_static is yes. - test "$enable_shared" = yes || enable_static=yes - AC_MSG_RESULT([$enable_static]) - - _LT_CONFIG($1) -fi -AC_LANG_POP -CC="$lt_save_CC" -])# _LT_LANG_C_CONFIG - - -# _LT_LANG_CXX_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for a C++ compiler are suitably -# defined. These variables are subsequently used by _LT_CONFIG to write -# the compiler configuration to `libtool'. 
-m4_defun([_LT_LANG_CXX_CONFIG], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -m4_require([_LT_DECL_EGREP])dnl -m4_require([_LT_PATH_MANIFEST_TOOL])dnl -if test -n "$CXX" && ( test "X$CXX" != "Xno" && - ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || - (test "X$CXX" != "Xg++"))) ; then - AC_PROG_CXXCPP -else - _lt_caught_CXX_error=yes -fi - -AC_LANG_PUSH(C++) -_LT_TAGVAR(archive_cmds_need_lc, $1)=no -_LT_TAGVAR(allow_undefined_flag, $1)= -_LT_TAGVAR(always_export_symbols, $1)=no -_LT_TAGVAR(archive_expsym_cmds, $1)= -_LT_TAGVAR(compiler_needs_object, $1)=no -_LT_TAGVAR(export_dynamic_flag_spec, $1)= -_LT_TAGVAR(hardcode_direct, $1)=no -_LT_TAGVAR(hardcode_direct_absolute, $1)=no -_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= -_LT_TAGVAR(hardcode_libdir_separator, $1)= -_LT_TAGVAR(hardcode_minus_L, $1)=no -_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported -_LT_TAGVAR(hardcode_automatic, $1)=no -_LT_TAGVAR(inherit_rpath, $1)=no -_LT_TAGVAR(module_cmds, $1)= -_LT_TAGVAR(module_expsym_cmds, $1)= -_LT_TAGVAR(link_all_deplibs, $1)=unknown -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds -_LT_TAGVAR(no_undefined_flag, $1)= -_LT_TAGVAR(whole_archive_flag_spec, $1)= -_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - -# Source file extension for C++ test sources. -ac_ext=cpp - -# Object file extension for compiled C++ test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# No sense in running all these tests if we already determined that -# the CXX compiler isn't working. Some variables (like enable_shared) -# are currently assumed to apply to all compilers on this platform, -# and will be corrupted by setting them based on a non-working compiler. 
-if test "$_lt_caught_CXX_error" != yes; then - # Code to be used in simple compile tests - lt_simple_compile_test_code="int some_variable = 0;" - - # Code to be used in simple link tests - lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' - - # ltmain only uses $CC for tagged configurations so make sure $CC is set. - _LT_TAG_COMPILER - - # save warnings/boilerplate of simple test code - _LT_COMPILER_BOILERPLATE - _LT_LINKER_BOILERPLATE - - # Allow CC to be a program name with arguments. - lt_save_CC=$CC - lt_save_CFLAGS=$CFLAGS - lt_save_LD=$LD - lt_save_GCC=$GCC - GCC=$GXX - lt_save_with_gnu_ld=$with_gnu_ld - lt_save_path_LD=$lt_cv_path_LD - if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then - lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx - else - $as_unset lt_cv_prog_gnu_ld - fi - if test -n "${lt_cv_path_LDCXX+set}"; then - lt_cv_path_LD=$lt_cv_path_LDCXX - else - $as_unset lt_cv_path_LD - fi - test -z "${LDCXX+set}" || LD=$LDCXX - CC=${CXX-"c++"} - CFLAGS=$CXXFLAGS - compiler=$CC - _LT_TAGVAR(compiler, $1)=$CC - _LT_CC_BASENAME([$compiler]) - - if test -n "$compiler"; then - # We don't want -fno-exception when compiling C++ code, so set the - # no_builtin_flag separately - if test "$GXX" = yes; then - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' - else - _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= - fi - - if test "$GXX" = yes; then - # Set up default GNU C++ configuration - - LT_PATH_LD - - # Check if GNU C++ uses GNU ld as the underlying linker, since the - # archiving commands below assume that GNU ld is being used. 
- if test "$with_gnu_ld" = yes; then - _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' - - # If archive_cmds runs LD, not CC, wlarc should be empty - # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to - # investigate it a little bit more. (MM) - wlarc='${wl}' - - # ancient GNU ld didn't support --whole-archive et. al. - if eval "`$CC -print-prog-name=ld` --help 2>&1" | - $GREP 'no-whole-archive' > /dev/null; then - _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' - else - _LT_TAGVAR(whole_archive_flag_spec, $1)= - fi - else - with_gnu_ld=no - wlarc= - - # A generic and very simple default shared library creation - # command for GNU C++ for the case where it uses the native - # linker, instead of GNU ld. If possible, this setting should - # overridden to take advantage of the native linker features on - # the platform it is being used on. - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' - fi - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' - - else - GXX=no - with_gnu_ld=no - wlarc= - fi - - # PORTME: fill in a description of your system's C++ link characteristics - AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) - _LT_TAGVAR(ld_shlibs, $1)=yes - case $host_os in - aix3*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - aix[[4-9]]*) - if test "$host_cpu" = ia64; then - # On IA64, the linker does run time linking by default, so we don't - # have to do anything special. - aix_use_runtimelinking=no - exp_sym_flag='-Bexport' - no_entry_flag="" - else - aix_use_runtimelinking=no - - # Test if we are trying to use run time linking or normal - # AIX style linking. If -brtl is somewhere in LDFLAGS, we - # need to do runtime linking. - case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) - for ld_flag in $LDFLAGS; do - case $ld_flag in - *-brtl*) - aix_use_runtimelinking=yes - break - ;; - esac - done - ;; - esac - - exp_sym_flag='-bexport' - no_entry_flag='-bnoentry' - fi - - # When large executables or shared objects are built, AIX ld can - # have problems creating the table of contents. If linking a library - # or program results in "error TOC overflow" add -mminimal-toc to - # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not - # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
- - _LT_TAGVAR(archive_cmds, $1)='' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' - - if test "$GXX" = yes; then - case $host_os in aix4.[[012]]|aix4.[[012]].*) - # We only want to do this on AIX 4.2 and lower, the check - # below for broken collect2 doesn't work under 4.3+ - collect2name=`${CC} -print-prog-name=collect2` - if test -f "$collect2name" && - strings "$collect2name" | $GREP resolve_lib_name >/dev/null - then - # We have reworked collect2 - : - else - # We have old collect2 - _LT_TAGVAR(hardcode_direct, $1)=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. Setting hardcode_minus_L - # to unsupported forces relinking - _LT_TAGVAR(hardcode_minus_L, $1)=yes - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)= - fi - esac - shared_flag='-shared' - if test "$aix_use_runtimelinking" = yes; then - shared_flag="$shared_flag "'${wl}-G' - fi - else - # not using gcc - if test "$host_cpu" = ia64; then - # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release - # chokes on -Wl,-G. The following line is correct: - shared_flag='-G' - else - if test "$aix_use_runtimelinking" = yes; then - shared_flag='${wl}-G' - else - shared_flag='${wl}-bM:SRE' - fi - fi - fi - - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' - # It seems that -bexpall does not export symbols beginning with - # underscore (_), so it is better to generate a list of symbols to - # export. - _LT_TAGVAR(always_export_symbols, $1)=yes - if test "$aix_use_runtimelinking" = yes; then - # Warning - without using the other runtime loading flags (-brtl), - # -berok will link without error, but may produce a broken library. 
- _LT_TAGVAR(allow_undefined_flag, $1)='-berok' - # Determine the default libpath from the value encoded in an empty - # executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" - - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" - else - if test "$host_cpu" = ia64; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' - _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" - _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" - else - # Determine the default libpath from the value encoded in an - # empty executable. - _LT_SYS_MODULE_PATH_AIX([$1]) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" - # Warning - without using the other run time loading flags, - # -berok will link without error, but may produce a broken library. - _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' - _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' - if test "$with_gnu_ld" = yes; then - # We only use this code for GNU lds that support --whole-archive. - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' - else - # Exported symbols can be pulled into shared objects from archives - _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' - fi - _LT_TAGVAR(archive_cmds_need_lc, $1)=yes - # This is similar to how AIX traditionally builds its shared - # libraries. 
- _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' - fi - fi - ;; - - beos*) - if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - # Joseph Beckenbach says some releases of gcc - # support --undefined. This deserves some investigation. FIXME - _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - chorus*) - case $cc_basename in - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - cygwin* | mingw* | pw32* | cegcc*) - case $GXX,$cc_basename in - ,cl* | no,cl*) - # Native MSVC - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=yes - _LT_TAGVAR(file_list_spec, $1)='@' - # Tell ltmain to make .lib files, not .a files. - libext=lib - # Tell ltmain to make .dll files, not .so files. - shrext_cmds=".dll" - # FIXME: Setting linknames here is a bad hack. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' - _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then - $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; - else - $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; - fi~ - $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ - linknames=' - # The linker will not automatically build a static lib if we build a DLL. - # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - # Don't use ranlib - _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' - _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ - lt_tool_outputfile="@TOOL_OUTPUT@"~ - case $lt_outputfile in - *.exe|*.EXE) ;; - *) - lt_outputfile="$lt_outputfile.exe" - lt_tool_outputfile="$lt_tool_outputfile.exe" - ;; - esac~ - func_to_tool_file "$lt_outputfile"~ - if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then - $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; - $RM "$lt_outputfile.manifest"; - fi' - ;; - *) - # g++ - # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, - # as there is no search path for DLLs. 
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' - _LT_TAGVAR(allow_undefined_flag, $1)=unsupported - _LT_TAGVAR(always_export_symbols, $1)=no - _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - - if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - # If the export-symbols file already is a .def file (1st line - # is EXPORTS), use it as is; otherwise, prepend... - _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then - cp $export_symbols $output_objdir/$soname.def; - else - echo EXPORTS > $output_objdir/$soname.def; - cat $export_symbols >> $output_objdir/$soname.def; - fi~ - $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - darwin* | rhapsody*) - _LT_DARWIN_LINKER_FEATURES($1) - ;; - - dgux*) - case $cc_basename in - ec++*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - ghcx*) - # Green Hills C++ Compiler - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - freebsd-elf*) - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - ;; - - freebsd* | dragonfly*) - # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF - # conventions - _LT_TAGVAR(ld_shlibs, $1)=yes - ;; - - gnu*) - ;; - - haiku*) - _LT_TAGVAR(archive_cmds, 
$1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - - hpux9*) - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, - # but as the default - # location of the library. - - case $cc_basename in - CC*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - aCC*) - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. 
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - ;; - *) - if test "$GXX" = yes; then - _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' - else - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - hpux10*|hpux11*) - if test $with_gnu_ld = no; then - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - case $host_cpu in - hppa*64*|ia64*) - ;; - *) - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - ;; - esac - fi - case $host_cpu in - hppa*64*|ia64*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - ;; - *) - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, - # but as the default - # location of the library. 
- ;; - esac - - case $cc_basename in - CC*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - aCC*) - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - esac - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. 
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - ;; - *) - if test "$GXX" = yes; then - if test $with_gnu_ld = no; then - case $host_cpu in - hppa*64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - ia64*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - ;; - esac - fi - else - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - interix[[3-9]]*) - _LT_TAGVAR(hardcode_direct, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. - # Instead, shared libraries are loaded at an image base (0x10000000 by - # default) and relocated if they conflict, which is a slow very memory - # consuming and fragmenting process. To avoid this, we pick a random, - # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link - # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' - ;; - irix5* | irix6*) - case $cc_basename in - CC*) - # SGI C++ - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' - - # Archives containing C++ object files must be created using - # "CC -ar", where "CC" is the IRIX C++ compiler. This is - # necessary to make sure instantiated templates are included - # in the archive. 
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' - ;; - *) - if test "$GXX" = yes; then - if test "$with_gnu_ld" = no; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - else - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' - fi - fi - _LT_TAGVAR(link_all_deplibs, $1)=yes - ;; - esac - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - _LT_TAGVAR(inherit_rpath, $1)=yes - ;; - - linux* | k*bsd*-gnu | kopensolaris*-gnu) - case $cc_basename in - KCC*) - # Kuck and Associates, Inc. (KAI) C++ Compiler - - # KCC will only create a shared library if the output file - # ends with ".so" (or ".sl" for HP-UX), so rename the library - # to its proper name (with version) after linking. 
- _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' - - # Archives containing C++ object files must be created using - # "CC -Bstatic", where "CC" is the KAI C++ compiler. - _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' - ;; - icpc* | ecpc* ) - # Intel C++ - with_gnu_ld=yes - # version 8.0 and above of icpc choke on multiply defined symbols - # if we add $predep_objects and $postdep_objects, however 7.1 and - # earlier do not add the objects themselves. 
- case `$CC -V 2>&1` in - *"Version 7."*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - ;; - *) # Version 8.0 or newer - tmp_idyn= - case $host_cpu in - ia64*) tmp_idyn=' -i_dynamic';; - esac - _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - ;; - esac - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' - ;; - pgCC* | pgcpp*) - # Portland Group C++ compiler - case `$CC -V` in - *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) - _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ - compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' - _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ - $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ - $RANLIB $oldlib' - _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ - $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects 
$compiler_flags ${wl}-soname ${wl}$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ - rm -rf $tpldir~ - $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ - $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' - ;; - *) # Version 6 and above use weak symbols - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' - ;; - esac - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' - ;; - cxx*) - # Compaq C++ - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' - - runpath_var=LD_RUN_PATH - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' - ;; - xl* | mpixl* | bgxl*) - # IBM XL 8.0 on PPC, with GNU ld - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' - _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - if test "x$supports_anon_versioning" = xyes; then - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ - cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ - echo "local: *; };" >> $output_objdir/$libname.ver~ - $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' - fi - ;; - *) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) - # Sun C++ 5.9 - _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' - _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; 
func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' - _LT_TAGVAR(compiler_needs_object, $1)=yes - - # Not sure whether something based on - # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 - # would be better. - output_verbose_link_cmd='func_echo_all' - - # Archives containing C++ object files must be created using - # "CC -xar", where "CC" is the Sun C++ compiler. This is - # necessary to make sure instantiated templates are included - # in the archive. - _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' - ;; - esac - ;; - esac - ;; - - lynxos*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - m88k*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - mvs*) - case $cc_basename in - cxx*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' - wlarc= - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - fi - # Workaround some broken pre-1.5 toolchains - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' - ;; - - *nto* | *qnx*) - _LT_TAGVAR(ld_shlibs, $1)=yes - ;; - - openbsd2*) - # C++ shared libraries are fairly broken - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - openbsd*) - if test -f /usr/libexec/ld.so; then - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_direct_absolute, $1)=yes - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' - 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' - fi - output_verbose_link_cmd=func_echo_all - else - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - - osf3* | osf4* | osf5*) - case $cc_basename in - KCC*) - # Kuck and Associates, Inc. (KAI) C++ Compiler - - # KCC will only create a shared library if the output file - # ends with ".so" (or ".sl" for HP-UX), so rename the library - # to its proper name (with version) after linking. - _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Archives containing C++ object files must be created using - # the KAI C++ compiler. 
- case $host in - osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; - *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; - esac - ;; - RCC*) - # Rational C++ 2.4.1 - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - cxx*) - case $host in - osf3*) - _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - ;; - *) - _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' - _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ - echo "-hidden">> $lib.exp~ - $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ - $RM $lib.exp' - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' - ;; - esac - - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- # - # There doesn't appear to be a way to prevent this compiler from - # explicitly linking system object files so we need to strip them - # from the output so that they don't get included in the library - # dependencies. - output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' - ;; - *) - if test "$GXX" = yes && test "$with_gnu_ld" = no; then - _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' - case $host in - osf3*) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - ;; - esac - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=: - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. 
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' - - else - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - fi - ;; - esac - ;; - - psos*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - sunos4*) - case $cc_basename in - CC*) - # Sun C++ 4.x - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - lcc*) - # Lucid - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - solaris*) - case $cc_basename in - CC* | sunCC*) - # Sun C++ 4.2, 5.x and Centerline C++ - _LT_TAGVAR(archive_cmds_need_lc,$1)=yes - _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' - _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - case $host_os in - solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; - *) - # The compiler driver will combine and reorder linker options, - # but understands `-z linker_flag'. - # Supported since Solaris 2.6 (maybe 2.5.1?) - _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' - ;; - esac - _LT_TAGVAR(link_all_deplibs, $1)=yes - - output_verbose_link_cmd='func_echo_all' - - # Archives containing C++ object files must be created using - # "CC -xar", where "CC" is the Sun C++ compiler. 
This is - # necessary to make sure instantiated templates are included - # in the archive. - _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' - ;; - gcx*) - # Green Hills C++ Compiler - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' - - # The C++ compiler must be used to create the archive. - _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' - ;; - *) - # GNU C++ compiler with Solaris linker - if test "$GXX" = yes && test "$with_gnu_ld" = no; then - _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' - if $CC --version | $GREP -v '^2\.7' > /dev/null; then - _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' - else - # g++ 2.7 appears to require `-G' NOT `-shared' on this - # platform. 
- _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' - _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ - $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' - - # Commands to make compiler produce verbose output that lists - # what "hidden" libraries, object files and flags are used when - # linking a shared library. - output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' - fi - - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' - case $host_os in - solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; - *) - _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' - ;; - esac - fi - ;; - esac - ;; - - sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) - _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - runpath_var='LD_RUN_PATH' - - case $cc_basename in - CC*) - _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - ;; - - sysv5* | sco3.2v5* | sco5v6*) - # Note: We can NOT use -z defs as we might desire, because we do not - # link with -lc, and that would cause any symbols used from libc to - # always be unresolved, 
which means just about no library would - # ever link correctly. If we're not using GNU ld we use -z text - # though, which does catch some bad symbols but isn't as heavy-handed - # as -z defs. - _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' - _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' - _LT_TAGVAR(archive_cmds_need_lc, $1)=no - _LT_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' - _LT_TAGVAR(hardcode_libdir_separator, $1)=':' - _LT_TAGVAR(link_all_deplibs, $1)=yes - _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' - runpath_var='LD_RUN_PATH' - - case $cc_basename in - CC*) - _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ - '"$_LT_TAGVAR(old_archive_cmds, $1)" - _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ - '"$_LT_TAGVAR(reload_cmds, $1)" - ;; - *) - _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' - ;; - esac - ;; - - tandem*) - case $cc_basename in - NCC*) - # NonStop-UX NCC 3.20 - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - ;; - - vxworks*) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - - *) - # FIXME: insert proper C++ library support - _LT_TAGVAR(ld_shlibs, $1)=no - ;; - esac - - AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) - test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no - - _LT_TAGVAR(GCC, $1)="$GXX" - _LT_TAGVAR(LD, $1)="$LD" - - ## CAVEAT EMPTOR: - ## 
There is no encapsulation within the following macros, do not change - ## the running order or otherwise move them around unless you know exactly - ## what you are doing... - _LT_SYS_HIDDEN_LIBDEPS($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) - fi # test -n "$compiler" - - CC=$lt_save_CC - CFLAGS=$lt_save_CFLAGS - LDCXX=$LD - LD=$lt_save_LD - GCC=$lt_save_GCC - with_gnu_ld=$lt_save_with_gnu_ld - lt_cv_path_LDCXX=$lt_cv_path_LD - lt_cv_path_LD=$lt_save_path_LD - lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld - lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld -fi # test "$_lt_caught_CXX_error" != yes - -AC_LANG_POP -])# _LT_LANG_CXX_CONFIG - - -# _LT_FUNC_STRIPNAME_CNF -# ---------------------- -# func_stripname_cnf prefix suffix name -# strip PREFIX and SUFFIX off of NAME. -# PREFIX and SUFFIX must not contain globbing or regex special -# characters, hashes, percent signs, but SUFFIX may contain a leading -# dot (in which case that matches only a dot). -# -# This function is identical to the (non-XSI) version of func_stripname, -# except this one can be used by m4 code that may be executed by configure, -# rather than the libtool script. -m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl -AC_REQUIRE([_LT_DECL_SED]) -AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) -func_stripname_cnf () -{ - case ${2} in - .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; - *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; - esac -} # func_stripname_cnf -])# _LT_FUNC_STRIPNAME_CNF - -# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) -# --------------------------------- -# Figure out "hidden" library dependencies from verbose -# compiler output when linking a shared library. -# Parse the compiler output and extract the necessary -# objects, libraries and library flags. 
-m4_defun([_LT_SYS_HIDDEN_LIBDEPS], -[m4_require([_LT_FILEUTILS_DEFAULTS])dnl -AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl -# Dependencies to place before and after the object being linked: -_LT_TAGVAR(predep_objects, $1)= -_LT_TAGVAR(postdep_objects, $1)= -_LT_TAGVAR(predeps, $1)= -_LT_TAGVAR(postdeps, $1)= -_LT_TAGVAR(compiler_lib_search_path, $1)= - -dnl we can't use the lt_simple_compile_test_code here, -dnl because it contains code intended for an executable, -dnl not a library. It's possible we should let each -dnl tag define a new lt_????_link_test_code variable, -dnl but it's only used here... -m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF -int a; -void foo (void) { a = 0; } -_LT_EOF -], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF -class Foo -{ -public: - Foo (void) { a = 0; } -private: - int a; -}; -_LT_EOF -], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF - subroutine foo - implicit none - integer*4 a - a=0 - return - end -_LT_EOF -], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF - subroutine foo - implicit none - integer a - a=0 - return - end -_LT_EOF -], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF -public class foo { - private int a; - public void bar (void) { - a = 0; - } -}; -_LT_EOF -], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF -package foo -func foo() { -} -_LT_EOF -]) - -_lt_libdeps_save_CFLAGS=$CFLAGS -case "$CC $CFLAGS " in #( -*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; -*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; -*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; -esac - -dnl Parse the compiler output and extract the necessary -dnl objects, libraries and library flags. -if AC_TRY_EVAL(ac_compile); then - # Parse the compiler output and extract the necessary - # objects, libraries and library flags. - - # Sentinel used to keep track of whether or not we are before - # the conftest object file. 
- pre_test_object_deps_done=no - - for p in `eval "$output_verbose_link_cmd"`; do - case ${prev}${p} in - - -L* | -R* | -l*) - # Some compilers place space between "-{L,R}" and the path. - # Remove the space. - if test $p = "-L" || - test $p = "-R"; then - prev=$p - continue - fi - - # Expand the sysroot to ease extracting the directories later. - if test -z "$prev"; then - case $p in - -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; - -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; - -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; - esac - fi - case $p in - =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; - esac - if test "$pre_test_object_deps_done" = no; then - case ${prev} in - -L | -R) - # Internal compiler library paths should come after those - # provided the user. The postdeps already come after the - # user supplied libs so there is no need to process them. - if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then - _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}" - else - _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}" - fi - ;; - # The "-l" case would never come before the object being - # linked, so don't bother handling this case. - esac - else - if test -z "$_LT_TAGVAR(postdeps, $1)"; then - _LT_TAGVAR(postdeps, $1)="${prev}${p}" - else - _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}" - fi - fi - prev= - ;; - - *.lto.$objext) ;; # Ignore GCC LTO objects - *.$objext) - # This assumes that the test object file only shows up - # once in the compiler output. 
- if test "$p" = "conftest.$objext"; then - pre_test_object_deps_done=yes - continue - fi - - if test "$pre_test_object_deps_done" = no; then - if test -z "$_LT_TAGVAR(predep_objects, $1)"; then - _LT_TAGVAR(predep_objects, $1)="$p" - else - _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" - fi - else - if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then - _LT_TAGVAR(postdep_objects, $1)="$p" - else - _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" - fi - fi - ;; - - *) ;; # Ignore the rest. - - esac - done - - # Clean up. - rm -f a.out a.exe -else - echo "libtool.m4: error: problem compiling $1 test program" -fi - -$RM -f confest.$objext -CFLAGS=$_lt_libdeps_save_CFLAGS - -# PORTME: override above test on systems where it is broken -m4_if([$1], [CXX], -[case $host_os in -interix[[3-9]]*) - # Interix 3.5 installs completely hosed .la files for C++, so rather than - # hack all around it, let's just trust "g++" to DTRT. - _LT_TAGVAR(predep_objects,$1)= - _LT_TAGVAR(postdep_objects,$1)= - _LT_TAGVAR(postdeps,$1)= - ;; - -linux*) - case `$CC -V 2>&1 | sed 5q` in - *Sun\ C*) - # Sun C++ 5.9 - - # The more standards-conforming stlport4 library is - # incompatible with the Cstd library. Avoid specifying - # it if it's in CXXFLAGS. Ignore libCrun as - # -library=stlport4 depends on it. - case " $CXX $CXXFLAGS " in - *" -library=stlport4 "*) - solaris_use_stlport4=yes - ;; - esac - - if test "$solaris_use_stlport4" != yes; then - _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' - fi - ;; - esac - ;; - -solaris*) - case $cc_basename in - CC* | sunCC*) - # The more standards-conforming stlport4 library is - # incompatible with the Cstd library. Avoid specifying - # it if it's in CXXFLAGS. Ignore libCrun as - # -library=stlport4 depends on it. 
- case " $CXX $CXXFLAGS " in - *" -library=stlport4 "*) - solaris_use_stlport4=yes - ;; - esac - - # Adding this requires a known-good setup of shared libraries for - # Sun compiler versions before 5.6, else PIC objects from an old - # archive will be linked into the output, leading to subtle bugs. - if test "$solaris_use_stlport4" != yes; then - _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' - fi - ;; - esac - ;; -esac -]) - -case " $_LT_TAGVAR(postdeps, $1) " in -*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; -esac - _LT_TAGVAR(compiler_lib_search_dirs, $1)= -if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then - _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` -fi -_LT_TAGDECL([], [compiler_lib_search_dirs], [1], - [The directories searched by this compiler when creating a shared library]) -_LT_TAGDECL([], [predep_objects], [1], - [Dependencies to place before and after the objects being linked to - create a shared library]) -_LT_TAGDECL([], [postdep_objects], [1]) -_LT_TAGDECL([], [predeps], [1]) -_LT_TAGDECL([], [postdeps], [1]) -_LT_TAGDECL([], [compiler_lib_search_path], [1], - [The library search path used internally by the compiler when linking - a shared library]) -])# _LT_SYS_HIDDEN_LIBDEPS - - -# _LT_LANG_F77_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for a Fortran 77 compiler are -# suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to `libtool'. 
-m4_defun([_LT_LANG_F77_CONFIG], -[AC_LANG_PUSH(Fortran 77) -if test -z "$F77" || test "X$F77" = "Xno"; then - _lt_disable_F77=yes -fi - -_LT_TAGVAR(archive_cmds_need_lc, $1)=no -_LT_TAGVAR(allow_undefined_flag, $1)= -_LT_TAGVAR(always_export_symbols, $1)=no -_LT_TAGVAR(archive_expsym_cmds, $1)= -_LT_TAGVAR(export_dynamic_flag_spec, $1)= -_LT_TAGVAR(hardcode_direct, $1)=no -_LT_TAGVAR(hardcode_direct_absolute, $1)=no -_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= -_LT_TAGVAR(hardcode_libdir_separator, $1)= -_LT_TAGVAR(hardcode_minus_L, $1)=no -_LT_TAGVAR(hardcode_automatic, $1)=no -_LT_TAGVAR(inherit_rpath, $1)=no -_LT_TAGVAR(module_cmds, $1)= -_LT_TAGVAR(module_expsym_cmds, $1)= -_LT_TAGVAR(link_all_deplibs, $1)=unknown -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds -_LT_TAGVAR(no_undefined_flag, $1)= -_LT_TAGVAR(whole_archive_flag_spec, $1)= -_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - -# Source file extension for f77 test sources. -ac_ext=f - -# Object file extension for compiled f77 test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# No sense in running all these tests if we already determined that -# the F77 compiler isn't working. Some variables (like enable_shared) -# are currently assumed to apply to all compilers on this platform, -# and will be corrupted by setting them based on a non-working compiler. -if test "$_lt_disable_F77" != yes; then - # Code to be used in simple compile tests - lt_simple_compile_test_code="\ - subroutine t - return - end -" - - # Code to be used in simple link tests - lt_simple_link_test_code="\ - program t - end -" - - # ltmain only uses $CC for tagged configurations so make sure $CC is set. - _LT_TAG_COMPILER - - # save warnings/boilerplate of simple test code - _LT_COMPILER_BOILERPLATE - _LT_LINKER_BOILERPLATE - - # Allow CC to be a program name with arguments. 
- lt_save_CC="$CC" - lt_save_GCC=$GCC - lt_save_CFLAGS=$CFLAGS - CC=${F77-"f77"} - CFLAGS=$FFLAGS - compiler=$CC - _LT_TAGVAR(compiler, $1)=$CC - _LT_CC_BASENAME([$compiler]) - GCC=$G77 - if test -n "$compiler"; then - AC_MSG_CHECKING([if libtool supports shared libraries]) - AC_MSG_RESULT([$can_build_shared]) - - AC_MSG_CHECKING([whether to build shared libraries]) - test "$can_build_shared" = "no" && enable_shared=no - - # On AIX, shared libraries and static libraries use the same namespace, and - # are all built from PIC. - case $host_os in - aix3*) - test "$enable_shared" = yes && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - aix[[4-9]]*) - if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then - test "$enable_shared" = yes && enable_static=no - fi - ;; - esac - AC_MSG_RESULT([$enable_shared]) - - AC_MSG_CHECKING([whether to build static libraries]) - # Make sure either enable_shared or enable_static is yes. - test "$enable_shared" = yes || enable_static=yes - AC_MSG_RESULT([$enable_static]) - - _LT_TAGVAR(GCC, $1)="$G77" - _LT_TAGVAR(LD, $1)="$LD" - - ## CAVEAT EMPTOR: - ## There is no encapsulation within the following macros, do not change - ## the running order or otherwise move them around unless you know exactly - ## what you are doing... - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) - fi # test -n "$compiler" - - GCC=$lt_save_GCC - CC="$lt_save_CC" - CFLAGS="$lt_save_CFLAGS" -fi # test "$_lt_disable_F77" != yes - -AC_LANG_POP -])# _LT_LANG_F77_CONFIG - - -# _LT_LANG_FC_CONFIG([TAG]) -# ------------------------- -# Ensure that the configuration variables for a Fortran compiler are -# suitably defined. These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to `libtool'. 
-m4_defun([_LT_LANG_FC_CONFIG], -[AC_LANG_PUSH(Fortran) - -if test -z "$FC" || test "X$FC" = "Xno"; then - _lt_disable_FC=yes -fi - -_LT_TAGVAR(archive_cmds_need_lc, $1)=no -_LT_TAGVAR(allow_undefined_flag, $1)= -_LT_TAGVAR(always_export_symbols, $1)=no -_LT_TAGVAR(archive_expsym_cmds, $1)= -_LT_TAGVAR(export_dynamic_flag_spec, $1)= -_LT_TAGVAR(hardcode_direct, $1)=no -_LT_TAGVAR(hardcode_direct_absolute, $1)=no -_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= -_LT_TAGVAR(hardcode_libdir_separator, $1)= -_LT_TAGVAR(hardcode_minus_L, $1)=no -_LT_TAGVAR(hardcode_automatic, $1)=no -_LT_TAGVAR(inherit_rpath, $1)=no -_LT_TAGVAR(module_cmds, $1)= -_LT_TAGVAR(module_expsym_cmds, $1)= -_LT_TAGVAR(link_all_deplibs, $1)=unknown -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds -_LT_TAGVAR(no_undefined_flag, $1)= -_LT_TAGVAR(whole_archive_flag_spec, $1)= -_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no - -# Source file extension for fc test sources. -ac_ext=${ac_fc_srcext-f} - -# Object file extension for compiled fc test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# No sense in running all these tests if we already determined that -# the FC compiler isn't working. Some variables (like enable_shared) -# are currently assumed to apply to all compilers on this platform, -# and will be corrupted by setting them based on a non-working compiler. -if test "$_lt_disable_FC" != yes; then - # Code to be used in simple compile tests - lt_simple_compile_test_code="\ - subroutine t - return - end -" - - # Code to be used in simple link tests - lt_simple_link_test_code="\ - program t - end -" - - # ltmain only uses $CC for tagged configurations so make sure $CC is set. - _LT_TAG_COMPILER - - # save warnings/boilerplate of simple test code - _LT_COMPILER_BOILERPLATE - _LT_LINKER_BOILERPLATE - - # Allow CC to be a program name with arguments. 
- lt_save_CC="$CC" - lt_save_GCC=$GCC - lt_save_CFLAGS=$CFLAGS - CC=${FC-"f95"} - CFLAGS=$FCFLAGS - compiler=$CC - GCC=$ac_cv_fc_compiler_gnu - - _LT_TAGVAR(compiler, $1)=$CC - _LT_CC_BASENAME([$compiler]) - - if test -n "$compiler"; then - AC_MSG_CHECKING([if libtool supports shared libraries]) - AC_MSG_RESULT([$can_build_shared]) - - AC_MSG_CHECKING([whether to build shared libraries]) - test "$can_build_shared" = "no" && enable_shared=no - - # On AIX, shared libraries and static libraries use the same namespace, and - # are all built from PIC. - case $host_os in - aix3*) - test "$enable_shared" = yes && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - aix[[4-9]]*) - if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then - test "$enable_shared" = yes && enable_static=no - fi - ;; - esac - AC_MSG_RESULT([$enable_shared]) - - AC_MSG_CHECKING([whether to build static libraries]) - # Make sure either enable_shared or enable_static is yes. - test "$enable_shared" = yes || enable_static=yes - AC_MSG_RESULT([$enable_static]) - - _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu" - _LT_TAGVAR(LD, $1)="$LD" - - ## CAVEAT EMPTOR: - ## There is no encapsulation within the following macros, do not change - ## the running order or otherwise move them around unless you know exactly - ## what you are doing... - _LT_SYS_HIDDEN_LIBDEPS($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_SYS_DYNAMIC_LINKER($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) - fi # test -n "$compiler" - - GCC=$lt_save_GCC - CC=$lt_save_CC - CFLAGS=$lt_save_CFLAGS -fi # test "$_lt_disable_FC" != yes - -AC_LANG_POP -])# _LT_LANG_FC_CONFIG - - -# _LT_LANG_GCJ_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for the GNU Java Compiler compiler -# are suitably defined. 
These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to `libtool'. -m4_defun([_LT_LANG_GCJ_CONFIG], -[AC_REQUIRE([LT_PROG_GCJ])dnl -AC_LANG_SAVE - -# Source file extension for Java test sources. -ac_ext=java - -# Object file extension for compiled Java test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code="class foo {}" - -# Code to be used in simple link tests -lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' - -# ltmain only uses $CC for tagged configurations so make sure $CC is set. -_LT_TAG_COMPILER - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -# Allow CC to be a program name with arguments. -lt_save_CC=$CC -lt_save_CFLAGS=$CFLAGS -lt_save_GCC=$GCC -GCC=yes -CC=${GCJ-"gcj"} -CFLAGS=$GCJFLAGS -compiler=$CC -_LT_TAGVAR(compiler, $1)=$CC -_LT_TAGVAR(LD, $1)="$LD" -_LT_CC_BASENAME([$compiler]) - -# GCJ did not exist at the time GCC didn't implicitly link libc in. -_LT_TAGVAR(archive_cmds_need_lc, $1)=no - -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds - -## CAVEAT EMPTOR: -## There is no encapsulation within the following macros, do not change -## the running order or otherwise move them around unless you know exactly -## what you are doing... -if test -n "$compiler"; then - _LT_COMPILER_NO_RTTI($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) -fi - -AC_LANG_RESTORE - -GCC=$lt_save_GCC -CC=$lt_save_CC -CFLAGS=$lt_save_CFLAGS -])# _LT_LANG_GCJ_CONFIG - - -# _LT_LANG_GO_CONFIG([TAG]) -# -------------------------- -# Ensure that the configuration variables for the GNU Go compiler -# are suitably defined. 
These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to `libtool'. -m4_defun([_LT_LANG_GO_CONFIG], -[AC_REQUIRE([LT_PROG_GO])dnl -AC_LANG_SAVE - -# Source file extension for Go test sources. -ac_ext=go - -# Object file extension for compiled Go test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code="package main; func main() { }" - -# Code to be used in simple link tests -lt_simple_link_test_code='package main; func main() { }' - -# ltmain only uses $CC for tagged configurations so make sure $CC is set. -_LT_TAG_COMPILER - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -# Allow CC to be a program name with arguments. -lt_save_CC=$CC -lt_save_CFLAGS=$CFLAGS -lt_save_GCC=$GCC -GCC=yes -CC=${GOC-"gccgo"} -CFLAGS=$GOFLAGS -compiler=$CC -_LT_TAGVAR(compiler, $1)=$CC -_LT_TAGVAR(LD, $1)="$LD" -_LT_CC_BASENAME([$compiler]) - -# Go did not exist at the time GCC didn't implicitly link libc in. -_LT_TAGVAR(archive_cmds_need_lc, $1)=no - -_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds -_LT_TAGVAR(reload_flag, $1)=$reload_flag -_LT_TAGVAR(reload_cmds, $1)=$reload_cmds - -## CAVEAT EMPTOR: -## There is no encapsulation within the following macros, do not change -## the running order or otherwise move them around unless you know exactly -## what you are doing... -if test -n "$compiler"; then - _LT_COMPILER_NO_RTTI($1) - _LT_COMPILER_PIC($1) - _LT_COMPILER_C_O($1) - _LT_COMPILER_FILE_LOCKS($1) - _LT_LINKER_SHLIBS($1) - _LT_LINKER_HARDCODE_LIBPATH($1) - - _LT_CONFIG($1) -fi - -AC_LANG_RESTORE - -GCC=$lt_save_GCC -CC=$lt_save_CC -CFLAGS=$lt_save_CFLAGS -])# _LT_LANG_GO_CONFIG - - -# _LT_LANG_RC_CONFIG([TAG]) -# ------------------------- -# Ensure that the configuration variables for the Windows resource compiler -# are suitably defined. 
These variables are subsequently used by _LT_CONFIG -# to write the compiler configuration to `libtool'. -m4_defun([_LT_LANG_RC_CONFIG], -[AC_REQUIRE([LT_PROG_RC])dnl -AC_LANG_SAVE - -# Source file extension for RC test sources. -ac_ext=rc - -# Object file extension for compiled RC test sources. -objext=o -_LT_TAGVAR(objext, $1)=$objext - -# Code to be used in simple compile tests -lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' - -# Code to be used in simple link tests -lt_simple_link_test_code="$lt_simple_compile_test_code" - -# ltmain only uses $CC for tagged configurations so make sure $CC is set. -_LT_TAG_COMPILER - -# save warnings/boilerplate of simple test code -_LT_COMPILER_BOILERPLATE -_LT_LINKER_BOILERPLATE - -# Allow CC to be a program name with arguments. -lt_save_CC="$CC" -lt_save_CFLAGS=$CFLAGS -lt_save_GCC=$GCC -GCC= -CC=${RC-"windres"} -CFLAGS= -compiler=$CC -_LT_TAGVAR(compiler, $1)=$CC -_LT_CC_BASENAME([$compiler]) -_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes - -if test -n "$compiler"; then - : - _LT_CONFIG($1) -fi - -GCC=$lt_save_GCC -AC_LANG_RESTORE -CC=$lt_save_CC -CFLAGS=$lt_save_CFLAGS -])# _LT_LANG_RC_CONFIG - - -# LT_PROG_GCJ -# ----------- -AC_DEFUN([LT_PROG_GCJ], -[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], - [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], - [AC_CHECK_TOOL(GCJ, gcj,) - test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2" - AC_SUBST(GCJFLAGS)])])[]dnl -]) - -# Old name: -AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([LT_AC_PROG_GCJ], []) - - -# LT_PROG_GO -# ---------- -AC_DEFUN([LT_PROG_GO], -[AC_CHECK_TOOL(GOC, gccgo,) -]) - - -# LT_PROG_RC -# ---------- -AC_DEFUN([LT_PROG_RC], -[AC_CHECK_TOOL(RC, windres,) -]) - -# Old name: -AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([LT_AC_PROG_RC], []) - - -# _LT_DECL_EGREP -# -------------- -# If we don't have a new enough Autoconf to choose the best grep -# 
available, choose the one first in the user's PATH. -m4_defun([_LT_DECL_EGREP], -[AC_REQUIRE([AC_PROG_EGREP])dnl -AC_REQUIRE([AC_PROG_FGREP])dnl -test -z "$GREP" && GREP=grep -_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) -_LT_DECL([], [EGREP], [1], [An ERE matcher]) -_LT_DECL([], [FGREP], [1], [A literal string matcher]) -dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too -AC_SUBST([GREP]) -]) - - -# _LT_DECL_OBJDUMP -# -------------- -# If we don't have a new enough Autoconf to choose the best objdump -# available, choose the one first in the user's PATH. -m4_defun([_LT_DECL_OBJDUMP], -[AC_CHECK_TOOL(OBJDUMP, objdump, false) -test -z "$OBJDUMP" && OBJDUMP=objdump -_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) -AC_SUBST([OBJDUMP]) -]) - -# _LT_DECL_DLLTOOL -# ---------------- -# Ensure DLLTOOL variable is set. -m4_defun([_LT_DECL_DLLTOOL], -[AC_CHECK_TOOL(DLLTOOL, dlltool, false) -test -z "$DLLTOOL" && DLLTOOL=dlltool -_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) -AC_SUBST([DLLTOOL]) -]) - -# _LT_DECL_SED -# ------------ -# Check for a fully-functional sed program, that truncates -# as few characters as possible. Prefer GNU sed if found. -m4_defun([_LT_DECL_SED], -[AC_PROG_SED -test -z "$SED" && SED=sed -Xsed="$SED -e 1s/^X//" -_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) -_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], - [Sed that helps us avoid accidentally triggering echo(1) options like -n]) -])# _LT_DECL_SED - -m4_ifndef([AC_PROG_SED], [ -############################################################ -# NOTE: This macro has been submitted for inclusion into # -# GNU Autoconf as AC_PROG_SED. When it is available in # -# a released version of Autoconf we should remove this # -# macro and use it instead. 
# -############################################################ - -m4_defun([AC_PROG_SED], -[AC_MSG_CHECKING([for a sed that does not truncate output]) -AC_CACHE_VAL(lt_cv_path_SED, -[# Loop through the user's path and test for sed and gsed. -# Then use that list of sed's as ones to test for truncation. -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for lt_ac_prog in sed gsed; do - for ac_exec_ext in '' $ac_executable_extensions; do - if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then - lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" - fi - done - done -done -IFS=$as_save_IFS -lt_ac_max=0 -lt_ac_count=0 -# Add /usr/xpg4/bin/sed as it is typically found on Solaris -# along with /bin/sed that truncates output. -for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do - test ! -f $lt_ac_sed && continue - cat /dev/null > conftest.in - lt_ac_count=0 - echo $ECHO_N "0123456789$ECHO_C" >conftest.in - # Check for GNU sed and select it if it is found. 
- if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then - lt_cv_path_SED=$lt_ac_sed - break - fi - while true; do - cat conftest.in conftest.in >conftest.tmp - mv conftest.tmp conftest.in - cp conftest.in conftest.nl - echo >>conftest.nl - $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break - cmp -s conftest.out conftest.nl || break - # 10000 chars as input seems more than enough - test $lt_ac_count -gt 10 && break - lt_ac_count=`expr $lt_ac_count + 1` - if test $lt_ac_count -gt $lt_ac_max; then - lt_ac_max=$lt_ac_count - lt_cv_path_SED=$lt_ac_sed - fi - done -done -]) -SED=$lt_cv_path_SED -AC_SUBST([SED]) -AC_MSG_RESULT([$SED]) -])#AC_PROG_SED -])#m4_ifndef - -# Old name: -AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([LT_AC_PROG_SED], []) - - -# _LT_CHECK_SHELL_FEATURES -# ------------------------ -# Find out whether the shell is Bourne or XSI compatible, -# or has some other useful features. -m4_defun([_LT_CHECK_SHELL_FEATURES], -[AC_MSG_CHECKING([whether the shell understands some XSI constructs]) -# Try some XSI features -xsi_shell=no -( _lt_dummy="a/b/c" - test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ - = c,a/b,b/c, \ - && eval 'test $(( 1 + 1 )) -eq 2 \ - && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ - && xsi_shell=yes -AC_MSG_RESULT([$xsi_shell]) -_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell']) - -AC_MSG_CHECKING([whether the shell understands "+="]) -lt_shell_append=no -( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \ - >/dev/null 2>&1 \ - && lt_shell_append=yes -AC_MSG_RESULT([$lt_shell_append]) -_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append']) - -if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then - lt_unset=unset -else - lt_unset=false -fi -_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl - -# test EBCDIC or ASCII -case `echo X|tr X '\101'` in - A) # ASCII based 
system - # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr - lt_SP2NL='tr \040 \012' - lt_NL2SP='tr \015\012 \040\040' - ;; - *) # EBCDIC based system - lt_SP2NL='tr \100 \n' - lt_NL2SP='tr \r\n \100\100' - ;; -esac -_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl -_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl -])# _LT_CHECK_SHELL_FEATURES - - -# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY) -# ------------------------------------------------------ -# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and -# '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY. -m4_defun([_LT_PROG_FUNCTION_REPLACE], -[dnl { -sed -e '/^$1 ()$/,/^} # $1 /c\ -$1 ()\ -{\ -m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1]) -} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \ - && mv -f "$cfgfile.tmp" "$cfgfile" \ - || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") -test 0 -eq $? || _lt_function_replace_fail=: -]) - - -# _LT_PROG_REPLACE_SHELLFNS -# ------------------------- -# Replace existing portable implementations of several shell functions with -# equivalent extended shell implementations where those features are available.. -m4_defun([_LT_PROG_REPLACE_SHELLFNS], -[if test x"$xsi_shell" = xyes; then - _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl - case ${1} in - */*) func_dirname_result="${1%/*}${2}" ;; - * ) func_dirname_result="${3}" ;; - esac]) - - _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl - func_basename_result="${1##*/}"]) - - _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl - case ${1} in - */*) func_dirname_result="${1%/*}${2}" ;; - * ) func_dirname_result="${3}" ;; - esac - func_basename_result="${1##*/}"]) - - _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl - # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are - # positional parameters, so assign one to ordinary parameter first. 
- func_stripname_result=${3} - func_stripname_result=${func_stripname_result#"${1}"} - func_stripname_result=${func_stripname_result%"${2}"}]) - - _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl - func_split_long_opt_name=${1%%=*} - func_split_long_opt_arg=${1#*=}]) - - _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl - func_split_short_opt_arg=${1#??} - func_split_short_opt_name=${1%"$func_split_short_opt_arg"}]) - - _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl - case ${1} in - *.lo) func_lo2o_result=${1%.lo}.${objext} ;; - *) func_lo2o_result=${1} ;; - esac]) - - _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo]) - - _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))]) - - _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}]) -fi - -if test x"$lt_shell_append" = xyes; then - _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"]) - - _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl - func_quote_for_eval "${2}" -dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \ - eval "${1}+=\\\\ \\$func_quote_for_eval_result"]) - - # Save a `func_append' function call where possible by direct use of '+=' - sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ - && mv -f "$cfgfile.tmp" "$cfgfile" \ - || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") - test 0 -eq $? || _lt_function_replace_fail=: -else - # Save a `func_append' function call even when '+=' is not available - sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ - && mv -f "$cfgfile.tmp" "$cfgfile" \ - || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") - test 0 -eq $? 
|| _lt_function_replace_fail=: -fi - -if test x"$_lt_function_replace_fail" = x":"; then - AC_MSG_WARN([Unable to substitute extended shell functions in $ofile]) -fi -]) - -# _LT_PATH_CONVERSION_FUNCTIONS -# ----------------------------- -# Determine which file name conversion functions should be used by -# func_to_host_file (and, implicitly, by func_to_host_path). These are needed -# for certain cross-compile configurations and native mingw. -m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -AC_MSG_CHECKING([how to convert $build file names to $host format]) -AC_CACHE_VAL(lt_cv_to_host_file_cmd, -[case $host in - *-*-mingw* ) - case $build in - *-*-mingw* ) # actually msys - lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 - ;; - *-*-cygwin* ) - lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 - ;; - * ) # otherwise, assume *nix - lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 - ;; - esac - ;; - *-*-cygwin* ) - case $build in - *-*-mingw* ) # actually msys - lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin - ;; - *-*-cygwin* ) - lt_cv_to_host_file_cmd=func_convert_file_noop - ;; - * ) # otherwise, assume *nix - lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin - ;; - esac - ;; - * ) # unhandled hosts (and "normal" native builds) - lt_cv_to_host_file_cmd=func_convert_file_noop - ;; -esac -]) -to_host_file_cmd=$lt_cv_to_host_file_cmd -AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) -_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], - [0], [convert $build file names to $host format])dnl - -AC_MSG_CHECKING([how to convert $build file names to toolchain format]) -AC_CACHE_VAL(lt_cv_to_tool_file_cmd, -[#assume ordinary cross tools, or native build. 
-lt_cv_to_tool_file_cmd=func_convert_file_noop -case $host in - *-*-mingw* ) - case $build in - *-*-mingw* ) # actually msys - lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 - ;; - esac - ;; -esac -]) -to_tool_file_cmd=$lt_cv_to_tool_file_cmd -AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) -_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], - [0], [convert $build files to toolchain format])dnl -])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4 deleted file mode 100644 index 5d9acd8e..00000000 --- a/m4/ltoptions.m4 +++ /dev/null @@ -1,384 +0,0 @@ -# Helper functions for option handling. -*- Autoconf -*- -# -# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, -# Inc. -# Written by Gary V. Vaughan, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 7 ltoptions.m4 - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) - - -# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) -# ------------------------------------------ -m4_define([_LT_MANGLE_OPTION], -[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) - - -# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) -# --------------------------------------- -# Set option OPTION-NAME for macro MACRO-NAME, and if there is a -# matching handler defined, dispatch to it. Other OPTION-NAMEs are -# saved as a flag. -m4_define([_LT_SET_OPTION], -[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl -m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), - _LT_MANGLE_DEFUN([$1], [$2]), - [m4_warning([Unknown $1 option `$2'])])[]dnl -]) - - -# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) -# ------------------------------------------------------------ -# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 
-m4_define([_LT_IF_OPTION], -[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) - - -# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) -# ------------------------------------------------------- -# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME -# are set. -m4_define([_LT_UNLESS_OPTIONS], -[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), - [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), - [m4_define([$0_found])])])[]dnl -m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 -])[]dnl -]) - - -# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) -# ---------------------------------------- -# OPTION-LIST is a space-separated list of Libtool options associated -# with MACRO-NAME. If any OPTION has a matching handler declared with -# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about -# the unknown option and exit. -m4_defun([_LT_SET_OPTIONS], -[# Set options -m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), - [_LT_SET_OPTION([$1], _LT_Option)]) - -m4_if([$1],[LT_INIT],[ - dnl - dnl Simply set some default values (i.e off) if boolean options were not - dnl specified: - _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no - ]) - _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no - ]) - dnl - dnl If no reference was made to various pairs of opposing options, then - dnl we run the default mode handler for the pair. For example, if neither - dnl `shared' nor `disable-shared' was passed, we enable building of shared - dnl archives by default: - _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) - _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) - _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) - _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], - [_LT_ENABLE_FAST_INSTALL]) - ]) -])# _LT_SET_OPTIONS - - -## --------------------------------- ## -## Macros to handle LT_INIT options. 
## -## --------------------------------- ## - -# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) -# ----------------------------------------- -m4_define([_LT_MANGLE_DEFUN], -[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) - - -# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) -# ----------------------------------------------- -m4_define([LT_OPTION_DEFINE], -[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl -])# LT_OPTION_DEFINE - - -# dlopen -# ------ -LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes -]) - -AU_DEFUN([AC_LIBTOOL_DLOPEN], -[_LT_SET_OPTION([LT_INIT], [dlopen]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the `dlopen' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) - - -# win32-dll -# --------- -# Declare package support for building win32 dll's. -LT_OPTION_DEFINE([LT_INIT], [win32-dll], -[enable_win32_dll=yes - -case $host in -*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) - AC_CHECK_TOOL(AS, as, false) - AC_CHECK_TOOL(DLLTOOL, dlltool, false) - AC_CHECK_TOOL(OBJDUMP, objdump, false) - ;; -esac - -test -z "$AS" && AS=as -_LT_DECL([], [AS], [1], [Assembler program])dnl - -test -z "$DLLTOOL" && DLLTOOL=dlltool -_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl - -test -z "$OBJDUMP" && OBJDUMP=objdump -_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl -])# win32-dll - -AU_DEFUN([AC_LIBTOOL_WIN32_DLL], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -_LT_SET_OPTION([LT_INIT], [win32-dll]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the `win32-dll' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) - - -# _LT_ENABLE_SHARED([DEFAULT]) -# ---------------------------- -# implement the --enable-shared flag, and supports the `shared' and -# `disable-shared' LT_INIT 
options. -# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. -m4_define([_LT_ENABLE_SHARED], -[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([shared], - [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], - [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_shared=yes ;; - no) enable_shared=no ;; - *) - enable_shared=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for pkg in $enableval; do - IFS="$lt_save_ifs" - if test "X$pkg" = "X$p"; then - enable_shared=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) - - _LT_DECL([build_libtool_libs], [enable_shared], [0], - [Whether or not to build shared libraries]) -])# _LT_ENABLE_SHARED - -LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) - -# Old names: -AC_DEFUN([AC_ENABLE_SHARED], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) -]) - -AC_DEFUN([AC_DISABLE_SHARED], -[_LT_SET_OPTION([LT_INIT], [disable-shared]) -]) - -AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) -AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_ENABLE_SHARED], []) -dnl AC_DEFUN([AM_DISABLE_SHARED], []) - - - -# _LT_ENABLE_STATIC([DEFAULT]) -# ---------------------------- -# implement the --enable-static flag, and support the `static' and -# `disable-static' LT_INIT options. -# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
-m4_define([_LT_ENABLE_STATIC], -[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([static], - [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], - [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_static=yes ;; - no) enable_static=no ;; - *) - enable_static=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for pkg in $enableval; do - IFS="$lt_save_ifs" - if test "X$pkg" = "X$p"; then - enable_static=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [enable_static=]_LT_ENABLE_STATIC_DEFAULT) - - _LT_DECL([build_old_libs], [enable_static], [0], - [Whether or not to build static libraries]) -])# _LT_ENABLE_STATIC - -LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) - -# Old names: -AC_DEFUN([AC_ENABLE_STATIC], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) -]) - -AC_DEFUN([AC_DISABLE_STATIC], -[_LT_SET_OPTION([LT_INIT], [disable-static]) -]) - -AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) -AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AM_ENABLE_STATIC], []) -dnl AC_DEFUN([AM_DISABLE_STATIC], []) - - - -# _LT_ENABLE_FAST_INSTALL([DEFAULT]) -# ---------------------------------- -# implement the --enable-fast-install flag, and support the `fast-install' -# and `disable-fast-install' LT_INIT options. -# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
-m4_define([_LT_ENABLE_FAST_INSTALL], -[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl -AC_ARG_ENABLE([fast-install], - [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], - [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], - [p=${PACKAGE-default} - case $enableval in - yes) enable_fast_install=yes ;; - no) enable_fast_install=no ;; - *) - enable_fast_install=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for pkg in $enableval; do - IFS="$lt_save_ifs" - if test "X$pkg" = "X$p"; then - enable_fast_install=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) - -_LT_DECL([fast_install], [enable_fast_install], [0], - [Whether or not to optimize for fast installation])dnl -])# _LT_ENABLE_FAST_INSTALL - -LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) -LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) - -# Old names: -AU_DEFUN([AC_ENABLE_FAST_INSTALL], -[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you put -the `fast-install' option into LT_INIT's first parameter.]) -]) - -AU_DEFUN([AC_DISABLE_FAST_INSTALL], -[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you put -the `disable-fast-install' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) -dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) - - -# _LT_WITH_PIC([MODE]) -# -------------------- -# implement the --with-pic flag, and support the `pic-only' and `no-pic' -# LT_INIT options. -# MODE is either `yes' or `no'. If omitted, it defaults to `both'. 
-m4_define([_LT_WITH_PIC], -[AC_ARG_WITH([pic], - [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], - [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], - [lt_p=${PACKAGE-default} - case $withval in - yes|no) pic_mode=$withval ;; - *) - pic_mode=default - # Look at the argument we got. We use all the common list separators. - lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," - for lt_pkg in $withval; do - IFS="$lt_save_ifs" - if test "X$lt_pkg" = "X$lt_p"; then - pic_mode=yes - fi - done - IFS="$lt_save_ifs" - ;; - esac], - [pic_mode=default]) - -test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) - -_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl -])# _LT_WITH_PIC - -LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) -LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) - -# Old name: -AU_DEFUN([AC_LIBTOOL_PICMODE], -[_LT_SET_OPTION([LT_INIT], [pic-only]) -AC_DIAGNOSE([obsolete], -[$0: Remove this warning and the call to _LT_SET_OPTION when you -put the `pic-only' option into LT_INIT's first parameter.]) -]) - -dnl aclocal-1.4 backwards compatibility: -dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) - -## ----------------- ## -## LTDL_INIT Options ## -## ----------------- ## - -m4_define([_LTDL_MODE], []) -LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], - [m4_define([_LTDL_MODE], [nonrecursive])]) -LT_OPTION_DEFINE([LTDL_INIT], [recursive], - [m4_define([_LTDL_MODE], [recursive])]) -LT_OPTION_DEFINE([LTDL_INIT], [subproject], - [m4_define([_LTDL_MODE], [subproject])]) - -m4_define([_LTDL_TYPE], []) -LT_OPTION_DEFINE([LTDL_INIT], [installable], - [m4_define([_LTDL_TYPE], [installable])]) -LT_OPTION_DEFINE([LTDL_INIT], [convenience], - [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4 deleted file mode 100644 index 9000a057..00000000 --- a/m4/ltsugar.m4 +++ /dev/null @@ -1,123 +0,0 @@ -# ltsugar.m4 -- libtool m4 base layer. 
-*-Autoconf-*- -# -# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. -# Written by Gary V. Vaughan, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 6 ltsugar.m4 - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) - - -# lt_join(SEP, ARG1, [ARG2...]) -# ----------------------------- -# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their -# associated separator. -# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier -# versions in m4sugar had bugs. -m4_define([lt_join], -[m4_if([$#], [1], [], - [$#], [2], [[$2]], - [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) -m4_define([_lt_join], -[m4_if([$#$2], [2], [], - [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) - - -# lt_car(LIST) -# lt_cdr(LIST) -# ------------ -# Manipulate m4 lists. -# These macros are necessary as long as will still need to support -# Autoconf-2.59 which quotes differently. -m4_define([lt_car], [[$1]]) -m4_define([lt_cdr], -[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], - [$#], 1, [], - [m4_dquote(m4_shift($@))])]) -m4_define([lt_unquote], $1) - - -# lt_append(MACRO-NAME, STRING, [SEPARATOR]) -# ------------------------------------------ -# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. -# Note that neither SEPARATOR nor STRING are expanded; they are appended -# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). -# No SEPARATOR is output if MACRO-NAME was previously undefined (different -# than defined and empty). -# -# This macro is needed until we can rely on Autoconf 2.62, since earlier -# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
-m4_define([lt_append], -[m4_define([$1], - m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) - - - -# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) -# ---------------------------------------------------------- -# Produce a SEP delimited list of all paired combinations of elements of -# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list -# has the form PREFIXmINFIXSUFFIXn. -# Needed until we can rely on m4_combine added in Autoconf 2.62. -m4_define([lt_combine], -[m4_if(m4_eval([$# > 3]), [1], - [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl -[[m4_foreach([_Lt_prefix], [$2], - [m4_foreach([_Lt_suffix], - ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, - [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) - - -# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) -# ----------------------------------------------------------------------- -# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited -# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
-m4_define([lt_if_append_uniq], -[m4_ifdef([$1], - [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], - [lt_append([$1], [$2], [$3])$4], - [$5])], - [lt_append([$1], [$2], [$3])$4])]) - - -# lt_dict_add(DICT, KEY, VALUE) -# ----------------------------- -m4_define([lt_dict_add], -[m4_define([$1($2)], [$3])]) - - -# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) -# -------------------------------------------- -m4_define([lt_dict_add_subkey], -[m4_define([$1($2:$3)], [$4])]) - - -# lt_dict_fetch(DICT, KEY, [SUBKEY]) -# ---------------------------------- -m4_define([lt_dict_fetch], -[m4_ifval([$3], - m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), - m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) - - -# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) -# ----------------------------------------------------------------- -m4_define([lt_if_dict_fetch], -[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], - [$5], - [$6])]) - - -# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) -# -------------------------------------------------------------- -m4_define([lt_dict_filter], -[m4_if([$5], [], [], - [lt_join(m4_quote(m4_default([$4], [[, ]])), - lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), - [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl -]) diff --git a/m4/ltversion.m4 b/m4/ltversion.m4 deleted file mode 100644 index 07a8602d..00000000 --- a/m4/ltversion.m4 +++ /dev/null @@ -1,23 +0,0 @@ -# ltversion.m4 -- version numbers -*- Autoconf -*- -# -# Copyright (C) 2004 Free Software Foundation, Inc. -# Written by Scott James Remnant, 2004 -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. 
- -# @configure_input@ - -# serial 3337 ltversion.m4 -# This file is part of GNU Libtool - -m4_define([LT_PACKAGE_VERSION], [2.4.2]) -m4_define([LT_PACKAGE_REVISION], [1.3337]) - -AC_DEFUN([LTVERSION_VERSION], -[macro_version='2.4.2' -macro_revision='1.3337' -_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) -_LT_DECL(, macro_revision, 0) -]) diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4 deleted file mode 100644 index c573da90..00000000 --- a/m4/lt~obsolete.m4 +++ /dev/null @@ -1,98 +0,0 @@ -# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- -# -# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. -# Written by Scott James Remnant, 2004. -# -# This file is free software; the Free Software Foundation gives -# unlimited permission to copy and/or distribute it, with or without -# modifications, as long as this notice is preserved. - -# serial 5 lt~obsolete.m4 - -# These exist entirely to fool aclocal when bootstrapping libtool. -# -# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) -# which have later been changed to m4_define as they aren't part of the -# exported API, or moved to Autoconf or Automake where they belong. -# -# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN -# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us -# using a macro with the same name in our local m4/libtool.m4 it'll -# pull the old libtool.m4 in (it doesn't see our shiny new m4_define -# and doesn't know about Autoconf macros at all.) -# -# So we provide this file, which has a silly filename so it's always -# included after everything else. This provides aclocal with the -# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything -# because those macros already exist, or will be overwritten later. -# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. -# -# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. 
-# Yes, that means every name once taken will need to remain here until -# we give up compatibility with versions before 1.7, at which point -# we need to keep only those names which we still refer to. - -# This is to help aclocal find these macros, as it can't see m4_define. -AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) - -m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) -m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) -m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) -m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) -m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) -m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) -m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) -m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) -m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) -m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) -m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) -m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) -m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) -m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) -m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) -m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) -m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) -m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) -m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) -m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) -m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) -m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) -m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) 
-m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) -m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) -m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) -m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) -m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) -m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) -m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) -m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) -m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) -m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) -m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) -m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) -m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) -m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) -m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) -m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) -m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) -m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) -m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) -m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) -m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) -m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) -m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) -m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], 
[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) -m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) -m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) -m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) -m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) -m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) -m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) -m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) -m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) -m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) -m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) -m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/m4/m4_ax_boost_base.m4 b/m4/m4_ax_boost_base.m4 deleted file mode 100644 index 2c789eae..00000000 --- a/m4/m4_ax_boost_base.m4 +++ /dev/null @@ -1,301 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_base.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# DESCRIPTION -# -# Test for the Boost C++ libraries of a particular version (or newer) -# -# If no path to the installed boost library is given the macro searchs -# under /usr, /usr/local, /opt and /opt/local and evaluates the -# $BOOST_ROOT environment variable. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) -# -# And sets: -# -# HAVE_BOOST -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2009 Peter Adolphs -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. 
This file is offered as-is, without any -# warranty. - -#serial 44 - -# example boost program (need to pass version) -m4_define([_AX_BOOST_BASE_PROGRAM], - [AC_LANG_PROGRAM([[ -#include -]],[[ -(void) ((void)sizeof(char[1 - 2*!!((BOOST_VERSION) < ($1))])); -]])]) - -AC_DEFUN([AX_BOOST_BASE], -[ -AC_ARG_WITH([boost], - [AS_HELP_STRING([--with-boost@<:@=ARG@:>@], - [use Boost library from a standard location (ARG=yes), - from the specified location (ARG=), - or disable it (ARG=no) - @<:@ARG=yes@:>@ ])], - [ - AS_CASE([$withval], - [no],[want_boost="no";_AX_BOOST_BASE_boost_path=""], - [yes],[want_boost="yes";_AX_BOOST_BASE_boost_path=""], - [want_boost="yes";_AX_BOOST_BASE_boost_path="$withval"]) - ], - [want_boost="yes"]) - - -AC_ARG_WITH([boost-libdir], - [AS_HELP_STRING([--with-boost-libdir=LIB_DIR], - [Force given directory for boost libraries. - Note that this will override library path detection, - so use this parameter only if default library detection fails - and you know exactly where your boost libraries are located.])], - [ - AS_IF([test -d "$withval"], - [_AX_BOOST_BASE_boost_lib_path="$withval"], - [AC_MSG_ERROR([--with-boost-libdir expected directory name])]) - ], - [_AX_BOOST_BASE_boost_lib_path=""]) - -BOOST_LDFLAGS="" -BOOST_CPPFLAGS="" -AS_IF([test "x$want_boost" = "xyes"], - [_AX_BOOST_BASE_RUNDETECT([$1],[$2],[$3])]) -AC_SUBST(BOOST_CPPFLAGS) -AC_SUBST(BOOST_LDFLAGS) -]) - - -# convert a version string in $2 to numeric and affect to polymorphic var $1 -AC_DEFUN([_AX_BOOST_BASE_TONUMERICVERSION],[ - AS_IF([test "x$2" = "x"],[_AX_BOOST_BASE_TONUMERICVERSION_req="1.20.0"],[_AX_BOOST_BASE_TONUMERICVERSION_req="$2"]) - _AX_BOOST_BASE_TONUMERICVERSION_req_shorten=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\.[[0-9]]*\)'` - _AX_BOOST_BASE_TONUMERICVERSION_req_major=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\)'` - AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_major" = "x"], - [AC_MSG_ERROR([You should at least specify 
libboost major version])]) - _AX_BOOST_BASE_TONUMERICVERSION_req_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.\([[0-9]]*\)'` - AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_minor" = "x"], - [_AX_BOOST_BASE_TONUMERICVERSION_req_minor="0"]) - _AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - AS_IF([test "X$_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor" = "X"], - [_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor="0"]) - _AX_BOOST_BASE_TONUMERICVERSION_RET=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req_major \* 100000 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_minor \* 100 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor` - AS_VAR_SET($1,$_AX_BOOST_BASE_TONUMERICVERSION_RET) -]) - -dnl Run the detection of boost should be run only if $want_boost -AC_DEFUN([_AX_BOOST_BASE_RUNDETECT],[ - _AX_BOOST_BASE_TONUMERICVERSION(WANT_BOOST_VERSION,[$1]) - succeeded=no - - - AC_REQUIRE([AC_CANONICAL_HOST]) - dnl On 64-bit systems check for system libraries in both lib64 and lib. - dnl The former is specified by FHS, but e.g. Debian does not adhere to - dnl this (as it rises problems for generic multi-arch support). - dnl The last entry in the list is chosen by default when no libraries - dnl are found, e.g. when only header-only libraries are installed! - AS_CASE([${host_cpu}], - [x86_64],[libsubdirs="lib64 libx32 lib lib64"], - [ppc64|s390x|sparc64|aarch64|ppc64le|riscv64],[libsubdirs="lib64 lib lib64"], - [libsubdirs="lib"] - ) - - dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give - dnl them priority over the other paths since, if libs are found there, they - dnl are almost assuredly the ones desired. 
- AS_CASE([${host_cpu}], - [i?86],[multiarch_libsubdir="lib/i386-${host_os}"], - [multiarch_libsubdir="lib/${host_cpu}-${host_os}"] - ) - - dnl first we check the system location for boost libraries - dnl this location ist chosen if boost libraries are installed with the --layout=system option - dnl or if you install boost with RPM - AS_IF([test "x$_AX_BOOST_BASE_boost_path" != "x"],[ - AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) includes in "$_AX_BOOST_BASE_boost_path/include"]) - AS_IF([test -d "$_AX_BOOST_BASE_boost_path/include" && test -r "$_AX_BOOST_BASE_boost_path/include"],[ - AC_MSG_RESULT([yes]) - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include" - for _AX_BOOST_BASE_boost_path_tmp in $multiarch_libsubdir $libsubdirs; do - AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) lib path in "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"]) - AS_IF([test -d "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" && test -r "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" ],[ - AC_MSG_RESULT([yes]) - BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"; - break; - ], - [AC_MSG_RESULT([no])]) - done],[ - AC_MSG_RESULT([no])]) - ],[ - if test X"$cross_compiling" = Xyes; then - search_libsubdirs=$multiarch_libsubdir - else - search_libsubdirs="$multiarch_libsubdir $libsubdirs" - fi - for _AX_BOOST_BASE_boost_path_tmp in /usr /usr/local /opt /opt/local ; do - if test -d "$_AX_BOOST_BASE_boost_path_tmp/include/boost" && test -r "$_AX_BOOST_BASE_boost_path_tmp/include/boost" ; then - for libsubdir in $search_libsubdirs ; do - if ls "$_AX_BOOST_BASE_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path_tmp/$libsubdir" - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path_tmp/include" - break; - fi - done - ]) - - dnl overwrite ld flags if we have required special directory with - dnl --with-boost-libdir parameter - AS_IF([test 
"x$_AX_BOOST_BASE_boost_lib_path" != "x"], - [BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_lib_path"]) - - AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION)]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_REQUIRE([AC_PROG_CXX]) - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - - - - dnl if we found no boost with system layout we search for boost libraries - dnl built and installed without the --layout=system option or for a staged(not installed) version - if test "x$succeeded" != "xyes" ; then - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - BOOST_CPPFLAGS= - if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then - BOOST_LDFLAGS= - fi - _version=0 - if test -n "$_AX_BOOST_BASE_boost_path" ; then - if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path"; then - for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "x$V_CHECK" = "x1" ; then - _version=$_version_tmp - fi - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include/boost-$VERSION_UNDERSCORE" - done - dnl if nothing found search for layout used in Windows distributions - if test -z "$BOOST_CPPFLAGS"; then - if test -d "$_AX_BOOST_BASE_boost_path/boost" && test -r "$_AX_BOOST_BASE_boost_path/boost"; then - BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path" - fi - fi - dnl if we found something and BOOST_LDFLAGS was unset before - dnl (because "$_AX_BOOST_BASE_boost_lib_path" = ""), set it here. 
- if test -n "$BOOST_CPPFLAGS" && test -z "$BOOST_LDFLAGS"; then - for libsubdir in $libsubdirs ; do - if ls "$_AX_BOOST_BASE_boost_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$libsubdir" - fi - fi - else - if test "x$cross_compiling" != "xyes" ; then - for _AX_BOOST_BASE_boost_path in /usr /usr/local /opt /opt/local ; do - if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path" ; then - for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "x$V_CHECK" = "x1" ; then - _version=$_version_tmp - best_path=$_AX_BOOST_BASE_boost_path - fi - done - fi - done - - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" - if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then - for libsubdir in $libsubdirs ; do - if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$best_path/$libsubdir" - fi - fi - - if test -n "$BOOST_ROOT" ; then - for libsubdir in $libsubdirs ; do - if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then - version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` - stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` - stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` - V_CHECK=`expr $stage_version_shorten \>\= $_version` - if test "x$V_CHECK" = "x1" && test -z "$_AX_BOOST_BASE_boost_lib_path" ; then - AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) - BOOST_CPPFLAGS="-I$BOOST_ROOT" - BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" - fi - fi - fi - fi - - 
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - fi - - if test "x$succeeded" != "xyes" ; then - if test "x$_version" = "x0" ; then - AC_MSG_NOTICE([[We could not detect the boost libraries (version $1 or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation.]]) - else - AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).]) - fi - # execute ACTION-IF-NOT-FOUND (if present): - ifelse([$3], , :, [$3]) - else - AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) - # execute ACTION-IF-FOUND (if present): - ifelse([$2], , :, [$2]) - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - -]) diff --git a/m4/m4_ax_boost_filesystem.m4 b/m4/m4_ax_boost_filesystem.m4 deleted file mode 100644 index c392f9d6..00000000 --- a/m4/m4_ax_boost_filesystem.m4 +++ /dev/null @@ -1,118 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_FILESYSTEM -# -# DESCRIPTION -# -# Test for Filesystem library from the Boost C++ libraries. The macro -# requires a preceding call to AX_BOOST_BASE. Further documentation is -# available at . 
-# -# This macro calls: -# -# AC_SUBST(BOOST_FILESYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_FILESYSTEM -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg -# Copyright (c) 2009 Michael Tindal -# Copyright (c) 2009 Roman Rybalko -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 27 - -AC_DEFUN([AX_BOOST_FILESYSTEM], -[ - AC_ARG_WITH([boost-filesystem], - AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@], - [use the Filesystem library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-filesystem=boost_filesystem-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_filesystem_lib="" - else - want_boost="yes" - ax_boost_user_filesystem_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - LIBS_SAVED=$LIBS - LIBS="$LIBS $BOOST_SYSTEM_LIB" - export LIBS - - AC_CACHE_CHECK(whether the Boost::Filesystem library is available, - ax_cv_boost_filesystem, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], - [[using namespace boost::filesystem; - path my_path( "foo/bar/data.txt" ); - return 0;]])], - ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_filesystem" = "xyes"; then - AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_filesystem_lib" = "x"; then - for libextension in `ls -r 
$BOOSTLIBDIR/libboost_filesystem* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - if test "x$link_filesystem" != "xyes"; then - for libextension in `ls -r $BOOSTLIBDIR/boost_filesystem* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - fi - else - for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_filesystem" != "xyes"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - LIBS="$LIBS_SAVED" - fi -]) diff --git a/m4/m4_ax_boost_system.m4 b/m4/m4_ax_boost_system.m4 deleted file mode 100644 index 207d7be8..00000000 --- a/m4/m4_ax_boost_system.m4 +++ /dev/null @@ -1,121 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_system.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_SYSTEM -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . 
-# -# This macro calls: -# -# AC_SUBST(BOOST_SYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_SYSTEM -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Michael Tindal -# Copyright (c) 2008 Daniel Casimiro -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 19 - -AC_DEFUN([AX_BOOST_SYSTEM], -[ - AC_ARG_WITH([boost-system], - AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@], - [use the System library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-system=boost_system-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_system_lib="" - else - want_boost="yes" - ax_boost_user_system_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::System library is available, - ax_cv_boost_system, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - CXXFLAGS= - - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], - [[boost::system::error_category *a = 0;]])], - ax_cv_boost_system=yes, ax_cv_boost_system=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_system" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_system_lib" = "x"; then - for libextension in `ls -r $BOOSTLIBDIR/libboost_system* 2>/dev/null | 
sed 's,.*/lib,,' | sed 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - if test "x$link_system" != "xyes"; then - for libextension in `ls -r $BOOSTLIBDIR/boost_system* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_system_lib boost_system-$ax_boost_user_system_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_system" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/m4/pkg.m4 b/m4/pkg.m4 deleted file mode 100644 index 62995f01..00000000 --- a/m4/pkg.m4 +++ /dev/null @@ -1,233 +0,0 @@ -# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- -# serial 1 (pkg-config-0.24) -# -# Copyright © 2004 Scott James Remnant . -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# PKG_PROG_PKG_CONFIG([MIN-VERSION]) -# ---------------------------------- -AC_DEFUN([PKG_PROG_PKG_CONFIG], -[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) -m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) -m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) -AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) -AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) -AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) - -if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then - AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) -fi -if test -n "$PKG_CONFIG"; then - _pkg_min_version=m4_default([$1], [0.9.0]) - AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) - if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - PKG_CONFIG="" - fi -fi[]dnl -])# PKG_PROG_PKG_CONFIG - -# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# Check to see whether a particular set of modules exists. Similar -# to PKG_CHECK_MODULES(), but does not set variables or print errors. 
-# -# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -# only at the first occurence in configure.ac, so if the first place -# it's called might be skipped (such as if it is within an "if", you -# have to call PKG_CHECK_EXISTS manually -# -------------------------------------------------------------- -AC_DEFUN([PKG_CHECK_EXISTS], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -if test -n "$PKG_CONFIG" && \ - AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then - m4_default([$2], [:]) -m4_ifvaln([$3], [else - $3])dnl -fi]) - -# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) -# --------------------------------------------- -m4_define([_PKG_CONFIG], -[if test -n "$$1"; then - pkg_cv_[]$1="$$1" - elif test -n "$PKG_CONFIG"; then - PKG_CHECK_EXISTS([$3], - [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` - test "x$?" != "x0" && pkg_failed=yes ], - [pkg_failed=yes]) - else - pkg_failed=untried -fi[]dnl -])# _PKG_CONFIG - -# _PKG_SHORT_ERRORS_SUPPORTED -# ----------------------------- -AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then - _pkg_short_errors_supported=yes -else - _pkg_short_errors_supported=no -fi[]dnl -])# _PKG_SHORT_ERRORS_SUPPORTED - - -# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -# [ACTION-IF-NOT-FOUND]) -# -# -# Note that if there is a possibility the first call to -# PKG_CHECK_MODULES might not happen, you should be sure to include an -# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac -# -# -# -------------------------------------------------------------- -AC_DEFUN([PKG_CHECK_MODULES], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl -AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl - -pkg_failed=no -AC_MSG_CHECKING([for $1]) - -_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) -_PKG_CONFIG([$1][_LIBS], [libs], [$2]) - 
-m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS -and $1[]_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details.]) - -if test $pkg_failed = yes; then - AC_MSG_RESULT([no]) - _PKG_SHORT_ERRORS_SUPPORTED - if test $_pkg_short_errors_supported = yes; then - $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else - $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` - fi - # Put the nasty error message in config.log where it belongs - echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD - - m4_default([$4], [AC_MSG_ERROR( -[Package requirements ($2) were not met: - -$$1_PKG_ERRORS - -Consider adjusting the PKG_CONFIG_PATH environment variable if you -installed software in a non-standard prefix. - -_PKG_TEXT])[]dnl - ]) -elif test $pkg_failed = untried; then - AC_MSG_RESULT([no]) - m4_default([$4], [AC_MSG_FAILURE( -[The pkg-config script could not be found or is too old. Make sure it -is in your PATH or set the PKG_CONFIG environment variable to the full -path to pkg-config. - -_PKG_TEXT - -To get pkg-config, see .])[]dnl - ]) -else - $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS - $1[]_LIBS=$pkg_cv_[]$1[]_LIBS - AC_MSG_RESULT([yes]) - $3 -fi[]dnl -])# PKG_CHECK_MODULES - - -# PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -# [ACTION-IF-NOT-FOUND]) -# --------------------------------------------------------------------- -# Checks for existence of MODULES and gathers its build flags with -# static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags -# and VARIABLE-PREFIX_LIBS from --libs. -# -# Note that if there is a possibility the first call to -# PKG_CHECK_MODULES_STATIC might not happen, you should be sure to include -# an explicit call to PKG_PROG_PKG_CONFIG in your configure.ac. 
-AC_DEFUN([PKG_CHECK_MODULES_STATIC], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -_save_PKG_CONFIG=$PKG_CONFIG -PKG_CONFIG="$PKG_CONFIG --static" -PKG_CHECK_MODULES($@) -PKG_CONFIG=$_save_PKG_CONFIG[]dnl -]) - - -# PKG_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable pkgconfigdir as the location where a module -# should install pkg-config .pc files. By default the directory is -# $libdir/pkgconfig, but the default can be changed by passing -# DIRECTORY. The user can override through the --with-pkgconfigdir -# parameter. -AC_DEFUN([PKG_INSTALLDIR], -[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) -m4_pushdef([pkg_description], - [pkg-config installation directory @<:@]pkg_default[@:>@]) -AC_ARG_WITH([pkgconfigdir], - [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, - [with_pkgconfigdir=]pkg_default) -AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) -m4_popdef([pkg_default]) -m4_popdef([pkg_description]) -]) dnl PKG_INSTALLDIR - - -# PKG_NOARCH_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable noarch_pkgconfigdir as the location where a -# module should install arch-independent pkg-config .pc files. By -# default the directory is $datadir/pkgconfig, but the default can be -# changed by passing DIRECTORY. The user can override through the -# --with-noarch-pkgconfigdir parameter. 
-AC_DEFUN([PKG_NOARCH_INSTALLDIR], -[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) -m4_pushdef([pkg_description], - [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) -AC_ARG_WITH([noarch-pkgconfigdir], - [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, - [with_noarch_pkgconfigdir=]pkg_default) -AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) -m4_popdef([pkg_default]) -m4_popdef([pkg_description]) -]) dnl PKG_NOARCH_INSTALLDIR - - -# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, -# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# ------------------------------------------- -# Retrieves the value of the pkg-config variable for the given module. -AC_DEFUN([PKG_CHECK_VAR], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl - -_PKG_CONFIG([$1], [variable="][$3]["], [$2]) -AS_VAR_COPY([$1], [pkg_cv_][$1]) - -AS_VAR_IF([$1], [""], [$5], [$4])dnl -])# PKG_CHECK_VAR diff --git a/maint/codes-net.pc.in b/maint/codes-net.pc.in deleted file mode 100644 index 9215d369..00000000 --- a/maint/codes-net.pc.in +++ /dev/null @@ -1,12 +0,0 @@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: codes-net -Description: Network functionality for CODES storage simulation -Version: @PACKAGE_VERSION@ -URL: https://github.com/codes-org/codes -Requires: codes-base -Libs: -L${libdir} -lcodes-net -Cflags: -I${includedir} diff --git a/maint/codes.pc.in b/maint/codes.pc.in deleted file mode 100644 index 97cf7d1d..00000000 --- a/maint/codes.pc.in +++ /dev/null @@ -1,32 +0,0 @@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ -ross_cflags=@ROSS_CFLAGS@ -ross_libs=@ROSS_LIBS@ -darshan_libs=@DARSHAN_LIBS@ -darshan_cflags=@DARSHAN_CFLAGS@ -dumpi_cflags=@DUMPI_CFLAGS@ -dumpi_libs=@DUMPI_LIBS@ -cortex_cflags=@CORTEX_CFLAGS@ -cortex_libs=@CORTEX_LIBS@ -python_cflags=@PYTHON_CFLAGS@ 
-python_libs=@PYTHON_LIBS@ -boost_cflags=@BOOST_CFLAGS@ -boost_libs=@BOOST_LIBS@ -argobots_libs=@ARGOBOTS_LIBS@ -argobots_cflags=@ARGOBOTS_CFLAGS@ -swm_libs=@SWM_LIBS@ -swm_cflags=@SWM_CFLAGS@ -swm_datarootdir=@SWM_DATAROOTDIR@ -union_libs=@UNION_LIBS@ -union_cflags=@UNION_CFLAGS@ -union_datadir=@UNION_DATADIR@ - -Name: codes-base -Description: Base functionality for CODES storage simulation -Version: @PACKAGE_VERSION@ -URL: https://github.com/codes-org/codes -Requires: -Libs: -L${libdir} -lcodes ${ross_libs} ${argobots_libs} ${swm_libs} ${union_libs} ${darshan_libs} ${dumpi_libs} ${cortex_libs} -Cflags: -I${includedir} -I${swm_datarootdir} ${union_datadir} ${ross_cflags} ${darshan_cflags} ${swm_cflags} ${union_cflags} ${argobots_cflags} ${dumpi_cflags} ${cortex_cflags} diff --git a/prepare.sh b/prepare.sh deleted file mode 100755 index 2739136e..00000000 --- a/prepare.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -echo "Regenerating build files..." -autoreconf -fi -Im4 From 3d2b726b0574e1193ab6013569d6ecdefc417f24 Mon Sep 17 00:00:00 2001 From: helq Date: Tue, 22 Jul 2025 12:05:43 -0400 Subject: [PATCH 106/110] Adding some of Neil's and Elkin's contributions from the past 5 years --- CONTRIBUTORS.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 39e0bebd..7d8dd5b4 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -20,6 +20,8 @@ Contributors to date (with affiliations at time of contribution) - Lee Savoie, Univ. of Arizona - Ning Liu, Rensselaer Polytechnic Institute - Jason Cope, Argonne National Laboratory +- Kevin A. Brown, Argonne National Laboratory +- Elkin Cruz, Rensselaer Polytechnic Institute Contributions: @@ -40,6 +42,8 @@ Neil McGlohon (RPI) - Merged 1-D dragonfly and 2-D dragonfly network models. - Updated adaptive routing in megafly and 1-D dragonfly network models. - Extended slim fly network model's dual-rail mode to arbitrary number of rails (pending). 
+ - Implemented Quality of Service (QoS) in 1-D dragonfly network. + - Implemented changes needed to allow ROSS's tiebreaker mechanism. Nikhil Jain, Abhinav Bhatele (LLNL) - Improvements in credit-based flow control of CODES dragonfly and torus network models. @@ -78,3 +82,17 @@ Caitlin Ross (RPI): - Added instrumentation so that network models can report sampled statistics over virtual time (pending). - Bug reporter for CODES models. + +Elkin Cruz (RPI) + - Added network surrogate for 1-D Dragonfly model (dragonfly-dally). + - Added application surrogate for MPI replay (model-net-mpi-replay). + - Implemented API to allow network and application surrogates to switch as + simulation runs (aka, hybrid simulation). + - Added network and application level directors, which coordinate data + transference between model and predictor. + - Added simple averaged-based network and application predictors (they are + given simulation data and are in charge of predicting future states of the + simulation, skipping computation). + - Implemented necessary scaffolding to check for bugs in reversible + computation (to be used with SEQUENTIAL_ROLLBACK_CHECK option in ROSS). + - Fixed reversible computation bugs on 1-D Dragonfly network. 
From ed9edf5a2f5fb48117e0b7f15c1194a788efd0a1 Mon Sep 17 00:00:00 2001 From: helq Date: Wed, 23 Jul 2025 10:50:35 -0400 Subject: [PATCH 107/110] Updating compilation script --- CODES-compile-instructions.sh | 16 ++++++++++------ README.md | 6 ++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh index 76d4c6a1..14178c99 100644 --- a/CODES-compile-instructions.sh +++ b/CODES-compile-instructions.sh @@ -21,13 +21,12 @@ CUR_DIR="$PWD" ##### Downloading everything ##### -git clone https://github.com/codes-org/codes --branch=develop -git clone https://github.com/ross-org/ross --depth=100 --branch=develop +git clone https://github.com/codes-org/codes --depth=100 --branch=v1.5.0 +git clone https://github.com/ross-org/ross --depth=100 --branch=v8.1.0 if [ $swm_enable = 1 ]; then git clone https://github.com/pmodels/argobots --depth=1 - # This version is one commit ahead - git clone https://github.com/helq/swm-workloads --branch=total-iterations-communication + git clone https://github.com/codes-org/swm-workloads --branch=v1.2 fi if [ $union_enable = 1 ]; then @@ -35,7 +34,8 @@ if [ $union_enable = 1 ]; then curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz tar xvf conceptual-1.5.1b.tar.gz # Downloading union - git clone https://github.com/helq/Union --branch=master + git clone https://github.com/SPEAR-UIC/Union + pushd Union && git checkout 99b3df3 && popd fi ##### COMPILING ##### @@ -84,9 +84,13 @@ if [ $union_enable = 1 ]; then popd pushd Union + # Python 2 override. 
Union expects Python 2 ONLY + mkdir -p python-override + ln -s /usr/bin/python2 python-override/python + # compiling ./prepare.sh PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx - make -j4 && make install + PATH="$PWD/python-override:$PATH" make -j4 && make install err=$? [[ $err -ne 0 ]] && exit $err popd diff --git a/README.md b/README.md index 7740a222..43f64438 100644 --- a/README.md +++ b/README.md @@ -9,21 +9,19 @@ The easiest way to build CODES is using our automated compilation script that ha 1. **Download the compilation script** [click here](https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh) or: ```bash - # Download the script to your desired directory wget https://raw.githubusercontent.com/codes-org/codes/master/CODES-compile-instructions.sh - chmod +x CODES-compile-instructions.sh ``` 2. **Edit and Run the script**: ```bash - ./CODES-compile-instructions.sh + bash ./CODES-compile-instructions.sh ``` The script will create a new directory with all dependencies and CODES compiled and ready to use. ## Prerequisites -- **MPI**: OpenMPI or MPICH for parallel execution +- **MPI**: MPICH for parallel execution (OpenMPI is not supported by Union, a dependency) - **CMake**: Version 3.12 or higher - **ROSS**: Rensselaer Optimistic Simulation System (handled by script) - **C/C++ compiler**: GCC or Clang with C++11 support From 39fbc4fb405087ba199a6264f051e056ab980b04 Mon Sep 17 00:00:00 2001 From: Sanjay Chari Date: Thu, 21 May 2026 10:33:23 -0400 Subject: [PATCH 108/110] Fix zmq and ROSS compilation issues The kronos-develop-director-b branch of CODES was using an outdated version of ROSS and also had compilation issues because of zeromq. This commit changes it to be compatible with the master branch of ROSS and fixes the zeromq compilation issues. 
--- CODES-compile-instructions.sh | 173 ++++++++++++++++++++++++++++++++++ codes/surrogate/switch.h | 2 +- src/CMakeLists.txt | 2 +- src/surrogate/init.c | 6 +- src/surrogate/switch.c | 61 ++++++------ src/surrogate/zmqml/Makefile | 2 +- src/util/rc-stack.c | 2 +- 7 files changed, 213 insertions(+), 35 deletions(-) create mode 100644 CODES-compile-instructions.sh diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh new file mode 100644 index 00000000..c477e738 --- /dev/null +++ b/CODES-compile-instructions.sh @@ -0,0 +1,173 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Switches +swm_enable=0 +union_enable=0 +torch_enable=1 + +# Uncomment below for MPICH +#export PATH=/usr/local/mpich-4.1.2/bin/:"$PATH" +# Note: remember to compile MPICH with nemesis not with UCX support + +################## Actual scripts starts from here ################## + +# SWM has to be enabled for UNION to work +if [ $union_enable = 1 ]; then + swm_enable=1 +fi + +# What to compile +CUR_DIR="$PWD" + +##### Downloading everything ##### + +if [ ! -d codes/.git ]; then + git clone https://github.com/codes-org/codes --depth=100 --branch=v1.5.0 +else + echo "Using existing codes checkout: $(realpath codes)" +fi + +if [ ! -d ross/.git ]; then + git clone https://github.com/ross-org/ross --depth=100 --branch=v8.1.0 +else + echo "Using existing ross checkout: $(realpath ross)" +fi + +if [ $swm_enable = 1 ]; then + git clone https://github.com/pmodels/argobots --depth=1 + git clone https://github.com/codes-org/swm-workloads --branch=v1.2 +fi + +if [ $union_enable = 1 ]; then + # Downloading conceptual + curl -L https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz + tar xvf conceptual-1.5.1b.tar.gz + # Downloading union + git clone https://github.com/SPEAR-UIC/Union + pushd Union && git checkout 99b3df3 && popd +fi + +##### COMPILING ##### + +mkdir -p ross/build +pushd ross/build +cmake .. 
-DROSS_BUILD_MODELS=ON -DCMAKE_INSTALL_PREFIX="$(realpath ./bin)" \ + -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-g -Wall" +#make VERBOSE=1 +make install -j4 +err=$? +[[ $err -ne 0 ]] && exit $err +popd + +if [ $swm_enable = 1 ]; then + pushd swm-workloads/swm + ./prepare.sh + mkdir -p build + pushd build + ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + #make V=1 && make install + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd && popd + + pushd argobots + ./autogen.sh + mkdir -p build + pushd build + #../configure --enable-debug=all --disable-fast --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g + #make V=1 && make install + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd && popd +fi + +if [ $union_enable = 1 ]; then + pushd conceptual-1.5.1b + PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm + make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd + + pushd Union + # Python 2 override. Union expects Python 2 ONLY + mkdir -p python-override + ln -s /usr/bin/python2 python-override/python + # compiling + ./prepare.sh + PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx + PATH="$PWD/python-override:$PATH" make -j4 && make install + err=$? + [[ $err -ne 0 ]] && exit $err + popd +fi + + +# Build local ZMQML requester library required by director-client.C +pushd codes/src/surrogate/zmqml +make clean +make +test -f libzmqmlrequester.so +test -f zmqmlrequester.h +popd + +# Make imported zmqmlrequester target visible to doc/example and tests. 
+python3 - <<'INNERPY' +from pathlib import Path +cm = Path("codes/src/CMakeLists.txt") +text = cm.read_text() +old = "add_library(zmqmlrequester SHARED IMPORTED )" +new = "add_library(zmqmlrequester SHARED IMPORTED GLOBAL)" +if old in text: + cm.write_text(text.replace(old, new)) +elif new in text: + pass +else: + raise SystemExit("Could not find zmqmlrequester imported target line in codes/src/CMakeLists.txt") +INNERPY + +mkdir -p codes/build +pushd codes/build + +make_args_codes=( + -DCMAKE_PREFIX_PATH="$(realpath "$CUR_DIR/ross/build/bin")" + -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc + -DCMAKE_C_FLAGS="-g -Wall" + -DCMAKE_CXX_FLAGS="-g -Wall" + -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON + -DCMAKE_INSTALL_PREFIX="$(realpath bin)" + -DZMQML_BUILD_PATH="$(realpath "$CUR_DIR/codes/src/surrogate/zmqml")" + -DZeroMQ_INCLUDE_DIR=/usr/include + -DZeroMQ_LIBRARY=/usr/lib/x86_64-linux-gnu/libzmq.so +) +if [ $swm_enable = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DSWM_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/swm-workloads/swm/build/maint")" + -DARGOBOTS_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/argobots/build/maint")" + ) +fi +if [ $union_enable = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")" + ) +fi +if [ $torch_enable = 1 ]; then + make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=true) +else + make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false) +fi + +cmake .. "${make_args_codes[@]}" +#make VERBOSE=1 +make -j4 +err=$? 
+[[ $err -ne 0 ]] && exit $err + +popd diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h index 553f3a11..82a31cf4 100644 --- a/codes/surrogate/switch.h +++ b/codes/surrogate/switch.h @@ -61,7 +61,7 @@ extern struct switch_at_struct switch_at; // Switch -void director_switch(tw_pe * pe, tw_event_sig gvt_sig); +void director_switch(tw_pe * pe, bool past_end_time); #ifdef __cplusplus } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cd38259e..9439ce2f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -157,7 +157,7 @@ if(USE_ONLINE) endif() # ZMQML -add_library(zmqmlrequester SHARED IMPORTED ) +add_library(zmqmlrequester SHARED IMPORTED GLOBAL) set_target_properties(zmqmlrequester PROPERTIES IMPORTED_LOCATION "${ZMQML_BUILD_PATH}/libzmqmlrequester.so" INTERFACE_INCLUDE_DIRECTORIES "${ZMQML_BUILD_PATH}") diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 79bb7e71..42049d72 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -64,14 +64,14 @@ void surrogate_configure( PRINTF_ONCE("\n"); // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT - g_tw_gvt_arbitrary_fun = director_switch; + g_tw_gvt_hook = director_switch; #ifdef USE_RAND_TIEBREAKER tw_event_sig time_stamp = {0}; time_stamp.recv_ts = switch_at.time_stampts[0]; - tw_trigger_arbitrary_fun_at(time_stamp); + tw_trigger_gvt_hook_at_event_sig(time_stamp); #else - tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]); + tw_trigger_gvt_hook_at(switch_at.time_stampts[0]); #endif // freeing timestamps before it dissapears diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 4b29ab18..a906e152 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -79,9 +79,9 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) { tw_stime const gvt = gvt_sig.recv_ts; // Backtracking the simulation to GVT for (unsigned int i = 0; i < g_tw_nkp; i++) { - tw_kp_rollback_to_sig(g_tw_kp[i], 
gvt_sig); + tw_kp_rollback_to_sig(g_tw_kp[i], &gvt_sig); } - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); assert(pe->GVT_sig.recv_ts == gvt); // redundant but needed because compiler cries that gvt is never used #else static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) { @@ -100,10 +100,7 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) { pe->stats.s_net_read += tw_clock_read() - start; } - pe->gvt_status = 1; - tw_sched_event_q(pe); - tw_sched_cancel_q(pe); - tw_gvt_step2(pe); + tw_scheduler_rollback_and_cancel_events_pe(pe); if (DEBUG_DIRECTOR > 1) { printf("PE %lu: Time stamp at the end of GVT time: %f - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size); @@ -146,7 +143,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { // Filtering events to freeze assert(next_event->prev == NULL); #ifdef USE_RAND_TIEBREAKER - assert(tw_event_sig_compare(next_event->sig, gvt_sig) >= 0); + assert(tw_event_sig_compare_ptr(&next_event->sig, &gvt_sig) >= 0); #else assert(next_event->recv_ts >= gvt); #endif @@ -165,11 +162,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { next_event->recv_ts += switch_offset; next_event->sig.recv_ts = next_event->recv_ts; } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts); + assert(next_event->recv_ts >= g_tw_gvt_hook_trigger.sig_at.recv_ts); #else next_event->recv_ts += switch_offset; } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at); + assert(next_event->recv_ts >= g_tw_gvt_hook_trigger.at); #endif // store event in deque_events to inject immediately back to the queue @@ -382,11 +379,12 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) { } +void director_switch(tw_pe * pe, bool past_end_time) { #ifdef USE_RAND_TIEBREAKER -void director_switch(tw_pe * pe, tw_event_sig gvt_sig) { + tw_event_sig const gvt_sig = 
pe->GVT_sig; tw_stime const gvt = gvt_sig.recv_ts; #else -void director_switch(tw_pe * pe, tw_stime gvt) { + tw_stime const gvt = pe->GVT; #endif assert(is_surrogate_configured); @@ -400,15 +398,18 @@ void director_switch(tw_pe * pe, tw_stime gvt) { printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt, surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition"); - switch (g_tw_trigger_arbitrary_fun.active) { - case ARBITRARY_FUN_enabled: - printf("enabled\n"); + switch (g_tw_gvt_hook_trigger.status) { + case GVT_HOOK_STATUS_timestamp: + printf("timestamp\n"); break; - case ARBITRARY_FUN_disabled: + case GVT_HOOK_STATUS_disabled: printf("disabled\n"); break; - case ARBITRARY_FUN_triggered: - printf("triggered\n"); + case GVT_HOOK_STATUS_every_n_gvt: + printf("every-n-gvt\n"); + break; + case GVT_HOOK_STATUS_model_call: + printf("model-call\n"); break; } } @@ -430,16 +431,20 @@ void director_switch(tw_pe * pe, tw_stime gvt) { return; } - // Detecting if we are going to switch - if (switch_at.current_i < switch_at.total - && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) { + // Detecting if we are going to switch. + // + // Newer ROSS calls g_tw_gvt_hook only after the timestamp trigger fires, + // and it sets g_tw_gvt_hook_trigger.status back to GVT_HOOK_STATUS_disabled + // before entering this hook. Therefore, do not check for the old + // ARBITRARY_FUN_triggered state here; it no longer exists. 
+ if (switch_at.current_i < switch_at.total) { double const switch_time = switch_at.time_stampts[switch_at.current_i]; #ifdef USE_RAND_TIEBREAKER - assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); + assert(g_tw_gvt_hook_trigger.sig_at.recv_ts == switch_time); #else - assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]); + assert(g_tw_gvt_hook_trigger.at == switch_time); #endif - assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it + assert(gvt >= switch_time); // current gvt should not be before the requested switch time } else { return; } @@ -457,10 +462,10 @@ void director_switch(tw_pe * pe, tw_stime gvt) { // Rollback if in optimistic mode #ifdef USE_RAND_TIEBREAKER if (g_tw_synchronization_protocol == OPTIMISTIC) { - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); rollback_and_cancel_events_pe(pe, gvt_sig); - //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0); - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + //assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) <= 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); } #else if (g_tw_synchronization_protocol == OPTIMISTIC) { @@ -502,10 +507,10 @@ void director_switch(tw_pe * pe, tw_stime gvt) { tw_event_sig time_stamp = {0}; time_stamp.recv_ts = next_switch; //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(time_stamp); + tw_trigger_gvt_hook_at_event_sig(time_stamp); #else //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(next_switch); + tw_trigger_gvt_hook_at(next_switch); #endif } diff --git a/src/surrogate/zmqml/Makefile b/src/surrogate/zmqml/Makefile index 4c28ed54..b4abcfab 100644 --- a/src/surrogate/zmqml/Makefile +++ b/src/surrogate/zmqml/Makefile @@ -7,7 +7,7 @@ 
TARGETS=libzmqmlrequester.so demozmqmlrequester all: $(TARGETS) libzmqmlrequester.so: zmqmlrequester.o - $(CXX) -shared -o $@ $^ + $(CXX) -shared -o $@ $^ $(LDFLAGS) zmqmlrequester.o: zmqmlrequester.cpp zmqmlrequester.h $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c index ebb2131f..5f68123e 100644 --- a/src/util/rc-stack.c +++ b/src/util/rc-stack.c @@ -107,7 +107,7 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) { while (ent != &s->head) { rc_entry *r = qlist_entry(ent, rc_entry, ql); #ifdef USE_RAND_TIEBREAKER - if (lp == NULL || tw_event_sig_compare(r->e_sig, lp->pe->GVT_sig) == -1) { + if (lp == NULL || tw_event_sig_compare_ptr(&r->e_sig, &lp->pe->GVT_sig) == -1) { #else if (lp == NULL || r->time < lp->pe->GVT){ #endif From 0651b5ec34855b6ed258f046363682ff3ec1befa Mon Sep 17 00:00:00 2001 From: Sanjay Chari Date: Thu, 21 May 2026 13:58:01 -0400 Subject: [PATCH 109/110] Fix torch-jit compilation Compilation with torch-jit was not occurring even with torch_enable set to 1. This commit fixes torch-jit compilation with GPU support. --- CODES-compile-instructions.sh | 115 +++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh index c477e738..46f27b8c 100644 --- a/CODES-compile-instructions.sh +++ b/CODES-compile-instructions.sh @@ -134,8 +134,99 @@ INNERPY mkdir -p codes/build pushd codes/build +torch_cmake_prefix="" +torch_dir="" + +if [ "$torch_enable" = 1 ]; then + torch_cmake_prefix="$(python3 - <<'INNERPY' +import torch +print(torch.utils.cmake_prefix_path) +INNERPY +)" + torch_dir="${torch_cmake_prefix}/Torch" + + if [ !
-f "${torch_dir}/TorchConfig.cmake" ]; then + echo "ERROR: TorchConfig.cmake not found at: ${torch_dir}/TorchConfig.cmake" >&2 + echo " torch.utils.cmake_prefix_path returned: ${torch_cmake_prefix}" >&2 + exit 1 + fi + + echo "Using Torch CMake prefix: ${torch_cmake_prefix}" + echo "Using Torch_DIR: ${torch_dir}" + + # Optional CUDA toolkit override for CUDA-enabled PyTorch. + # Set CUDA_HOME before running this script, e.g.: + # export CUDA_HOME=/usr/local/cuda-12.4 + # or: + # export CUDA_HOME=/usr/local/cuda + if python3 - <<'INNERPY' +import torch, sys +sys.exit(0 if torch.version.cuda is not None else 1) +INNERPY + then + if [ -z "${CUDA_HOME:-}" ]; then + if [ -d /usr/local/cuda ]; then + CUDA_HOME=/usr/local/cuda + else + echo "ERROR: CUDA-enabled PyTorch detected, but CUDA_HOME is not set and /usr/local/cuda does not exist." >&2 + echo " Set CUDA_HOME to your CUDA toolkit root, e.g. /usr/local/cuda-12.4." >&2 + exit 1 + fi + fi + + if [ ! -f "${CUDA_HOME}/include/cuda_runtime_api.h" ]; then + echo "ERROR: Missing CUDA header: ${CUDA_HOME}/include/cuda_runtime_api.h" >&2 + exit 1 + fi + + if [ ! -f "${CUDA_HOME}/lib64/libcudart.so" ] && [ ! -f "${CUDA_HOME}/lib/libcudart.so" ]; then + echo "ERROR: Missing CUDA runtime library under ${CUDA_HOME}/lib64 or ${CUDA_HOME}/lib" >&2 + exit 1 + fi + + if [ ! -x "${CUDA_HOME}/bin/nvcc" ]; then + echo "ERROR: Missing CUDA compiler: ${CUDA_HOME}/bin/nvcc" >&2 + exit 1 + fi + + if [ ! -d "${CUDA_HOME}/nvvm/libdevice" ]; then + echo "ERROR: Missing CUDA libdevice directory: ${CUDA_HOME}/nvvm/libdevice" >&2 + exit 1 + fi + + cuda_arch="" + if command -v nvidia-smi >/dev/null 2>&1; then + cuda_arch="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 | tr -d '.[:space:]' || true)" + fi + + if [ -z "${cuda_arch}" ]; then + echo "WARNING: Could not auto-detect GPU compute capability with nvidia-smi." >&2 + echo " Falling back to CMAKE_CUDA_ARCHITECTURES=80." 
>&2 + cuda_arch="80" + fi + + export CUDA_HOME + export CUDA_PATH="${CUDA_HOME}" + export CUDA_ROOT="${CUDA_HOME}" + export CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" + export CUDAToolkit_ROOT="${CUDA_HOME}" + export CUDACXX="${CUDA_HOME}/bin/nvcc" + export PATH="${CUDA_HOME}/bin:${PATH}" + export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/lib:${LD_LIBRARY_PATH:-}" + + echo "Using CUDA_HOME: ${CUDA_HOME}" + echo "Using CUDACXX: ${CUDACXX}" + echo "Using CMAKE_CUDA_ARCHITECTURES=${cuda_arch}" + fi +fi + +cmake_prefix_path="$(realpath "$CUR_DIR/ross/build/bin")" +if [ "$torch_enable" = 1 ]; then + cmake_prefix_path="${cmake_prefix_path};${torch_cmake_prefix}" +fi + make_args_codes=( - -DCMAKE_PREFIX_PATH="$(realpath "$CUR_DIR/ross/build/bin")" + -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc -DCMAKE_C_FLAGS="-g -Wall" -DCMAKE_CXX_FLAGS="-g -Wall" @@ -158,8 +249,26 @@ if [ $union_enable = 1 ]; then -DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")" ) fi -if [ $torch_enable = 1 ]; then - make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=true) +if [ "$torch_enable" = 1 ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DUSE_TORCH=true + -DTorch_DIR="${torch_dir}" + ) + + if [ -n "${CUDA_HOME:-}" ]; then + make_args_codes=( + "${make_args_codes[@]}" + -DCUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" + -DCUDAToolkit_ROOT="${CUDA_HOME}" + -DCUDA_PATH="${CUDA_HOME}" + -DCUDA_ROOT="${CUDA_HOME}" + -DCMAKE_CUDA_COMPILER="${CUDA_HOME}/bin/nvcc" + -DCMAKE_CUDA_ARCHITECTURES="${cuda_arch}" + -DCUDA_INCLUDE_DIRS="${CUDA_HOME}/include" + -DCUDA_CUDART_LIBRARY="${CUDA_HOME}/lib64/libcudart.so" + ) + fi else make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false) fi From 01a2b16c3bf4c38990262808ecc9bee8c9be4477 Mon Sep 17 00:00:00 2001 From: Sanjay Chari Date: Thu, 21 May 2026 14:50:42 -0400 Subject: [PATCH 110/110] Allow cpu-based PyTorch usage --- CODES-compile-instructions.sh | 72 
++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh index 46f27b8c..1ba6f3c6 100644 --- a/CODES-compile-instructions.sh +++ b/CODES-compile-instructions.sh @@ -154,47 +154,56 @@ INNERPY echo "Using Torch CMake prefix: ${torch_cmake_prefix}" echo "Using Torch_DIR: ${torch_dir}" - # Optional CUDA toolkit override for CUDA-enabled PyTorch. - # Set CUDA_HOME before running this script, e.g.: + # CUDA is intentionally opt-in. + # Default to CPU-only Torch-JIT compilation unless CUDA_HOME is explicitly set. + # + # To enable CUDA, run for example: # export CUDA_HOME=/usr/local/cuda-12.4 - # or: - # export CUDA_HOME=/usr/local/cuda - if python3 - <<'INNERPY' -import torch, sys -sys.exit(0 if torch.version.cuda is not None else 1) + # ./CODES-compile-instructions.sh + torch_cuda_version="$(python3 - <<'INNERPY' +import torch +print(torch.version.cuda or "") INNERPY - then - if [ -z "${CUDA_HOME:-}" ]; then - if [ -d /usr/local/cuda ]; then - CUDA_HOME=/usr/local/cuda - else - echo "ERROR: CUDA-enabled PyTorch detected, but CUDA_HOME is not set and /usr/local/cuda does not exist." >&2 - echo " Set CUDA_HOME to your CUDA toolkit root, e.g. /usr/local/cuda-12.4." >&2 - exit 1 - fi - fi +)" + + cuda_arch="" + if [ -z "${CUDA_HOME:-}" ] && [ -n "${torch_cuda_version}" ]; then + echo "ERROR: CUDA_HOME is not set, so this script is defaulting to CPU-only Torch-JIT compilation." >&2 + echo " However, the active Python environment has a CUDA-enabled PyTorch build:" >&2 + echo " torch.version.cuda=${torch_cuda_version}" >&2 + echo "" >&2 + echo " CMake cannot use a CUDA-enabled PyTorch package as a CPU-only LibTorch package." >&2 + echo " Choose one of the following:" >&2 + echo " 1. For CPU-only compilation, install a CPU-only PyTorch build in this environment." >&2 + echo " 2. For CUDA compilation, export CUDA_HOME to your CUDA toolkit root." 
>&2 + echo "" >&2 + echo " Example CUDA build:" >&2 + echo " export CUDA_HOME=/usr/local/cuda-12.4" >&2 + echo " bash CODES-compile-instructions.sh" >&2 + exit 1 + fi + if [ -n "${CUDA_HOME:-}" ]; then if [ ! -f "${CUDA_HOME}/include/cuda_runtime_api.h" ]; then - echo "ERROR: Missing CUDA header: ${CUDA_HOME}/include/cuda_runtime_api.h" >&2 + echo "ERROR: CUDA_HOME is set, but missing CUDA header: ${CUDA_HOME}/include/cuda_runtime_api.h" >&2 exit 1 fi if [ ! -f "${CUDA_HOME}/lib64/libcudart.so" ] && [ ! -f "${CUDA_HOME}/lib/libcudart.so" ]; then - echo "ERROR: Missing CUDA runtime library under ${CUDA_HOME}/lib64 or ${CUDA_HOME}/lib" >&2 + echo "ERROR: CUDA_HOME is set, but missing CUDA runtime library under ${CUDA_HOME}/lib64 or ${CUDA_HOME}/lib" >&2 exit 1 fi if [ ! -x "${CUDA_HOME}/bin/nvcc" ]; then - echo "ERROR: Missing CUDA compiler: ${CUDA_HOME}/bin/nvcc" >&2 + echo "ERROR: CUDA_HOME is set, but missing CUDA compiler: ${CUDA_HOME}/bin/nvcc" >&2 exit 1 fi if [ ! -d "${CUDA_HOME}/nvvm/libdevice" ]; then - echo "ERROR: Missing CUDA libdevice directory: ${CUDA_HOME}/nvvm/libdevice" >&2 + echo "ERROR: CUDA_HOME is set, but missing CUDA libdevice directory: ${CUDA_HOME}/nvvm/libdevice" >&2 exit 1 fi - cuda_arch="" if command -v nvidia-smi >/dev/null 2>&1; then cuda_arch="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 | tr -d '.[:space:]' || true)" fi @@ -214,9 +223,22 @@ INNERPY export PATH="${CUDA_HOME}/bin:${PATH}" export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/lib:${LD_LIBRARY_PATH:-}" + echo "CUDA_HOME is set; enabling CUDA Torch-JIT compilation." echo "Using CUDA_HOME: ${CUDA_HOME}" echo "Using CUDACXX: ${CUDACXX}" echo "Using CMAKE_CUDA_ARCHITECTURES=${cuda_arch}" + else + echo "CUDA_HOME is not set; forcing CPU-only Torch-JIT compilation." + + # Prevent accidental CUDA discovery from /usr/local/cuda, nvcc on PATH, + # inherited CMake cache variables, or CUDA-enabled PyTorch metadata. 
+ unset CUDA_HOME + unset CUDA_PATH + unset CUDA_ROOT + unset CUDA_TOOLKIT_ROOT_DIR + unset CUDAToolkit_ROOT + unset CUDACXX + unset CMAKE_CUDA_COMPILER fi fi @@ -268,6 +290,12 @@ if [ "$torch_enable" = 1 ]; then -DCUDA_INCLUDE_DIRS="${CUDA_HOME}/include" -DCUDA_CUDART_LIBRARY="${CUDA_HOME}/lib64/libcudart.so" ) + else + make_args_codes=( + "${make_args_codes[@]}" + -DCMAKE_DISABLE_FIND_PACKAGE_CUDA=ON + -DCMAKE_DISABLE_FIND_PACKAGE_CUDAToolkit=ON + ) fi else make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false)