diff --git a/CODES-compile-instructions.sh b/CODES-compile-instructions.sh
new file mode 100644
index 00000000..1ba6f3c6
--- /dev/null
+++ b/CODES-compile-instructions.sh
@@ -0,0 +1,349 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Fetches and builds ROSS and CODES, plus the optional SWM/Argobots and
+# Conceptual/Union dependencies selected by the switches below, then
+# configures and builds CODES (with Torch support when torch_enable=1).
+#
+# Optional environment overrides:
+#   CUDA_HOME           CUDA toolkit root; setting it opts in to a CUDA build.
+#   ZeroMQ_INCLUDE_DIR  Directory with the ZeroMQ headers (default: /usr/include).
+#   ZeroMQ_LIBRARY      Path to libzmq.so (default: /usr/lib/x86_64-linux-gnu/libzmq.so).
+
+# Switches
+swm_enable=0
+union_enable=0
+torch_enable=1
+
+# Uncomment below for MPICH
+#export PATH=/usr/local/mpich-4.1.2/bin/:"$PATH"
+# Note: remember to compile MPICH with nemesis not with UCX support
+
+################## Actual script starts here ##################
+
+# SWM has to be enabled for UNION to work
+if [ $union_enable = 1 ]; then
+    swm_enable=1
+fi
+
+# What to compile
+CUR_DIR="$PWD"
+
+##### Downloading everything #####
+
+# Each download is skipped when its target already exists, so the script can
+# be re-run: under "set -e" a repeated "git clone" into an existing directory
+# would otherwise abort the whole run.
+
+if [ ! -d codes/.git ]; then
+    git clone https://github.com/codes-org/codes --depth=100 --branch=v1.5.0
+else
+    echo "Using existing codes checkout: $(realpath codes)"
+fi
+
+if [ ! -d ross/.git ]; then
+    git clone https://github.com/ross-org/ross --depth=100 --branch=v8.1.0
+else
+    echo "Using existing ross checkout: $(realpath ross)"
+fi
+
+if [ $swm_enable = 1 ]; then
+    if [ ! -d argobots/.git ]; then
+        git clone https://github.com/pmodels/argobots --depth=1
+    else
+        echo "Using existing argobots checkout: $(realpath argobots)"
+    fi
+    if [ ! -d swm-workloads/.git ]; then
+        git clone https://github.com/codes-org/swm-workloads --branch=v1.2
+    else
+        echo "Using existing swm-workloads checkout: $(realpath swm-workloads)"
+    fi
+fi
+
+if [ $union_enable = 1 ]; then
+    # Downloading conceptual
+    if [ ! -d conceptual-1.5.1b ]; then
+        # -f: fail on an HTTP error instead of saving the error page for tar
+        curl -fL https://sourceforge.net/projects/conceptual/files/conceptual/1.5.1b/conceptual-1.5.1b.tar.gz -o conceptual-1.5.1b.tar.gz
+        tar xvf conceptual-1.5.1b.tar.gz
+    else
+        echo "Using existing conceptual sources: $(realpath conceptual-1.5.1b)"
+    fi
+    # Downloading union
+    if [ ! -d Union/.git ]; then
+        git clone https://github.com/SPEAR-UIC/Union
+    else
+        echo "Using existing Union checkout: $(realpath Union)"
+    fi
+    pushd Union && git checkout 99b3df3 && popd
+fi
+
+##### COMPILING #####
+
+mkdir -p ross/build
+pushd ross/build
+cmake .. -DROSS_BUILD_MODELS=ON -DCMAKE_INSTALL_PREFIX="$(realpath ./bin)" \
+    -DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-g -Wall"
+#make VERBOSE=1
+make install -j4
+err=$?
+[[ $err -ne 0 ]] && exit $err
+popd
+
+if [ $swm_enable = 1 ]; then
+    pushd swm-workloads/swm
+    ./prepare.sh
+    mkdir -p build
+    pushd build
+    ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
+    #make V=1 && make install
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd && popd
+
+    pushd argobots
+    ./autogen.sh
+    mkdir -p build
+    pushd build
+    #../configure --enable-debug=all --disable-fast --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
+    ../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
+    #make V=1 && make install
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd && popd
+fi
+
+if [ $union_enable = 1 ]; then
+    pushd conceptual-1.5.1b
+    PYTHON=python2 ./configure --prefix="$(realpath ./install)" LIBS=-lm
+    make -j4 && make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd
+
+    pushd Union
+    # Python 2 override. Union expects Python 2 ONLY
+    mkdir -p python-override
+    # -f: replace the link left by a previous run instead of failing
+    ln -sf /usr/bin/python2 python-override/python
+    # compiling
+    ./prepare.sh
+    PYTHON=python2 ./configure --disable-shared --with-conceptual="$(realpath ../conceptual-1.5.1b/install)" --with-conceptual-src="$(realpath ../conceptual-1.5.1b)" --prefix="$(realpath ./install)" CC=mpicc CXX=mpicxx
+    # A VAR=value prefix only applies to the command it precedes, so the
+    # override is repeated for the install step.
+    PATH="$PWD/python-override:$PATH" make -j4 && PATH="$PWD/python-override:$PATH" make install
+    err=$?
+    [[ $err -ne 0 ]] && exit $err
+    popd
+fi
+
+
+# Build local ZMQML requester library required by director-client.C
+pushd codes/src/surrogate/zmqml
+make clean
+make
+test -f libzmqmlrequester.so
+test -f zmqmlrequester.h
+popd
+
+# Make imported zmqmlrequester target visible to doc/example and tests.
+python3 - <<'INNERPY'
+from pathlib import Path
+cm = Path("codes/src/CMakeLists.txt")
+text = cm.read_text()
+old = "add_library(zmqmlrequester SHARED IMPORTED )"
+new = "add_library(zmqmlrequester SHARED IMPORTED GLOBAL)"
+if old in text:
+    cm.write_text(text.replace(old, new))
+elif new in text:
+    pass
+else:
+    raise SystemExit("Could not find zmqmlrequester imported target line in codes/src/CMakeLists.txt")
+INNERPY
+
+mkdir -p codes/build
+pushd codes/build
+
+torch_cmake_prefix=""
+torch_dir=""
+
+if [ "$torch_enable" = 1 ]; then
+    torch_cmake_prefix="$(python3 - <<'INNERPY'
+import torch
+print(torch.utils.cmake_prefix_path)
+INNERPY
+)"
+    torch_dir="${torch_cmake_prefix}/Torch"
+
+    if [ ! -f "${torch_dir}/TorchConfig.cmake" ]; then
+        echo "ERROR: TorchConfig.cmake not found at: ${torch_dir}/TorchConfig.cmake" >&2
+        echo " torch.utils.cmake_prefix_path returned: ${torch_cmake_prefix}" >&2
+        exit 1
+    fi
+
+    echo "Using Torch CMake prefix: ${torch_cmake_prefix}"
+    echo "Using Torch_DIR: ${torch_dir}"
+
+    # CUDA is intentionally opt-in.
+    # Default to CPU-only Torch-JIT compilation unless CUDA_HOME is explicitly set.
+    #
+    # To enable CUDA, run for example:
+    #   export CUDA_HOME=/usr/local/cuda-12.4
+    #   bash CODES-compile-instructions.sh
+    torch_cuda_version="$(python3 - <<'INNERPY'
+import torch
+print(torch.version.cuda or "")
+INNERPY
+)"
+
+    cuda_arch=""
+    # Directory that actually holds libcudart.so (lib64 or lib); set below.
+    cuda_lib_dir=""
+    if [ -z "${CUDA_HOME:-}" ] && [ -n "${torch_cuda_version}" ]; then
+        echo "ERROR: CUDA_HOME is not set, so this script is defaulting to CPU-only Torch-JIT compilation." >&2
+        echo " However, the active Python environment has a CUDA-enabled PyTorch build:" >&2
+        echo " torch.version.cuda=${torch_cuda_version}" >&2
+        echo "" >&2
+        echo " CMake cannot use a CUDA-enabled PyTorch package as a CPU-only LibTorch package." >&2
+        echo " Choose one of the following:" >&2
+        echo " 1. For CPU-only compilation, install a CPU-only PyTorch build in this environment." >&2
+        echo " 2. For CUDA compilation, export CUDA_HOME to your CUDA toolkit root." >&2
+        echo "" >&2
+        echo " Example CUDA build:" >&2
+        echo " export CUDA_HOME=/usr/local/cuda-12.4" >&2
+        echo " bash CODES-compile-instructions.sh" >&2
+        exit 1
+    fi
+
+    if [ -n "${CUDA_HOME:-}" ]; then
+        if [ ! -f "${CUDA_HOME}/include/cuda_runtime_api.h" ]; then
+            echo "ERROR: CUDA_HOME is set, but missing CUDA header: ${CUDA_HOME}/include/cuda_runtime_api.h" >&2
+            exit 1
+        fi
+
+        if [ -f "${CUDA_HOME}/lib64/libcudart.so" ]; then
+            cuda_lib_dir="${CUDA_HOME}/lib64"
+        elif [ -f "${CUDA_HOME}/lib/libcudart.so" ]; then
+            cuda_lib_dir="${CUDA_HOME}/lib"
+        else
+            echo "ERROR: CUDA_HOME is set, but missing CUDA runtime library under ${CUDA_HOME}/lib64 or ${CUDA_HOME}/lib" >&2
+            exit 1
+        fi
+
+        if [ ! -x "${CUDA_HOME}/bin/nvcc" ]; then
+            echo "ERROR: CUDA_HOME is set, but missing CUDA compiler: ${CUDA_HOME}/bin/nvcc" >&2
+            exit 1
+        fi
+
+        if [ ! -d "${CUDA_HOME}/nvvm/libdevice" ]; then
+            echo "ERROR: CUDA_HOME is set, but missing CUDA libdevice directory: ${CUDA_HOME}/nvvm/libdevice" >&2
+            exit 1
+        fi
+
+        if command -v nvidia-smi >/dev/null 2>&1; then
+            cuda_arch="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 | tr -d '.[:space:]' || true)"
+        fi
+
+        if [ -z "${cuda_arch}" ]; then
+            echo "WARNING: Could not auto-detect GPU compute capability with nvidia-smi." >&2
+            echo " Falling back to CMAKE_CUDA_ARCHITECTURES=80." >&2
+            cuda_arch="80"
+        fi
+
+        export CUDA_HOME
+        export CUDA_PATH="${CUDA_HOME}"
+        export CUDA_ROOT="${CUDA_HOME}"
+        export CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}"
+        export CUDAToolkit_ROOT="${CUDA_HOME}"
+        export CUDACXX="${CUDA_HOME}/bin/nvcc"
+        export PATH="${CUDA_HOME}/bin:${PATH}"
+        export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/lib:${LD_LIBRARY_PATH:-}"
+
+        echo "CUDA_HOME is set; enabling CUDA Torch-JIT compilation."
+        echo "Using CUDA_HOME: ${CUDA_HOME}"
+        echo "Using CUDACXX: ${CUDACXX}"
+        echo "Using CMAKE_CUDA_ARCHITECTURES=${cuda_arch}"
+    else
+        echo "CUDA_HOME is not set; forcing CPU-only Torch-JIT compilation."
+
+        # Prevent accidental CUDA discovery from /usr/local/cuda, nvcc on PATH,
+        # inherited CMake cache variables, or CUDA-enabled PyTorch metadata.
+        unset CUDA_HOME
+        unset CUDA_PATH
+        unset CUDA_ROOT
+        unset CUDA_TOOLKIT_ROOT_DIR
+        unset CUDAToolkit_ROOT
+        unset CUDACXX
+        unset CMAKE_CUDA_COMPILER
+    fi
+fi
+
+cmake_prefix_path="$(realpath "$CUR_DIR/ross/build/bin")"
+if [ "$torch_enable" = 1 ]; then
+    cmake_prefix_path="${cmake_prefix_path};${torch_cmake_prefix}"
+fi
+
+make_args_codes=(
+    -DCMAKE_PREFIX_PATH="${cmake_prefix_path}"
+    -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc
+    -DCMAKE_C_FLAGS="-g -Wall"
+    -DCMAKE_CXX_FLAGS="-g -Wall"
+    -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON
+    -DCMAKE_INSTALL_PREFIX="$(realpath bin)"
+    -DZMQML_BUILD_PATH="$(realpath "$CUR_DIR/codes/src/surrogate/zmqml")"
+    -DZeroMQ_INCLUDE_DIR="${ZeroMQ_INCLUDE_DIR:-/usr/include}"
+    -DZeroMQ_LIBRARY="${ZeroMQ_LIBRARY:-/usr/lib/x86_64-linux-gnu/libzmq.so}"
+)
+if [ $swm_enable = 1 ]; then
+    make_args_codes=(
+        "${make_args_codes[@]}"
+        -DSWM_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/swm-workloads/swm/build/maint")"
+        -DARGOBOTS_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/argobots/build/maint")"
+    )
+fi
+if [ $union_enable = 1 ]; then
+    make_args_codes=(
+        "${make_args_codes[@]}"
+        -DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")"
+    )
+fi
+if [ "$torch_enable" = 1 ]; then
+    make_args_codes=(
+        "${make_args_codes[@]}"
+        -DUSE_TORCH=true
+        -DTorch_DIR="${torch_dir}"
+    )
+
+    if [ -n "${CUDA_HOME:-}" ]; then
+        make_args_codes=(
+            "${make_args_codes[@]}"
+            -DCUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}"
+            -DCUDAToolkit_ROOT="${CUDA_HOME}"
+            -DCUDA_PATH="${CUDA_HOME}"
+            -DCUDA_ROOT="${CUDA_HOME}"
+            -DCMAKE_CUDA_COMPILER="${CUDA_HOME}/bin/nvcc"
+            -DCMAKE_CUDA_ARCHITECTURES="${cuda_arch}"
+            -DCUDA_INCLUDE_DIRS="${CUDA_HOME}/include"
+            -DCUDA_CUDART_LIBRARY="${cuda_lib_dir}/libcudart.so"
+        )
+    else
+        make_args_codes=(
+            "${make_args_codes[@]}"
+            -DCMAKE_DISABLE_FIND_PACKAGE_CUDA=ON
+            -DCMAKE_DISABLE_FIND_PACKAGE_CUDAToolkit=ON
+        )
+    fi
+else
+    make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=false)
+fi
+
+cmake .. "${make_args_codes[@]}"
+#make VERBOSE=1
+make -j4
+err=$?
+[[ $err -ne 0 ]] && exit $err
+
+popd
diff --git a/codes/surrogate/switch.h b/codes/surrogate/switch.h index 553f3a11..82a31cf4 100644 --- a/codes/surrogate/switch.h +++ b/codes/surrogate/switch.h @@ -61,7 +61,7 @@ extern struct switch_at_struct switch_at; // Switch -void director_switch(tw_pe * pe, tw_event_sig gvt_sig); +void director_switch(tw_pe * pe, bool past_end_time); #ifdef __cplusplus } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cd38259e..9439ce2f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -157,7 +157,7 @@ if(USE_ONLINE) endif() # ZMQML -add_library(zmqmlrequester SHARED IMPORTED ) +add_library(zmqmlrequester SHARED IMPORTED GLOBAL) set_target_properties(zmqmlrequester PROPERTIES IMPORTED_LOCATION "${ZMQML_BUILD_PATH}/libzmqmlrequester.so" INTERFACE_INCLUDE_DIRECTORIES "${ZMQML_BUILD_PATH}") diff --git a/src/surrogate/init.c b/src/surrogate/init.c index 79bb7e71..42049d72 100644 --- a/src/surrogate/init.c +++ b/src/surrogate/init.c @@ -64,14 +64,14 @@ void surrogate_configure( PRINTF_ONCE("\n"); // Injecting into ROSS the function to be called at GVT and the instant in time to trigger GVT - g_tw_gvt_arbitrary_fun = director_switch; + g_tw_gvt_hook = director_switch; #ifdef USE_RAND_TIEBREAKER tw_event_sig time_stamp = {0}; time_stamp.recv_ts = switch_at.time_stampts[0]; - tw_trigger_arbitrary_fun_at(time_stamp); + tw_trigger_gvt_hook_at_event_sig(time_stamp); #else - tw_trigger_arbitrary_fun_at(switch_at.time_stampts[0]); + tw_trigger_gvt_hook_at(switch_at.time_stampts[0]); #endif // freeing timestamps before it dissapears diff --git a/src/surrogate/switch.c b/src/surrogate/switch.c index 4b29ab18..a906e152 100644 --- a/src/surrogate/switch.c +++ b/src/surrogate/switch.c @@ -79,9 +79,9 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_event_sig gvt_sig) {
tw_stime const gvt = gvt_sig.recv_ts; // Backtracking the simulation to GVT for (unsigned int i = 0; i < g_tw_nkp; i++) { - tw_kp_rollback_to_sig(g_tw_kp[i], gvt_sig); + tw_kp_rollback_to_sig(g_tw_kp[i], &gvt_sig); } - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); assert(pe->GVT_sig.recv_ts == gvt); // redundant but needed because compiler cries that gvt is never used #else static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) { @@ -100,10 +100,7 @@ static void rollback_and_cancel_events_pe(tw_pe * pe, tw_stime gvt) { pe->stats.s_net_read += tw_clock_read() - start; } - pe->gvt_status = 1; - tw_sched_event_q(pe); - tw_sched_cancel_q(pe); - tw_gvt_step2(pe); + tw_scheduler_rollback_and_cancel_events_pe(pe); if (DEBUG_DIRECTOR > 1) { printf("PE %lu: Time stamp at the end of GVT time: %f - AVL-tree sized: %d\n", g_tw_mynode, gvt, pe->avl_tree_size); @@ -146,7 +143,7 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { // Filtering events to freeze assert(next_event->prev == NULL); #ifdef USE_RAND_TIEBREAKER - assert(tw_event_sig_compare(next_event->sig, gvt_sig) >= 0); + assert(tw_event_sig_compare_ptr(&next_event->sig, &gvt_sig) >= 0); #else assert(next_event->recv_ts >= gvt); #endif @@ -165,11 +162,11 @@ static void shift_events_to_future_pe(tw_pe * pe, tw_stime gvt) { next_event->recv_ts += switch_offset; next_event->sig.recv_ts = next_event->recv_ts; } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.sig_at.recv_ts); + assert(next_event->recv_ts >= g_tw_gvt_hook_trigger.sig_at.recv_ts); #else next_event->recv_ts += switch_offset; } - assert(next_event->recv_ts >= g_tw_trigger_arbitrary_fun.at); + assert(next_event->recv_ts >= g_tw_gvt_hook_trigger.at); #endif // store event in deque_events to inject immediately back to the queue @@ -382,11 +379,12 @@ static void events_surrogate_to_high_def_switch(tw_pe * pe, tw_stime gvt) { } +void 
director_switch(tw_pe * pe, bool past_end_time) { #ifdef USE_RAND_TIEBREAKER -void director_switch(tw_pe * pe, tw_event_sig gvt_sig) { + tw_event_sig const gvt_sig = pe->GVT_sig; tw_stime const gvt = gvt_sig.recv_ts; #else -void director_switch(tw_pe * pe, tw_stime gvt) { + tw_stime const gvt = pe->GVT; #endif assert(is_surrogate_configured); @@ -400,15 +398,18 @@ void director_switch(tw_pe * pe, tw_stime gvt) { printf("GVT %d at %f in %s arbitrary-fun-status=", i++, gvt, surr_config.director.is_surrogate_on() ? "surrogate-mode" : "high-definition"); - switch (g_tw_trigger_arbitrary_fun.active) { - case ARBITRARY_FUN_enabled: - printf("enabled\n"); + switch (g_tw_gvt_hook_trigger.status) { + case GVT_HOOK_STATUS_timestamp: + printf("timestamp\n"); break; - case ARBITRARY_FUN_disabled: + case GVT_HOOK_STATUS_disabled: printf("disabled\n"); break; - case ARBITRARY_FUN_triggered: - printf("triggered\n"); + case GVT_HOOK_STATUS_every_n_gvt: + printf("every-n-gvt\n"); + break; + case GVT_HOOK_STATUS_model_call: + printf("model-call\n"); break; } } @@ -430,16 +431,20 @@ void director_switch(tw_pe * pe, tw_stime gvt) { return; } - // Detecting if we are going to switch - if (switch_at.current_i < switch_at.total - && g_tw_trigger_arbitrary_fun.active == ARBITRARY_FUN_triggered) { + // Detecting if we are going to switch. + // + // Newer ROSS calls g_tw_gvt_hook only after the timestamp trigger fires, + // and it sets g_tw_gvt_hook_trigger.status back to GVT_HOOK_STATUS_disabled + // before entering this hook. Therefore, do not check for the old + // ARBITRARY_FUN_triggered state here; it no longer exists. 
+ if (switch_at.current_i < switch_at.total) { double const switch_time = switch_at.time_stampts[switch_at.current_i]; #ifdef USE_RAND_TIEBREAKER - assert(g_tw_trigger_arbitrary_fun.sig_at.recv_ts == switch_at.time_stampts[switch_at.current_i]); + assert(g_tw_gvt_hook_trigger.sig_at.recv_ts == switch_time); #else - assert(g_tw_trigger_arbitrary_fun.at == switch_at.time_stampts[switch_at.current_i]); + assert(g_tw_gvt_hook_trigger.at == switch_time); #endif - assert(gvt >= switch_time); // current gvt shouldn't be that far ahead from the point we wanted to trigger it + assert(gvt >= switch_time); // current gvt should not be before the requested switch time } else { return; } @@ -457,10 +462,10 @@ void director_switch(tw_pe * pe, tw_stime gvt) { // Rollback if in optimistic mode #ifdef USE_RAND_TIEBREAKER if (g_tw_synchronization_protocol == OPTIMISTIC) { - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); rollback_and_cancel_events_pe(pe, gvt_sig); - //assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) <= 0); - assert(tw_event_sig_compare(pe->GVT_sig, gvt_sig) == 0); + //assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) <= 0); + assert(tw_event_sig_compare_ptr(&pe->GVT_sig, &gvt_sig) == 0); } #else if (g_tw_synchronization_protocol == OPTIMISTIC) { @@ -502,10 +507,10 @@ void director_switch(tw_pe * pe, tw_stime gvt) { tw_event_sig time_stamp = {0}; time_stamp.recv_ts = next_switch; //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(time_stamp); + tw_trigger_gvt_hook_at_event_sig(time_stamp); #else //printf("Adding a trigger to activate next switch!\n"); - tw_trigger_arbitrary_fun_at(next_switch); + tw_trigger_gvt_hook_at(next_switch); #endif } diff --git a/src/surrogate/zmqml/Makefile b/src/surrogate/zmqml/Makefile index 4c28ed54..b4abcfab 100644 --- a/src/surrogate/zmqml/Makefile +++ b/src/surrogate/zmqml/Makefile @@ -7,7 +7,7 @@ 
TARGETS=libzmqmlrequester.so demozmqmlrequester all: $(TARGETS) libzmqmlrequester.so: zmqmlrequester.o - $(CXX) -shared -o $@ $^ + $(CXX) -shared -o $@ $^ $(LDFLAGS) zmqmlrequester.o: zmqmlrequester.cpp zmqmlrequester.h $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ diff --git a/src/util/rc-stack.c b/src/util/rc-stack.c index ebb2131f..5f68123e 100644 --- a/src/util/rc-stack.c +++ b/src/util/rc-stack.c @@ -107,7 +107,7 @@ void rc_stack_gc(tw_lp const *lp, struct rc_stack *s) { while (ent != &s->head) { rc_entry *r = qlist_entry(ent, rc_entry, ql); #ifdef USE_RAND_TIEBREAKER - if (lp == NULL || tw_event_sig_compare(r->e_sig, lp->pe->GVT_sig) == -1) { + if (lp == NULL || tw_event_sig_compare_ptr(&r->e_sig, &lp->pe->GVT_sig) == -1) { #else if (lp == NULL || r->time < lp->pe->GVT){ #endif