Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 24 additions & 17 deletions scripts/generate_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,13 @@ def suppress_stdout():
sys.stdout.close()
sys.stdout = old_stdout

def module_dict_from_module_string(module):
module_name, module_version = module.split("/", 1)
module_dict = {"module_name": module_name, "module_version": module_version, "full_module_name": module}

def load_and_list_modules(module_name):
return module_dict

def load_and_list_modules(full_module_name):
"""
Run `module load <name>` and `module list` inside a subshell.
Returns the list of loaded modules visible inside that subshell.
Expand All @@ -56,33 +61,37 @@ def load_and_list_modules(module_name):

# Run as one shell script so the same session is used
cmd = f"""
module load {module_name} || exit 1
module load {full_module_name} || exit 1
module --terse list 2>&1
"""

result = subprocess.run(["bash", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

if result.returncode != 0:
raise RuntimeError(f"Failed to load module '{module_name}':\n{result.stdout}")
raise RuntimeError(f"Failed to load module '{full_module_name}':\n{result.stdout}")

# Parse module list output
modules = [line for line in result.stdout.splitlines() if "/" in line]
modules = [
module_dict_from_module_string(line)
for line in result.stdout.splitlines()
if "/" in line
]

# Filter out the modules we expect to be loaded
eessi_extend_module_stub = "EESSI-extend/"
eb_module_stub = "EasyBuild/"
if module_name.startswith(eessi_extend_module_stub):
eessi_extend_module_name = "EESSI-extend"
eb_module_name = "EasyBuild"
if full_module_name.startswith(f"{eessi_extend_module_name}/"):
# Don't filter anything
pass
elif module_name.startswith(eb_module_stub):
elif full_module_name.startswith(f"{eb_module_name}/"):
# Filter EESSI-extend
modules = [module for module in modules if not module.startswith(eessi_extend_module_stub)]
modules = [module for module in modules if module["module_name"] != eessi_extend_module_name]
else:
# Filter EESSI-extend and EasyBuild
modules = [
module
for module in modules
if not module.startswith(eessi_extend_module_stub) and not module.startswith(eb_module_stub)
if module["module_name"] != eessi_extend_module_name and module["module_name"] != eb_module_name
]

return modules
Expand Down Expand Up @@ -209,7 +218,9 @@ def collect_eb_files(base_path):
# Store the toolchain hierarchies supported by the EESSI version
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {}
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]:
toolchain_family = f"{top_level_toolchain['name']}_{top_level_toolchain['version']}"
# Top-level toolchain versions typically look like 2024a/2024b, so put the
# version first in the key: that makes the toolchain families easy to sort
toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}"
# Get the hierarchy and always add the system toolchain
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"][toolchain_family] = [
{"name": "system", "version": "system"}
Expand Down Expand Up @@ -241,6 +252,7 @@ def collect_eb_files(base_path):
]
shutil.rmtree(easyblocks_dir)

# Store everything we now know about the installation as a dict
# Use the path as the key since we know it is unique
eessi_software["eessi_version"][eessi_version][file] = parsed_ec["ec"].asdict()
eessi_software["eessi_version"][eessi_version][file]["mtime"] = os.path.getmtime(file)
Expand All @@ -255,13 +267,8 @@ def collect_eb_files(base_path):
eessi_software["eessi_version"][eessi_version].pop(file)
continue

# Store everything we now know about the installation as a dict
# Add important data that is related to the module environment
eessi_software["eessi_version"][eessi_version][file]["full_mod_name"] = parsed_ec["full_mod_name"]
eessi_software["eessi_version"][eessi_version][file]["short_mod_name"] = parsed_ec["short_mod_name"]
eessi_software["eessi_version"][eessi_version][file]["required_modules"] = load_and_list_modules(
parsed_ec["full_mod_name"]
)
eessi_software["eessi_version"][eessi_version][file]["module"] = module_dict_from_module_string(parsed_ec["full_mod_name"])
# Retain the easyblocks used so we can use a heuristic to figure out the type of extensions (R, Python, Perl)
eessi_software["eessi_version"][eessi_version][file]["easyblocks"] = easyblocks_used

Expand Down
26 changes: 15 additions & 11 deletions scripts/process_eessi_software_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
]

TOOLCHAIN_FAMILIES = [
"foss_2025b",
"foss_2025a",
"foss_2024a",
"foss_2023b",
"foss_2023a",
"foss_2022b",
"2025b_foss",
"2025a_foss",
"2024a_foss",
"2023b_foss",
"2023a_foss",
"2022b_foss",
]


Expand All @@ -47,7 +47,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
"toolchain_families_compatibility": [
key for key in toolchain_families.keys() if file_metadata["toolchain"] in toolchain_families[key]
],
"modulename": file_metadata["short_mod_name"],
"module": file_metadata["module"],
"required_modules": file_metadata["required_modules"],
}

Expand All @@ -65,7 +65,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool

# 2) Construct the modulefile path
before_arch, _, _ = original_path.partition(detected_arch)
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["short_mod_name"] + '.lua'
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["module"]["full_module_name"] + '.lua'
spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua"

# 3) Substitute each architecture and test module file existence in spider cache
Expand Down Expand Up @@ -161,11 +161,11 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
version_dict = copy.deepcopy(base_version_dict)
version_dict["version"] = component[1]
version_dict["versionsuffix"] = ""
version_dict["type"] = "Component"
# version_dict["type"] = "Component"
version_dict["parent_software"] = {
"name": file_metadata["name"],
"version": file_metadata["version"],
"version": file_metadata["versionsuffix"],
"versionsuffix": file_metadata["versionsuffix"],
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

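Nasty little bug: the dict literal had two "version" keys, so the second one (set from `versionsuffix`) silently overwrote the real version and no "versionsuffix" entry was ever stored.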

}
version_dict["description"] = (
f"""{component[0]} is a component included in the software module for {version_dict['parent_software']['name']}"""
Expand Down Expand Up @@ -228,6 +228,7 @@ def get_all_software(eessi_files_by_eessi_version):
for version in all_software_information[software]["versions"]:
if toolchain_family in version["toolchain_families_compatibility"]:
reference_version = version
break
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should break here once we have a hit since we want the most recent version.

if reference_version is None:
raise ValueError(f"No toolchain compatibility in {all_software_information[software]}")
for top_level_info in top_level_info_list + ["description"]:
Expand Down Expand Up @@ -305,7 +306,10 @@ def main():
# - versionsuffix
# - cpu_arch (list)
# - gpu_arch (list, empty for now)
# - module_file
# - module
# - module_name
# - module_version
# - full_module_name
# - required_modules (list of modules)
base_json_metadata = {"timestamp": software_metadata["timestamp"]}
eessi_versions = software_metadata["eessi_version"].keys()
Expand Down