Skip to content

Commit 6732e5c

Browse files
authored
Merge pull request #11 from EESSI/module_dict
Make module information a dict with `name`, `version`, and `full_module_name`
2 parents ceb5d0d + fbcab54 commit 6732e5c

3 files changed

Lines changed: 147 additions & 61 deletions

File tree

.github/workflows/prs.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# PR check: generate the EESSI API data for a single EESSI version and make
# sure the documentation site still builds with it.
name: Generate and serve API data for EESSI
on:
  pull_request:
    branches:
      - main
# Only keep the most recent run per pull request.
concurrency:
  group: pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true
permissions:
  contents: read
jobs:
  test_data_generation:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: eessi/github-action-eessi@v3
        with:
          use_eessi_module: true
          eessi_stack_version: "2025.06"
      - name: Create a virtualenv to install zensical
        run: |
          python -m venv /tmp/venv_docs
          source /tmp/venv_docs/bin/activate
          pip install zensical
      - name: Generate API data
        run: |
          echo "Generating data files..."
          module purge
          module unuse $MODULEPATH
          module use /cvmfs/software.eessi.io/init/modules/
          # Only do 2023.06 for EB 5 since this is just a test
          ( module load EESSI/2023.06 && module load EasyBuild/5 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) &
          gen_pid=$!
          # Merge all these results together
          # A bare `wait` returns 0 even when the background job failed, so
          # wait on the specific PID to propagate a generation failure.
          wait "$gen_pid"
          python scripts/merge_data_files.py out.yaml eessi*.yaml
          mv out.yaml docs/data/eessi_software_metadata.yaml
          # Generate json data files and markdown index/description for them
          cd docs/data
          python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata
          python ../../scripts/calculate_hashes.py
          for json_file in *.json; do
            python ../../scripts/generate_schema_md.py "$json_file" >> index.md
          done
      - name: Test building the website
        run: |
          source /tmp/venv_docs/bin/activate
          zensical build --clean
      - name: Upload EESSI API metadata
        uses: actions/upload-artifact@v4
        with:
          name: eessi-api-metadata
          path: docs/data/eessi_api_metadata_software.json

scripts/generate_data_files.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,14 @@ def suppress_stdout():
4747
sys.stdout = old_stdout
4848

4949

50-
def load_and_list_modules(module_name):
50+
def module_dict_from_module_string(module):
    """
    Split a full module name into its name and version parts.

    The string is split on the first "/" only, so everything after it
    (including any further "/") is kept as the version.

    :param module: full module name, e.g. "GCC/12.3.0"
    :return: dict with the keys "module_name", "module_version" and
        "full_module_name"; "module_version" is an empty string when the
        module string contains no "/"
    """
    full_module_name = module.strip()
    # partition() never raises on a missing separator, unlike unpacking the
    # result of split("/", 1), which fails for a version-less module string
    module_name, _, module_version = full_module_name.partition("/")

    return {
        "module_name": module_name,
        "module_version": module_version,
        "full_module_name": full_module_name,
    }
55+
56+
57+
def load_and_list_modules(full_module_name):
5158
"""
5259
Run `module load <name>` and `module list` inside a subshell.
5360
Returns the list of loaded modules visible inside that subshell.
@@ -56,33 +63,33 @@ def load_and_list_modules(module_name):
5663

5764
# Run as one shell script so the same session is used
5865
cmd = f"""
59-
module load {module_name} || exit 1
66+
module load {full_module_name} || exit 1
6067
module --terse list 2>&1
6168
"""
6269

6370
result = subprocess.run(["bash", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
6471

6572
if result.returncode != 0:
66-
raise RuntimeError(f"Failed to load module '{module_name}':\n{result.stdout}")
73+
raise RuntimeError(f"Failed to load module '{full_module_name}':\n{result.stdout}")
6774

6875
# Parse module list output
69-
modules = [line for line in result.stdout.splitlines() if "/" in line]
76+
modules = [module_dict_from_module_string(line) for line in result.stdout.splitlines() if "/" in line]
7077

7178
# Filter out the modules we expect to be loaded
72-
eessi_extend_module_stub = "EESSI-extend/"
73-
eb_module_stub = "EasyBuild/"
74-
if module_name.startswith(eessi_extend_module_stub):
79+
eessi_extend_module_name = "EESSI-extend"
80+
eb_module_name = "EasyBuild"
81+
if full_module_name.startswith(f"{eessi_extend_module_name}/"):
7582
# Don't filter anything
7683
pass
77-
elif module_name.startswith(eb_module_stub):
84+
elif full_module_name.startswith(f"{eb_module_name}/"):
7885
# Filter EESSI-extend
79-
modules = [module for module in modules if not module.startswith(eessi_extend_module_stub)]
86+
modules = [module for module in modules if module["module_name"] != eessi_extend_module_name]
8087
else:
8188
# Filter EESSI-extend and EasyBuild
8289
modules = [
8390
module
8491
for module in modules
85-
if not module.startswith(eessi_extend_module_stub) and not module.startswith(eb_module_stub)
92+
if module["module_name"] != eessi_extend_module_name and module["module_name"] != eb_module_name
8693
]
8794

8895
return modules
@@ -209,7 +216,9 @@ def collect_eb_files(base_path):
209216
# Store the toolchain hierarchies supported by the EESSI version
210217
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {}
211218
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]:
212-
toolchain_family = f"{top_level_toolchain['name']}_{top_level_toolchain['version']}"
219+
# versions are typically 2024a/2024b etc. for top level toolchains
220+
# so let's use that to make sorting easy
221+
toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}"
213222
# Get the hierarchy and always add the system toolchain
214223
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"][toolchain_family] = [
215224
{"name": "system", "version": "system"}
@@ -241,6 +250,7 @@ def collect_eb_files(base_path):
241250
]
242251
shutil.rmtree(easyblocks_dir)
243252

253+
# Store everything we now know about the installation as a dict
244254
# Use the path as the key since we know it is unique
245255
eessi_software["eessi_version"][eessi_version][file] = parsed_ec["ec"].asdict()
246256
eessi_software["eessi_version"][eessi_version][file]["mtime"] = os.path.getmtime(file)
@@ -255,11 +265,8 @@ def collect_eb_files(base_path):
255265
eessi_software["eessi_version"][eessi_version].pop(file)
256266
continue
257267

258-
# Store everything we now know about the installation as a dict
259268
# Add important data that is related to the module environment
260-
eessi_software["eessi_version"][eessi_version][file]["full_mod_name"] = parsed_ec["full_mod_name"]
261-
eessi_software["eessi_version"][eessi_version][file]["short_mod_name"] = parsed_ec["short_mod_name"]
262-
eessi_software["eessi_version"][eessi_version][file]["required_modules"] = load_and_list_modules(
269+
eessi_software["eessi_version"][eessi_version][file]["module"] = module_dict_from_module_string(
263270
parsed_ec["full_mod_name"]
264271
)
265272
# Retain the easyblocks used so we can use a heuristic to figure out the type of extensions (R, Python, Perl)

scripts/process_eessi_software_metadata.py

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
]
2626

2727
TOOLCHAIN_FAMILIES = [
28-
"foss_2025b",
29-
"foss_2025a",
30-
"foss_2024a",
31-
"foss_2023b",
32-
"foss_2023a",
33-
"foss_2022b",
28+
"2025b_foss",
29+
"2025a_foss",
30+
"2024a_foss",
31+
"2023b_foss",
32+
"2023a_foss",
33+
"2022b_foss",
3434
]
3535

3636

@@ -47,7 +47,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
4747
"toolchain_families_compatibility": [
4848
key for key in toolchain_families.keys() if file_metadata["toolchain"] in toolchain_families[key]
4949
],
50-
"modulename": file_metadata["short_mod_name"],
50+
"module": file_metadata["module"],
5151
"required_modules": file_metadata["required_modules"],
5252
}
5353

@@ -65,7 +65,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
6565

6666
# 2) Construct the modulefile path
6767
before_arch, _, _ = original_path.partition(detected_arch)
68-
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["short_mod_name"] + '.lua'
68+
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["module"]["full_module_name"] + ".lua"
6969
spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua"
7070

7171
# 3) Substitute each architecture and test module file existence in spider cache
@@ -93,60 +93,77 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
9393
version_dict["versionsuffix"] = file_metadata["versionsuffix"]
9494
# No need for a type here, as we separate out the different types
9595
# version_dict['type'] = "application"
96-
software[file_metadata["name"]]["versions"].append(version_dict)
97-
# - Now extensions
96+
# - Now extensions, we keep them both separately for each type and
97+
# as dicts with extension types in the specific installation
98+
version_dict["extensions"] = []
9899
python_extensions = {}
99100
perl_extensions = {}
100101
r_extensions = {}
101102
octave_extensions = {}
102103
ruby_extensions = {}
103104
for ext in file_metadata["exts_list"]:
104-
version_dict = copy.deepcopy(base_version_dict)
105+
ext_version_dict = copy.deepcopy(base_version_dict)
105106
# (extensions are tuples beginning with name and version)
106-
version_dict["version"] = ext[1]
107-
version_dict["versionsuffix"] = ""
107+
ext_version_dict["version"] = ext[1]
108+
ext_version_dict["versionsuffix"] = ""
108109
# Add the parent software name so we can make a set for all versions
109-
version_dict["parent_software"] = {
110+
ext_version_dict["parent_software"] = {
110111
"name": file_metadata["name"],
111112
"version": file_metadata["version"],
112113
"versionsuffix": file_metadata["versionsuffix"],
113114
}
114115
# First we do a heuristic to figure out the type of extension
115116
if "pythonpackage.py" in file_metadata["easyblocks"]:
116-
version_dict["description"] = (
117-
f"""{ext[0]} is a Python package included in the software module for {version_dict['parent_software']['name']}"""
117+
# First add it to our list of extensions for the parent software
118+
version_dict["extensions"].append({"type": "python", "name": ext[0], "version": ext[1]})
119+
120+
# Now create the custom entry
121+
ext_version_dict["description"] = (
122+
f"""{ext[0]} is a Python package included in the software module for {ext_version_dict['parent_software']['name']}"""
118123
)
119124
python_extensions[ext[0]] = {"versions": [], "parent_software": set()}
120-
python_extensions[ext[0]]["versions"].append(version_dict)
121-
python_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
125+
python_extensions[ext[0]]["versions"].append(ext_version_dict)
126+
python_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
122127
elif "rpackage.py" in file_metadata["easyblocks"]:
123-
version_dict["description"] = (
124-
f"""{ext[0]} is an R package included in the software module for {version_dict['parent_software']['name']}"""
128+
# First add it to our list of extensions for the parent software
129+
version_dict["extensions"].append({"type": "r", "name": ext[0], "version": ext[1]})
130+
131+
ext_version_dict["description"] = (
132+
f"""{ext[0]} is an R package included in the software module for {ext_version_dict['parent_software']['name']}"""
125133
)
126134
r_extensions[ext[0]] = {"versions": [], "parent_software": set()}
127-
r_extensions[ext[0]]["versions"].append(version_dict)
128-
r_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
135+
r_extensions[ext[0]]["versions"].append(ext_version_dict)
136+
r_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
129137
elif "perlmodule.py" in file_metadata["easyblocks"]:
130-
version_dict["description"] = (
131-
f"""{ext[0]} is a Perl module package included in the software module for {version_dict['parent_software']['name']}"""
138+
# First add it to our list of extensions for the parent software
139+
version_dict["extensions"].append({"type": "perl", "name": ext[0], "version": ext[1]})
140+
141+
ext_version_dict["description"] = (
142+
f"""{ext[0]} is a Perl module package included in the software module for {ext_version_dict['parent_software']['name']}"""
132143
)
133144
perl_extensions[ext[0]] = {"versions": [], "parent_software": set()}
134-
perl_extensions[ext[0]]["versions"].append(version_dict)
135-
perl_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
145+
perl_extensions[ext[0]]["versions"].append(ext_version_dict)
146+
perl_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
136147
elif "octavepackage.py" in file_metadata["easyblocks"]:
137-
version_dict["description"] = (
138-
f"""{ext[0]} is an Octave package included in the software module for {version_dict['parent_software']['name']}"""
148+
# First add it to our list of extensions for the parent software
149+
version_dict["extensions"].append({"type": "octave", "name": ext[0], "version": ext[1]})
150+
151+
ext_version_dict["description"] = (
152+
f"""{ext[0]} is an Octave package included in the software module for {ext_version_dict['parent_software']['name']}"""
139153
)
140154
octave_extensions[ext[0]] = {"versions": [], "parent_software": set()}
141-
octave_extensions[ext[0]]["versions"].append(version_dict)
142-
octave_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
155+
octave_extensions[ext[0]]["versions"].append(ext_version_dict)
156+
octave_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
143157
elif "rubygem.py" in file_metadata["easyblocks"]:
144-
version_dict["description"] = (
145-
f"""{ext[0]} is an Ruby gem included in the software module for {version_dict['parent_software']['name']}"""
158+
# First add it to our list of extensions for the parent software
159+
version_dict["extensions"].append({"type": "ruby", "name": ext[0], "version": ext[1]})
160+
161+
ext_version_dict["description"] = (
162+
f"""{ext[0]} is an Ruby gem included in the software module for {ext_version_dict['parent_software']['name']}"""
146163
)
147164
ruby_extensions[ext[0]] = {"versions": [], "parent_software": set()}
148-
ruby_extensions[ext[0]]["versions"].append(version_dict)
149-
ruby_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
165+
ruby_extensions[ext[0]]["versions"].append(ext_version_dict)
166+
ruby_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
150167
else:
151168
raise ValueError(
152169
f"Only known extension types are R, Python and Perl! Easyblocks used by {original_path} were {file_metadata['easyblocks']}"
@@ -155,24 +172,30 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
155172
components = {}
156173
if "components" in file_metadata.keys():
157174
for component in file_metadata["components"]:
175+
# First add it to our list of extensions for the parent software
176+
version_dict["extensions"].append({"type": "component", "name": component[0], "version": component[1]})
177+
158178
# components are tuples beginning with name and version
159179
if component[0] not in components.keys():
160180
components[component[0]] = {"versions": [], "parent_software": set()}
161-
version_dict = copy.deepcopy(base_version_dict)
162-
version_dict["version"] = component[1]
163-
version_dict["versionsuffix"] = ""
164-
version_dict["type"] = "Component"
165-
version_dict["parent_software"] = {
181+
ext_version_dict = copy.deepcopy(base_version_dict)
182+
ext_version_dict["version"] = component[1]
183+
ext_version_dict["versionsuffix"] = ""
184+
# version_dict["type"] = "Component"
185+
ext_version_dict["parent_software"] = {
166186
"name": file_metadata["name"],
167187
"version": file_metadata["version"],
168-
"version": file_metadata["versionsuffix"],
188+
"versionsuffix": file_metadata["versionsuffix"],
169189
}
170-
version_dict["description"] = (
171-
f"""{component[0]} is a component included in the software module for {version_dict['parent_software']['name']}"""
190+
ext_version_dict["description"] = (
191+
f"""{component[0]} is a component included in the software module for {ext_version_dict['parent_software']['name']}"""
172192
)
173-
components[component[0]]["versions"].append(version_dict)
174-
components[component[0]]["parent_software"].add(version_dict["parent_software"]["name"])
175-
# print(f"Software: {software}, Python: {python_extensions}, Perl: {perl_extensions}, R: {r_extensions}, Component: {components}")
193+
components[component[0]]["versions"].append(ext_version_dict)
194+
components[component[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
195+
196+
# Now that we've processed all the information let's add the entry
197+
software[file_metadata["name"]]["versions"].append(version_dict)
198+
176199
return software, {
177200
"python": python_extensions,
178201
"perl": perl_extensions,
@@ -228,6 +251,7 @@ def get_all_software(eessi_files_by_eessi_version):
228251
for version in all_software_information[software]["versions"]:
229252
if toolchain_family in version["toolchain_families_compatibility"]:
230253
reference_version = version
254+
break
231255
if reference_version is None:
232256
raise ValueError(f"No toolchain compatibility in {all_software_information[software]}")
233257
for top_level_info in top_level_info_list + ["description"]:
@@ -305,7 +329,10 @@ def main():
305329
# - versionsuffix
306330
# - cpu_arch (list)
307331
# - gpu_arch (list, empty for now)
308-
# - module_file
332+
# - module
333+
# - module_name
334+
# - module_version
335+
# - full_module_name
309336
# - required_modules (list of modules)
310337
base_json_metadata = {"timestamp": software_metadata["timestamp"]}
311338
eessi_versions = software_metadata["eessi_version"].keys()

0 commit comments

Comments
 (0)