Skip to content

Commit 93dd668

Browse files
author
Obada Haddad
committed
change Dockerfile.compute_worker to remove docker; make GPU selection compatible with podman and docker
1 parent 6c6d0a3 commit 93dd668

2 files changed

Lines changed: 59 additions & 50 deletions

File tree

Dockerfile.compute_worker

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@ FROM --platform=linux/amd64 fedora:42
44
ENV PYTHONUNBUFFERED 1
55

66
# Install Docker
7-
RUN dnf -y install dnf-plugins-core && \
8-
dnf-3 config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo && \
9-
dnf -y update && \
10-
dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin && \
7+
RUN dnf -y update && \
118
dnf install -y python3.9 && \
129
dnf clean all && \
1310
rm -rf /var/cache /var/log/dnf* /var/log/yum.*

compute_worker/compute_worker.py

Lines changed: 58 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -56,51 +56,54 @@
5656

5757

5858
def show_progress(line, progress):
59-
if "Status: Image is up to date" in line["status"]:
60-
logger.info(line["status"])
61-
62-
completed = False
63-
if line["status"] == "Download complete":
64-
description = f"[blue][Download complete, waiting for extraction {line['id']}]"
65-
completed = True
66-
elif line["status"] == "Downloading":
67-
description = f"[bold][Downloading {line['id']}]"
68-
elif line["status"] == "Pull complete":
69-
description = f"[green][Extraction complete {line['id']}]"
70-
completed = True
71-
elif line["status"] == "Extracting":
72-
description = f"[blue][Extracting {line['id']}]"
73-
74-
else:
75-
# skip other statuses, but show extraction progress
76-
return
77-
78-
task_id = line["id"]
79-
if task_id not in tasks.keys():
80-
if completed:
81-
# some layers are really small that they download immediately without showing
82-
# anything as Downloading in the stream.
83-
# For that case, show a completed progress bar
84-
tasks[task_id] = progress.add_task(description, total=100, completed=100)
85-
else:
86-
tasks[task_id] = progress.add_task(
87-
description, total=line["progressDetail"]["total"]
88-
)
89-
else:
90-
if completed:
91-
# due to the stream, the Download complete output can happen before the Downloading
92-
# bar outputs the 100%. So when we detect that the download is in fact complete,
93-
# update the progress bar to show 100%
94-
progress.update(
95-
tasks[task_id], description=description, total=100, completed=100
96-
)
59+
try:
60+
if "Status: Image is up to date" in line["status"]:
61+
logger.info(line["status"])
62+
63+
completed = False
64+
if line["status"] == "Download complete":
65+
description = f"[blue][Download complete, waiting for extraction {line['id']}]"
66+
completed = True
67+
elif line["status"] == "Downloading":
68+
description = f"[bold][Downloading {line['id']}]"
69+
elif line["status"] == "Pull complete":
70+
description = f"[green][Extraction complete {line['id']}]"
71+
completed = True
72+
elif line["status"] == "Extracting":
73+
description = f"[blue][Extracting {line['id']}]"
74+
9775
else:
98-
progress.update(
99-
tasks[task_id],
100-
completed=line["progressDetail"]["current"],
101-
total=line["progressDetail"]["total"],
102-
)
76+
# skip other statuses, but show extraction progress
77+
return
10378

79+
task_id = line["id"]
80+
if task_id not in tasks.keys():
81+
if completed:
82+
# some layers are really small that they download immediately without showing
83+
# anything as Downloading in the stream.
84+
# For that case, show a completed progress bar
85+
tasks[task_id] = progress.add_task(description, total=100, completed=100)
86+
else:
87+
tasks[task_id] = progress.add_task(
88+
description, total=line["progressDetail"]["total"]
89+
)
90+
else:
91+
if completed:
92+
# due to the stream, the Download complete output can happen before the Downloading
93+
# bar outputs the 100%. So when we detect that the download is in fact complete,
94+
# update the progress bar to show 100%
95+
progress.update(
96+
tasks[task_id], description=description, total=100, completed=100
97+
)
98+
else:
99+
progress.update(
100+
tasks[task_id],
101+
completed=line["progressDetail"]["current"],
102+
total=line["progressDetail"]["total"],
103+
)
104+
except Exception as e:
105+
logger.error("There was an error showing the progress bar")
106+
logger.exception(e)
104107

105108
# -----------------------------------------------
106109
# Celery + Rabbit MQ
@@ -882,16 +885,25 @@ async def _run_program_directory(self, program_dir, kind):
882885
"SYS_CHROOT",
883886
]
884887
# Configure whether or not we use the GPU. Also setting auto_remove to False because
888+
if os.environ.get("CONTAINER_ENGINE_EXECUTABLE", "docker").lower() == "docker":
889+
security_options = ["no-new-privileges"]
890+
else:
891+
security_options = ["label=disable"]
892+
# Reading the device ID from the environment lets users choose which GPU to use via the .env file; all GPUs are used by default when no value is given
893+
device_id = [os.environ.get("GPU_DEVICE", "nvidia.com/gpu=all")]
885894
if os.environ.get("USE_GPU", "false").lower() == "true":
886895
logger.info("Running the container with GPU capabilities")
887896
host_config = client.create_host_config(
888897
auto_remove=False,
889898
cap_drop=cap_drop_list,
890899
binds=volumes_config,
891900
userns_mode="host",
892-
security_opt=["no-new-privileges"],
901+
security_opt=security_options,
893902
device_requests=[
894-
docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])
903+
{
904+
"Driver": "cdi",
905+
"DeviceIDs": device_id,
906+
},
895907
],
896908
)
897909
else:
@@ -900,7 +912,7 @@ async def _run_program_directory(self, program_dir, kind):
900912
cap_drop=cap_drop_list,
901913
binds=volumes_config,
902914
userns_mode="host",
903-
security_opt=["no-new-privileges"],
915+
security_opt=security_options,
904916
)
905917

906918
logger.info("Running container with command " + command)

0 commit comments

Comments
 (0)