mirror of
https://github.com/containers/podman-compose.git
synced 2025-01-24 23:08:38 +01:00
Merge pull request #920 from mokeyish/gpu
Add support for enabling GPU access
This commit is contained in:
commit
23ad5c3ef7
3
.gitignore
vendored
3
.gitignore
vendored
@ -105,3 +105,6 @@ venv.bak/
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
|
||||
.vscode
|
||||
|
11
examples/nvidia-smi/docker-compose.yaml
Normal file
11
examples/nvidia-smi/docker-compose.yaml
Normal file
@ -0,0 +1,11 @@
|
||||
services:
|
||||
test:
|
||||
image: nvidia/cuda:12.3.1-base-ubuntu20.04
|
||||
command: nvidia-smi
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
@ -635,6 +635,62 @@ def get_secret_args(compose, cnt, secret, podman_is_building=False):
|
||||
|
||||
|
||||
def container_to_res_args(cnt, podman_args):
    """Translate the resource settings of service *cnt* into podman arguments.

    Runs the CPU translator and then the GPU translator; each one receives
    the same *cnt* mapping and the same *podman_args* list.
    """
    # Order matters for the resulting command line: CPU flags first, GPU second.
    for translate in (container_to_cpu_res_args, container_to_gpu_res_args):
        translate(cnt, podman_args)
|
||||
|
||||
|
||||
def container_to_gpu_res_args(cnt, podman_args):
    """Append podman CDI arguments for the NVIDIA GPUs reserved by a service.

    Reads ``deploy.resources.reservations.devices`` from the compose service
    mapping *cnt*. For every entry whose ``driver`` is ``"nvidia"`` and whose
    ``capabilities`` contain ``"gpu"``, *podman_args* is extended in place with
    ``--device nvidia.com/gpu=<selector>`` pairs:

    * a non-empty ``device_ids`` list selects exactly the listed ids;
    * otherwise a numeric ``count`` selects ids ``0 .. count - 1``;
    * otherwise (both missing, null or ``"all"``) the selector is ``all``.

    When at least one entry matched, ``--security-opt=label=disable`` is
    appended once after all device arguments.

    Args:
        cnt: compose service definition (a dict); any of the ``deploy``,
            ``resources``, ``reservations`` or ``devices`` levels may be
            missing or null.
        podman_args: list of podman command-line arguments, mutated in place.

    Raises:
        ValueError: if ``count`` is a string that is neither ``"all"`` nor an
            integer literal.
    """
    # https://docs.docker.com/compose/gpu-support/
    # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html

    # ``or`` (rather than a .get() default) also covers keys that are present
    # but explicitly null in the YAML.
    deploy = cnt.get("deploy") or {}
    res = deploy.get("resources") or {}
    reservations = res.get("reservations") or {}
    devices = reservations.get("devices") or []

    gpu_on = False
    for device in devices:
        capabilities = device.get("capabilities") or ()
        if device.get("driver") != "nvidia" or "gpu" not in capabilities:
            continue

        # A null or empty device_ids falls through to ``count``.
        device_ids = device.get("device_ids") or "all"
        count = device.get("count")
        if count is None:
            count = "all"

        if device_ids != "all":
            selectors = list(device_ids)
        elif count != "all":
            # int() accepts counts that arrive as strings, e.g. after
            # ``${VAR}`` interpolation in the compose file.
            selectors = range(int(count))
        else:
            selectors = ["all"]

        for selector in selectors:
            podman_args.extend(("--device", f"nvidia.com/gpu={selector}"))
        gpu_on = True

    if gpu_on:
        podman_args.append("--security-opt=label=disable")
|
||||
|
||||
|
||||
def container_to_cpu_res_args(cnt, podman_args):
|
||||
# v2: https://docs.docker.com/compose/compose-file/compose-file-v2/#cpu-and-other-resources
|
||||
# cpus, cpu_shares, mem_limit, mem_reservation
|
||||
cpus_limit_v2 = try_float(cnt.get("cpus", None), None)
|
||||
|
@ -325,3 +325,106 @@ class TestContainerToArgs(unittest.IsolatedAsyncioTestCase):
|
||||
"busybox",
|
||||
],
|
||||
)
|
||||
|
||||
async def test_gpu(self):
    """Check the podman arguments generated for NVIDIA GPU reservations.

    Each scenario swaps in a different device reservation on the same
    container and compares the full argument list from container_to_args.
    """
    c = create_compose_mock()

    cnt = get_minimal_container()
    cnt["command"] = ["nvidia-smi"]
    cnt["deploy"] = {"resources": {"reservations": {"devices": [{}]}}}
    reserved = cnt["deploy"]["resources"]["reservations"]["devices"]

    # (GPU selection keys of the device entry, expected CDI device names)
    scenarios = [
        # count: all
        ({"count": "all"}, ["nvidia.com/gpu=all"]),
        # count: 2
        ({"count": 2}, ["nvidia.com/gpu=0", "nvidia.com/gpu=1"]),
        # device_ids: all
        ({"device_ids": "all"}, ["nvidia.com/gpu=all"]),
        # device_ids: 1,3
        ({"device_ids": [1, 3]}, ["nvidia.com/gpu=1", "nvidia.com/gpu=3"]),
    ]

    for selection, gpu_names in scenarios:
        reserved[0] = {
            "driver": "nvidia",
            **selection,
            "capabilities": ["gpu"],
        }

        # Every expected device name is preceded by its own "--device" flag.
        device_flags = []
        for gpu_name in gpu_names:
            device_flags += ["--device", gpu_name]

        expected = [
            "--name=project_name_service_name1",
            "-d",
            "--network=bridge",
            "--network-alias=service_name",
            *device_flags,
            "--security-opt=label=disable",
            "busybox",
            "nvidia-smi",
        ]

        args = await container_to_args(c, cnt)
        self.assertEqual(args, expected)
|
||||
|
Loading…
Reference in New Issue
Block a user