mirror of
https://github.com/containers/podman-compose.git
synced 2024-11-25 09:23:31 +01:00
Add support for enabling GPU access
Signed-off-by: YISH <mokeyish@hotmail.com>
This commit is contained in:
parent
33d7d35a4d
commit
79865c2e13
3
.gitignore
vendored
3
.gitignore
vendored
@ -105,3 +105,6 @@ venv.bak/
|
|||||||
|
|
||||||
# mypy
|
# mypy
|
||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
|
|
||||||
|
|
||||||
|
.vscode
|
||||||
|
11
examples/nvidia-smi/docker-compose.yaml
Normal file
11
examples/nvidia-smi/docker-compose.yaml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Example: run `nvidia-smi` in a CUDA base image with one NVIDIA GPU
# reserved through the Compose `deploy.resources.reservations.devices` key.
services:
  test:
    image: nvidia/cuda:12.3.1-base-ubuntu20.04
    command: nvidia-smi
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
|
@ -635,6 +635,62 @@ def get_secret_args(compose, cnt, secret, podman_is_building=False):
|
|||||||
|
|
||||||
|
|
||||||
def container_to_res_args(cnt, podman_args):
    """Collect all resource-related podman arguments for container ``cnt``.

    The CPU resource helper runs first, then the GPU helper; both receive
    the same ``podman_args`` list. Nothing is returned.
    """
    for add_res_args in (container_to_cpu_res_args, container_to_gpu_res_args):
        add_res_args(cnt, podman_args)
|
||||||
|
|
||||||
|
|
||||||
|
def _gpu_cdi_names(device):
    """Return the CDI GPU names requested by one ``devices`` entry.

    Each returned value is the part that follows ``nvidia.com/gpu=``.
    Explicit ``device_ids`` win over ``count``; when neither narrows the
    selection the single name ``"all"`` is returned.

    Raises:
        ValueError: if ``count`` is neither ``"all"`` nor an integer.
    """
    device_ids = device.get("device_ids")
    if device_ids is None:
        device_ids = []
    elif isinstance(device_ids, str):
        # A bare string must not be iterated character by character.
        # "all" means "no explicit selection"; anything else is treated as
        # one ID or a comma-separated list of IDs.
        if device_ids == "all":
            device_ids = []
        else:
            device_ids = [part.strip() for part in device_ids.split(",") if part.strip()]
    elif not isinstance(device_ids, (list, tuple)):
        # A lone scalar such as ``device_ids: 0``.
        device_ids = [device_ids]

    if device_ids:
        return list(device_ids)

    count = device.get("count", "all")
    if count is None or count == "all":
        return ["all"]
    try:
        # YAML may deliver the count as a quoted number ("2").
        wanted = int(count)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"invalid GPU count {count!r}: expected an integer or 'all'"
        ) from err
    return list(range(wanted))


def container_to_gpu_res_args(cnt, podman_args):
    """Translate Compose GPU reservations into podman CDI ``--device`` args.

    Reads ``cnt["deploy"]["resources"]["reservations"]["devices"]`` and, for
    every entry whose ``driver`` is ``"nvidia"`` and whose ``capabilities``
    contain ``"gpu"``, appends ``--device nvidia.com/gpu=<name>`` pairs to
    ``podman_args``. If at least one such entry matched,
    ``--security-opt=label=disable`` is appended once at the end.

    Args:
        cnt: container (service) definition mapping from the compose file.
        podman_args: list of podman arguments, extended in place.

    Raises:
        ValueError: if a matching entry has a ``count`` that is neither
            ``"all"`` nor an integer.
    """
    # https://docs.docker.com/compose/gpu-support/
    # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html

    deploy = cnt.get("deploy") or {}
    resources = deploy.get("resources") or {}
    reservations = resources.get("reservations") or {}
    # ``devices: null`` must behave like a missing key, same as the levels above.
    devices = reservations.get("devices") or []

    gpu_on = False
    for device in devices:
        capabilities = device.get("capabilities")
        if capabilities is None:
            continue
        if device.get("driver") != "nvidia" or "gpu" not in capabilities:
            continue

        for name in _gpu_cdi_names(device):
            podman_args.extend(("--device", f"nvidia.com/gpu={name}"))
        gpu_on = True

    if gpu_on:
        podman_args.append("--security-opt=label=disable")
|
||||||
|
|
||||||
|
|
||||||
|
def container_to_cpu_res_args(cnt, podman_args):
|
||||||
# v2: https://docs.docker.com/compose/compose-file/compose-file-v2/#cpu-and-other-resources
|
# v2: https://docs.docker.com/compose/compose-file/compose-file-v2/#cpu-and-other-resources
|
||||||
# cpus, cpu_shares, mem_limit, mem_reservation
|
# cpus, cpu_shares, mem_limit, mem_reservation
|
||||||
cpus_limit_v2 = try_float(cnt.get("cpus", None), None)
|
cpus_limit_v2 = try_float(cnt.get("cpus", None), None)
|
||||||
|
@ -325,3 +325,106 @@ class TestContainerToArgs(unittest.IsolatedAsyncioTestCase):
|
|||||||
"busybox",
|
"busybox",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def test_gpu(self):
    """GPU reservations become CDI ``--device`` arguments.

    The same container definition is reused for every scenario; only the
    single entry under ``deploy.resources.reservations.devices`` is swapped
    before ``container_to_args`` is called again.
    """
    c = create_compose_mock()

    cnt = get_minimal_container()
    cnt["command"] = ["nvidia-smi"]
    cnt["deploy"] = {"resources": {"reservations": {"devices": [{}]}}}
    reserved = cnt["deploy"]["resources"]["reservations"]["devices"]

    # (device reservation, expected GPU-specific arguments)
    scenarios = (
        # count: all
        (
            {"driver": "nvidia", "count": "all", "capabilities": ["gpu"]},
            ["--device", "nvidia.com/gpu=all"],
        ),
        # count: 2
        (
            {"driver": "nvidia", "count": 2, "capabilities": ["gpu"]},
            ["--device", "nvidia.com/gpu=0", "--device", "nvidia.com/gpu=1"],
        ),
        # device_ids: all
        (
            {"driver": "nvidia", "device_ids": "all", "capabilities": ["gpu"]},
            ["--device", "nvidia.com/gpu=all"],
        ),
        # device_ids: 1,3
        (
            {"driver": "nvidia", "device_ids": [1, 3], "capabilities": ["gpu"]},
            ["--device", "nvidia.com/gpu=1", "--device", "nvidia.com/gpu=3"],
        ),
    )

    for reservation, gpu_args in scenarios:
        reserved[0] = reservation

        expected = [
            "--name=project_name_service_name1",
            "-d",
            "--network=bridge",
            "--network-alias=service_name",
            *gpu_args,
            "--security-opt=label=disable",
            "busybox",
            "nvidia-smi",
        ]

        args = await container_to_args(c, cnt)
        self.assertEqual(args, expected)
|
||||||
|
Loading…
Reference in New Issue
Block a user