mirror of
https://github.com/ChristianLempa/boilerplates.git
synced 2024-11-25 09:44:24 +01:00
Added Nvidia_smi compose and readme
This commit is contained in:
parent
ccd4833bf4
commit
6e93d47604
@ -0,0 +1,23 @@
|
|||||||
|
# Prerequisite
|
||||||
|
|
||||||
|
NVIDIA container toolkit
|
||||||
|
sudo apt -y install build-essential nvidia-cuda-toolkit nvidia-headless-495 nvidia-utils-495 libnvidia-encode-495 \
|
||||||
|
&& distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
|
||||||
|
&& curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
|
||||||
|
&& curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list \
|
||||||
|
&& sudo apt update \
|
||||||
|
&& sudo apt -y install nvidia-container-toolkit nvidia-container-runtime nvidia-docker2
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
1. Modify the Prometheus configuration file at `/etc/prometheus/prometheus.yml` to add the scrape job below.
|
||||||
|
# Job for Nvidia SMI exporter in prometheus config file
|
||||||
|
- job_name: 'nvidia_smi_exporter'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['nvidia_smi_exporter:9835'] # If the nvidia_smi_exporter container is not on the same Docker network, change this line to "- targets: ['<your host IP>:9835']"
|
||||||
|
|
||||||
|
# Additional References
|
||||||
|
[Nvidia container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#install-guide)
|
||||||
|
[Nvidia GPU exporter Documentation](https://github.com/utkuozdemir/nvidia_gpu_exporter)
|
||||||
|
[Official Prometheus Documentation](https://prometheus.io/docs/introduction/overview/)
|
||||||
|
[A Grafana dashboard (old and not perfect, but configurable)](https://grafana.com/grafana/dashboards/14574)
|
@ -0,0 +1,19 @@
|
|||||||
|
---
version: '3'

services:
  # Exports data from nvidia-smi. Needs nvidia-smi and
  # nvidia-container-toolkit installed on the host.
  nvidia_smi_exporter:
    image: utkuozdemir/nvidia_gpu_exporter:0.3.0
    container_name: nvidia_smi_exporter
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
    ports:
      - "9835:9835"
    # These host mount points work on Ubuntu 20.04.
    volumes:
      - /usr/bin/nvidia-smi:/usr/bin/nvidia-smi
      - /usr/lib/x86_64-linux-gnu/libnvidia-ml.so:/usr/lib/x86_64-linux-gnu/libnvidia-ml.so
      - /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1:/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
    restart: unless-stopped
|
Loading…
Reference in New Issue
Block a user