ggml-cuda: Adding support for unified memory (llama/8035)

* Adding support for unified memory

* adding again the documentation about unified memory

* refactoring: Moved the unified memory code in the correct location.

* Fixed compilation error when using hipblas

* cleaning up the documentation

* Updating the documentation

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* adding one more case where the PR should not be enabled

---------

Co-authored-by: matteo serva <matteo.serva@gmail.com>
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
matteo 2024-08-01 23:28:28 +02:00 committed by Georgi Gerganov
parent d26250f78c
commit 8e39ee171f

View File

@ -130,7 +130,22 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
}
return res;
#else
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
cudaError_t err;
if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr)
{
err = cudaMallocManaged(ptr, size);
}
else
{
err = cudaMalloc(ptr, size);
}
return err;
#else
return cudaMalloc(ptr, size);
#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
#endif
}