From db7e1e88f83e0c3730884df748aa2f4ecfa13172 Mon Sep 17 00:00:00 2001
From: Jason Couture
Date: Tue, 30 Jan 2024 02:39:41 -0500
Subject: [PATCH] Use llama instead of libllama in `[DllImport]`

This means Windows users no longer need to rename the DLL, and native
llama.cpp builds can be dropped in as-is, even on Windows.

I also updated the documentation to remove the references to renaming
the files, since the names now match.

Fixes #463
---
 .github/workflows/compile.yml                 | 12 ++++-----
 CONTRIBUTING.md                               |  2 +-
 LLama/LLamaSharp.Runtime.targets              | 24 +++++++++---------
 LLama/Native/NativeApi.Load.cs                |  2 +-
 .../build/LLamaSharp.Backend.Cpu.nuspec       |  8 +++---
 .../build/LLamaSharp.Backend.Cuda11.nuspec    |  2 +-
 .../build/LLamaSharp.Backend.Cuda12.nuspec    |  2 +-
 .../deps/avx/{libllama.dll => llama.dll}      | Bin
 .../deps/avx2/{libllama.dll => llama.dll}     | Bin
 .../deps/avx512/{libllama.dll => llama.dll}   | Bin
 .../deps/cu11.7.1/{libllama.dll => llama.dll} | Bin
 .../deps/cu12.1.0/{libllama.dll => llama.dll} | Bin
 .../runtimes/deps/{libllama.dll => llama.dll} | Bin
 docs/ContributingGuide.md                     |  2 +-
 docs/index.md                                 |  2 +-
 15 files changed, 28 insertions(+), 28 deletions(-)
 rename LLama/runtimes/deps/avx/{libllama.dll => llama.dll} (100%)
 rename LLama/runtimes/deps/avx2/{libllama.dll => llama.dll} (100%)
 rename LLama/runtimes/deps/avx512/{libllama.dll => llama.dll} (100%)
 rename LLama/runtimes/deps/cu11.7.1/{libllama.dll => llama.dll} (100%)
 rename LLama/runtimes/deps/cu12.1.0/{libllama.dll => llama.dll} (100%)
 rename LLama/runtimes/deps/{libllama.dll => llama.dll} (100%)

diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index 2abd745c..97273263 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -204,18 +204,18 @@ jobs:
           cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
           cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
 
-          cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/libllama.dll
-          cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/libllama.dll
-          cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/libllama.dll
-          cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/libllama.dll
+          cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
+          cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
+          cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
+          cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
 
           cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
           cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
           cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
 
-          cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/libllama.dll
+          cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
           cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
-          cp artifacts/llama-bin-win-cublas-cu12.1.0-x64.dll/llama.dll deps/cu12.1.0/libllama.dll
+          cp artifacts/llama-bin-win-cublas-cu12.1.0-x64.dll/llama.dll deps/cu12.1.0/llama.dll
           cp artifacts/llama-bin-linux-cublas-cu12.1.0-x64.so/libllama.so deps/cu12.1.0/libllama.so
 
       - name: Upload artifacts

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c8acd2da..d484501c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -16,7 +16,7 @@ When building from source, please add `-DBUILD_SHARED_LIBS=ON` to the cmake inst
 cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
 ```
 
-After running `cmake --build . --config Release`, you could find the `llama.dll`, `llama.so` or `llama.dylib` in your build directory. After pasting it to `LLamaSharp/LLama/runtimes` and renaming it to `libllama.dll`, `libllama.so` or `libllama.dylib`, you can use it as the native library in LLamaSharp.
+After running `cmake --build . --config Release`, you can find `llama.dll`, `llama.so` or `llama.dylib` in your build directory. After pasting it into `LLamaSharp/LLama/runtimes`, you can use it as the native library in LLamaSharp.
 
 ## Add a new feature to LLamaSharp

diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index 93600b29..f26ad24e 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -4,29 +4,29 @@
-    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/libllama.dll">
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/llama.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Link>runtimes/win-x64/native/noavx/libllama.dll</Link>
+      <Link>runtimes/win-x64/native/noavx/llama.dll</Link>
     </None>
 
-    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/libllama.dll">
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/llama.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Link>runtimes/win-x64/native/avx/libllama.dll</Link>
+      <Link>runtimes/win-x64/native/avx/llama.dll</Link>
     </None>
 
-    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/libllama.dll">
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/llama.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Link>runtimes/win-x64/native/avx2/libllama.dll</Link>
+      <Link>runtimes/win-x64/native/avx2/llama.dll</Link>
     </None>
 
-    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/libllama.dll">
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/llama.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Link>runtimes/win-x64/native/avx512/libllama.dll</Link>
+      <Link>runtimes/win-x64/native/avx512/llama.dll</Link>
     </None>
 
-    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.dll">
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/llama.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Link>runtimes/win-x64/native/cuda11/libllama.dll</Link>
+      <Link>runtimes/win-x64/native/cuda11/llama.dll</Link>
     </None>
 
-    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu12.1.0/libllama.dll">
+    <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu12.1.0/llama.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Link>runtimes/win-x64/native/cuda12/libllama.dll</Link>
+      <Link>runtimes/win-x64/native/cuda12/llama.dll</Link>
     </None>

diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs
index 9153c1f2..0dc5abd3 100644
--- a/LLama/Native/NativeApi.Load.cs
+++ b/LLama/Native/NativeApi.Load.cs
@@ -329,7 +329,7 @@ namespace LLama.Native
 #endif
         }
 
-        internal const string libraryName = "libllama";
+        internal const string libraryName = "llama";
         private const string cudaVersionFile = "version.json";
         private const string loggingPrefix = "[LLamaSharp Native]";
         private static bool enableLogging = false;

diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
index e7ae5e58..72183304 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
@@ -18,10 +18,10 @@
-    <file src="runtimes/deps/libllama.dll" target="runtimes\win-x64\native\noavx\libllama.dll" />
-    <file src="runtimes/deps/avx/libllama.dll" target="runtimes\win-x64\native\avx\libllama.dll" />
-    <file src="runtimes/deps/avx2/libllama.dll" target="runtimes\win-x64\native\avx2\libllama.dll" />
-    <file src="runtimes/deps/avx512/libllama.dll" target="runtimes\win-x64\native\avx512\libllama.dll" />
+    <file src="runtimes/deps/llama.dll" target="runtimes\win-x64\native\noavx\llama.dll" />
+    <file src="runtimes/deps/avx/llama.dll" target="runtimes\win-x64\native\avx\llama.dll" />
+    <file src="runtimes/deps/avx2/llama.dll" target="runtimes\win-x64\native\avx2\llama.dll" />
+    <file src="runtimes/deps/avx512/llama.dll" target="runtimes\win-x64\native\avx512\llama.dll" />

diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec
index 4b7b6f04..43eb4005 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec
@@ -18,7 +18,7 @@
-    <file src="runtimes/deps/cu11.7.1/libllama.dll" target="runtimes\win-x64\native\cuda11\libllama.dll" />
+    <file src="runtimes/deps/cu11.7.1/llama.dll" target="runtimes\win-x64\native\cuda11\llama.dll" />

diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec
index d915ba4c..41d8aafb 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec
@@ -18,7 +18,7 @@
-    <file src="runtimes/deps/cu12.1.0/libllama.dll" target="runtimes\win-x64\native\cuda12\libllama.dll" />
+    <file src="runtimes/deps/cu12.1.0/llama.dll" target="runtimes\win-x64\native\cuda12\llama.dll" />

diff --git a/LLama/runtimes/deps/avx/libllama.dll b/LLama/runtimes/deps/avx/llama.dll
similarity index 100%
rename from LLama/runtimes/deps/avx/libllama.dll
rename to LLama/runtimes/deps/avx/llama.dll

diff --git a/LLama/runtimes/deps/avx2/libllama.dll b/LLama/runtimes/deps/avx2/llama.dll
similarity index 100%
rename from LLama/runtimes/deps/avx2/libllama.dll
rename to LLama/runtimes/deps/avx2/llama.dll

diff --git a/LLama/runtimes/deps/avx512/libllama.dll b/LLama/runtimes/deps/avx512/llama.dll
similarity index 100%
rename from LLama/runtimes/deps/avx512/libllama.dll
rename to LLama/runtimes/deps/avx512/llama.dll

diff --git a/LLama/runtimes/deps/cu11.7.1/libllama.dll b/LLama/runtimes/deps/cu11.7.1/llama.dll
similarity index 100%
rename from LLama/runtimes/deps/cu11.7.1/libllama.dll
rename to LLama/runtimes/deps/cu11.7.1/llama.dll

diff --git a/LLama/runtimes/deps/cu12.1.0/libllama.dll b/LLama/runtimes/deps/cu12.1.0/llama.dll
similarity index 100%
rename from LLama/runtimes/deps/cu12.1.0/libllama.dll
rename to LLama/runtimes/deps/cu12.1.0/llama.dll

diff --git a/LLama/runtimes/deps/libllama.dll b/LLama/runtimes/deps/llama.dll
similarity index 100%
rename from LLama/runtimes/deps/libllama.dll
rename to LLama/runtimes/deps/llama.dll

diff --git a/docs/ContributingGuide.md b/docs/ContributingGuide.md
index 1f3b3d47..458a4511 100644
--- a/docs/ContributingGuide.md
+++ b/docs/ContributingGuide.md
@@ -16,7 +16,7 @@ When building from source, please add `-DBUILD_SHARED_LIBS=ON` to the cmake inst
 cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
 ```
 
-After running `cmake --build . --config Release`, you could find the `llama.dll`, `llama.so` or `llama.dylib` in your build directory. After pasting it to `LLamaSharp/LLama/runtimes` , you can use it as the native library in LLamaSharp.
+After running `cmake --build . --config Release`, you can find `llama.dll`, `llama.so` or `llama.dylib` in your build directory. After pasting it into `LLamaSharp/LLama/runtimes`, you can use it as the native library in LLamaSharp.
 
 ## Add a new feature to LLamaSharp

diff --git a/docs/index.md b/docs/index.md
index 26fb68c0..5f82ccb9 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -20,7 +20,7 @@ LLamaSharp is the C#/.NET binding of [llama.cpp](https://github.com/ggerganov/ll
 If you are new to LLM, here're some tips for you to help you to get start with `LLamaSharp`. If you are experienced in this field, we'd still recommend you to take a few minutes to read it because some things perform differently compared to cpp/python.
 
 1. The main ability of LLamaSharp is to provide an efficient way to run inference of LLM (Large Language Model) locally (and fine-tune model in the future). The model weights, however, need to be downloaded from other resources such as [huggingface](https://huggingface.co).
-2. Since LLamaSharp supports multiple platforms, The nuget package is split into `LLamaSharp` and `LLama.Backend`. After installing `LLamaSharp`, please install one of `LLama.Backend.Cpu`, `LLama.Backend.Cuda11` or `LLama.Backend.Cuda12`. If you use the source code, dynamic libraries can be found in `LLama/Runtimes`. Rename the one you want to use to `libllama.dll`.
+2. Since LLamaSharp supports multiple platforms, the NuGet package is split into `LLamaSharp` and `LLama.Backend`. After installing `LLamaSharp`, please install one of `LLama.Backend.Cpu`, `LLama.Backend.Cuda11` or `LLama.Backend.Cuda12`. If you use the source code, the dynamic libraries can be found in `LLama/Runtimes`.
 3. `LLaMa` originally refers to the weights released by Meta (Facebook Research). After that, many models are fine-tuned based on it, such as `Vicuna`, `GPT4All`, and `Pyglion`. Though all of these models are supported by LLamaSharp, some steps are necessary with different file formats. There're mainly three kinds of files, which are `.pth`, `.bin (ggml)`, `.bin (quantized)`. If you have the `.bin (quantized)` file, it could be used directly by LLamaSharp. If you have the `.bin (ggml)` file, you could use it directly but get higher inference speed after the quantization. If you have the `.pth` file, you need to follow [the instructions in llama.cpp](https://github.com/ggerganov/llama.cpp#prepare-data--run) to convert it to `.bin (ggml)` file at first.
 4. LLamaSharp supports GPU acceleration, but it requires cuda installation. Please install cuda 11 or cuda 12 on your system before using LLamaSharp to enable GPU. If you have another cuda version, you could compile llama.cpp from source to get the dll. For building from source, please refer to [issue #5](https://github.com/SciSharp/LLamaSharp/issues/5).
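
Why the shorter name is portable: `[DllImport]` hands the constant to the .NET native-library loader, which probes platform-conventional file names. Below is a minimal illustrative sketch, not LLamaSharp's actual `NativeApi` class; `libraryName` matches the constant changed in NativeApi.Load.cs above, and `llama_print_system_info` is a real llama.cpp export used here only as an example:

```cs
using System;
using System.Runtime.InteropServices;

// Minimal sketch of the import pattern this patch relies on.
internal static class NativeLlamaSketch
{
    // With "llama", the runtime probes the platform-conventional variants:
    //   Windows: llama.dll
    //   Linux:   libllama.so    (the "lib" prefix is added while probing)
    //   macOS:   libllama.dylib
    // With the old "libllama", Windows probed for libllama.dll, which is not
    // what llama.cpp builds produce, hence the manual rename this patch removes.
    internal const string libraryName = "llama";

    [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr llama_print_system_info();
}
```

Linux and macOS are unaffected by this change: the loader already tried the `lib` prefix and `.so`/`.dylib` suffixes when probing, so `llama` still resolves to `libllama.so`/`libllama.dylib`, which is why only the Windows binaries are renamed in this patch.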