Merge branch 'master' of github.com:SciSharp/LLamaSharp into rinne-dev

commit 9fcbd16b74
@@ -0,0 +1,55 @@
name: CI
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  build:
    name: Test
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
        include:
          - build: linux-debug
            os: ubuntu-latest
            config: debug
          - build: linux-release
            os: ubuntu-latest
            config: release
          - build: macos-debug
            os: macos-latest
            config: debug
          - build: macos-release
            os: macos-latest
            config: release
          - build: windows-debug
            os: windows-2019
            config: debug
          - build: windows-release
            os: windows-2019
            config: release
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-dotnet@v1
        with:
          dotnet-version: |
            6.0.x
            7.0.x
      - name: Cache unit test models
        uses: actions/cache@v3
        with:
          key: "unit_test_models"
          path: LLama.Unittest/Models
      # workaround for actions/setup-dotnet#155
      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
      - name: Restore packages
        run: dotnet restore LLamaSharp.sln
      - name: Build
        run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
      - name: Test
        run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}
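A note on how this workflow hangs together: the six matrix jobs (three operating systems × debug/release) all restore `LLama.Unittest/Models` from the cache under the fixed key `unit_test_models`, so the large GGML test model only has to be fetched from HuggingFace on a cache miss; the actual download is driven by the test project's `DownloadFile` target added further below in this commit.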
@@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist
*.xsd

# docs
site/

+/LLama.Unittest/Models/*.bin
@@ -1,11 +1,15 @@
using LLama;
using LLama.Common;

namespace LLama.Unittest
{
    public class BasicTest
    {
        [Fact]
-       public void SimpleQA()
+       public void LoadModel()
        {
            var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            model.Dispose();
        }
    }
}
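The renamed test only verifies that the model loads and unloads cleanly. A minimal sketch of the same idea, assuming the `LLamaModel`/`ModelParams` API exactly as it appears in the hunk (the `using` declaration and the `Assert.NotNull` are illustrative additions, not part of the commit):

using LLama;
using LLama.Common;
using Xunit;

namespace LLama.Unittest
{
    public class BasicTest
    {
        [Fact]
        public void LoadModel()
        {
            // 'using' guarantees Dispose() releases the native handle
            // even if the assertion below throws
            using var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            Assert.NotNull(model);
        }
    }
}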
@@ -23,8 +23,22 @@
    </PackageReference>
  </ItemGroup>

+  <Target Name="DownloadContentFiles" BeforeTargets="Build">
+    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q3_K_S.bin" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.ggmlv3.q3_K_S.bin" SkipUnchangedFiles="true">
+    </DownloadFile>
+  </Target>

  <ItemGroup>
    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
  </ItemGroup>

+  <ItemGroup>
+    <Folder Include="Models\" />
+  </ItemGroup>

+  <ItemGroup>
+    <None Update="Models\llama-2-7b-chat.ggmlv3.q3_K_S.bin">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
</Project>
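`DownloadFile` here is the built-in MSBuild task, not something custom to this repository; with `SkipUnchangedFiles="true"` the multi-gigabyte model is fetched only when the file in `Models` is missing or stale, which is what makes the `actions/cache` step in the CI workflow above pay off on subsequent runs. The `CopyToOutputDirectory` entry then makes the relative path `Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin` used by the unit test resolve at runtime.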
@@ -30,6 +30,7 @@ namespace LLama.Common
        /// <param name="data"></param>
        public FixedSizeQueue(int size, IEnumerable<T> data)
        {
#if NETCOREAPP3_0_OR_GREATER
            // Try an early check on the amount of data supplied (if possible)
#if NETSTANDARD2_0
            var dataCount = data.Count();
@@ -52,7 +53,7 @@ namespace LLama.Common
                throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#endif
        }

        /// <summary>
        /// Replace every item in the queue with the given value
        /// </summary>
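The two `FixedSizeQueue` hunks above juggle preprocessor symbols around a single idea: reject initial data longer than the queue's capacity, counting it as cheaply as the target framework allows. A minimal sketch of that guard under the same `size`/`data` parameters (the `TryGetNonEnumeratedCount` branch is how such a check is typically written on .NET 6+; this is an illustration, not the exact code in the commit):

using System;
using System.Collections.Generic;
using System.Linq;

public class FixedSizeQueue<T>
{
    private readonly int _size;
    private readonly List<T> _storage;

    public FixedSizeQueue(int size, IEnumerable<T> data)
    {
        _size = size;

#if NET6_0_OR_GREATER
        // Cheap early check: only succeeds when the count is known
        // without enumerating (arrays, ICollection<T>, ...)
        if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
            throw new ArgumentException($"The max size set for the queue is {size}, but got {dataCount} initial values.");
#endif

        // Materializing the data enumerates it exactly once
        _storage = data.ToList();
        if (_storage.Count > size)
            throw new ArgumentException($"The max size set for the queue is {size}, but got {_storage.Count} initial values.");
    }
}

On .NET Standard 2.0 the non-enumerating probe is unavailable, so the only option is to enumerate and count, which is why the diff gates `data.Count()` behind a preprocessor branch.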
@@ -84,7 +84,7 @@ namespace LLama.Common
        /// <summary>
        /// how split tensors should be distributed across GPUs
        /// </summary>
-       public float[] TensorSplits { get; set; } = new float[] { 0 };
+       public nint TensorSplits { get; set; }

        /// <summary>
        ///
@@ -47,7 +47,8 @@ namespace LLama.Native
        /// <summary>
        /// how to split layers across multiple GPUs
        /// </summary>
-       public float[] tensor_split;
+       public nint tensor_split;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -78,6 +79,11 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool low_vram;

+       /// <summary>
+       /// if true, use experimental mul_mat_q kernels
+       /// </summary>
+       [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
+
        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
@@ -114,9 +120,5 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool embedding;
    }

-   public struct TensorSplits
-   {
-       public float Item1;
-   }
}
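The switch from `float[]` to `nint` for `tensor_split` (together with removing the single-element `TensorSplits` struct) means the managed side now hands llama.cpp a raw pointer instead of letting the marshaller copy an array. A sketch of how a caller might supply splits under that contract, assuming the array must stay pinned for as long as native code can read it (the `GCHandle` wrapper below is an illustration, not necessarily how LLamaSharp ended up doing it):

using System;
using System.Runtime.InteropServices;

// Hypothetical helper: pin a managed float[] and expose its address
// as the nint that the native tensor_split field now expects.
public sealed class PinnedTensorSplits : IDisposable
{
    private GCHandle _handle;

    public PinnedTensorSplits(float[] splits)
    {
        // Pinned so the GC cannot move the array while native code holds the pointer
        _handle = GCHandle.Alloc(splits, GCHandleType.Pinned);
    }

    public nint Pointer => _handle.AddrOfPinnedObject();

    public void Dispose()
    {
        if (_handle.IsAllocated)
            _handle.Free();
    }
}

A caller would then assign `lparams.tensor_split = pinned.Pointer;` and keep the wrapper alive for the lifetime of the context.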
|
@ -28,12 +28,14 @@ namespace LLama
|
|||
lparams.logits_all = @params.Perplexity;
|
||||
lparams.embedding = @params.EmbeddingMode;
|
||||
lparams.low_vram = @params.LowVram;
|
||||
|
||||
|
||||
/*
|
||||
if (@params.TensorSplits.Length != 1)
|
||||
{
|
||||
throw new ArgumentException("Currently multi-gpu support is not supported by " +
|
||||
"both llama.cpp and LLamaSharp.");
|
||||
}
|
||||
}*/
|
||||
|
||||
lparams.tensor_split = @params.TensorSplits;
|
||||
|
||||
if (!File.Exists(@params.ModelPath))
|
||||
|
|
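With both `ModelParams.TensorSplits` and the native `tensor_split` field now typed as `nint`, the assignment above compiles without any marshalling, and the old length check no longer applies (an `nint` has no `Length`), which is presumably why the multi-GPU guard is commented out rather than updated.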
Binary file not shown.