Merge branch 'master' of github.com:SciSharp/LLamaSharp into rinne-dev

Yaohui Liu 2023-08-06 01:30:03 +08:00
commit 9fcbd16b74
No known key found for this signature in database
GPG Key ID: E86D01E1809BD23E
9 changed files with 94 additions and 13 deletions

55
.github/workflows/main.yml vendored Normal file

@@ -0,0 +1,55 @@
name: CI
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  build:
    name: Test
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
        include:
          - build: linux-debug
            os: ubuntu-latest
            config: debug
          - build: linux-release
            os: ubuntu-latest
            config: release
          - build: macos-debug
            os: macos-latest
            config: debug
          - build: macos-release
            os: macos-latest
            config: release
          - build: windows-debug
            os: windows-2019
            config: debug
          - build: windows-release
            os: windows-2019
            config: release
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-dotnet@v1
        with:
          dotnet-version: |
            6.0.x
            7.0.x
      - name: Cache test models
        uses: actions/cache@v3
        with:
          key: "unit_test_models"
          path: LLama.Unittest/Models
      # workaround for actions/setup-dotnet#155
      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
      - name: Restore packages
        run: dotnet restore LLamaSharp.sln
      - name: Build
        run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
      - name: Test
        run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}

5
.gitignore vendored

@@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist
*.xsd
# docs
site/
site/
/LLama.Unittest/Models/*.bin

LLama.Unittest/BasicTest.cs

@@ -1,11 +1,15 @@
using LLama;
using LLama.Common;
namespace LLama.Unittest
{
    public class BasicTest
    {
        [Fact]
        public void SimpleQA()
        public void LoadModel()
        {
            var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            model.Dispose();
        }
    }
}
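
Not part of the diff, but a small usage note: since LLamaModel exposes Dispose (as the test above shows), the same load check can also be written with a using declaration so the model is released even if a later assertion throws. The class and method names below are made up for illustration; LLamaModel and ModelParams are the types used in the test above.

using LLama;
using LLama.Common;
using Xunit;

namespace LLama.Unittest
{
    public class BasicTestSketch
    {
        [Fact]
        public void LoadModel_WithUsingDeclaration()
        {
            // Same model path and parameters as the test above; Dispose runs
            // automatically when the method exits, including on an exception.
            using var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
        }
    }
}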

LLama.Unittest/LLama.Unittest.csproj

@@ -23,8 +23,22 @@
    </PackageReference>
  </ItemGroup>
  <Target Name="DownloadContentFiles" BeforeTargets="Build">
    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q3_K_S.bin" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.ggmlv3.q3_K_S.bin" SkipUnchangedFiles="true">
    </DownloadFile>
  </Target>
  <ItemGroup>
    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
  </ItemGroup>
  <ItemGroup>
    <Folder Include="Models\" />
  </ItemGroup>
  <ItemGroup>
    <None Update="Models\llama-2-7b-chat.ggmlv3.q3_K_S.bin">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
  </ItemGroup>
</Project>

LLama/Common/FixedSizeQueue.cs

@@ -30,6 +30,7 @@ namespace LLama.Common
        /// <param name="data"></param>
        public FixedSizeQueue(int size, IEnumerable<T> data)
        {
#if NETCOREAPP3_0_OR_GREATER
            // Try an early check on the amount of data supplied (if possible)
#if NETSTANDARD2_0
            var dataCount = data.Count();
@@ -52,7 +53,7 @@ namespace LLama.Common
                throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#endif
        }

        /// <summary>
        /// Replace every item in the queue with the given value
        /// </summary>
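
The hunk above only shows the netstandard2.0 branch of the early size check. Purely as an illustration of what that guard amounts to (not the library's exact code; the helper name and the NET6_0_OR_GREATER symbol below are assumptions), a self-contained version could look like this:

using System;
using System.Collections.Generic;
using System.Linq;

internal static class QueueGuards
{
    // Illustrative helper: reject initial data that would not fit in a fixed-size queue.
    public static void ThrowIfTooLarge<T>(IEnumerable<T> data, int size)
    {
#if NET6_0_OR_GREATER
        // Newer targets can sometimes read the count without enumerating the sequence.
        if (data.TryGetNonEnumeratedCount(out var count) && count > size)
            throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#else
        // netstandard2.0 fallback: Count() may have to enumerate the sequence once.
        var count = data.Count();
        if (count > size)
            throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#endif
    }
}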

LLama/Common/ModelParams.cs

@@ -84,7 +84,7 @@ namespace LLama.Common
        /// <summary>
        /// how split tensors should be distributed across GPUs
        /// </summary>
        public float[] TensorSplits { get; set; } = new float[] { 0 };
        public nint TensorSplits { get; set; }
        /// <summary>
        ///

LLama/Native/LLamaContextParams.cs

@@ -47,7 +47,8 @@ namespace LLama.Native
        /// <summary>
        /// how to split layers across multiple GPUs
        /// </summary>
        public float[] tensor_split;
        public nint tensor_split;
        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -78,6 +79,11 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool low_vram;
        /// <summary>
        /// if true, use experimental mul_mat_q kernels
        /// </summary>
        [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
@@ -114,9 +120,5 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool embedding;
    }
    public struct TensorSplits
    {
        public float Item1;
    }
}
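
With tensor_split and TensorSplits declared as nint instead of float[], the struct no longer carries a managed array, so any per-GPU proportions have to reach native code as a raw address. The following is not part of this commit, only a sketch of one way a caller could produce such a pointer from a managed float[] (the variable names are made up), assuming the memory must stay pinned while native code reads it:

using System.Runtime.InteropServices;

// Hypothetical proportions for a single GPU; real values depend on the desired split.
float[] splits = { 1.0f };

// Pin the array so the GC cannot move it, then hand its address to the native side.
GCHandle handle = GCHandle.Alloc(splits, GCHandleType.Pinned);
try
{
    nint tensorSplitPtr = handle.AddrOfPinnedObject();
    // ... assign tensorSplitPtr wherever an nint tensor_split is expected ...
}
finally
{
    handle.Free(); // unpin once native code no longer needs the data
}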

LLama/Utils.cs

@@ -28,12 +28,14 @@ namespace LLama
            lparams.logits_all = @params.Perplexity;
            lparams.embedding = @params.EmbeddingMode;
            lparams.low_vram = @params.LowVram;
            /*
            if (@params.TensorSplits.Length != 1)
            {
                throw new ArgumentException("Currently multi-gpu support is not supported by " +
                    "both llama.cpp and LLamaSharp.");
            }
            }*/
            lparams.tensor_split = @params.TensorSplits;
            if (!File.Exists(@params.ModelPath))

Binary file not shown.