Merge branch 'master' of github.com:SciSharp/LLamaSharp into rinne-dev

commit 9fcbd16b74
@@ -0,0 +1,55 @@
name: CI
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  build:
    name: Test
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
        include:
          - build: linux-debug
            os: ubuntu-latest
            config: debug
          - build: linux-release
            os: ubuntu-latest
            config: release
          - build: macos-debug
            os: macos-latest
            config: debug
          - build: macos-release
            os: macos-latest
            config: release
          - build: windows-debug
            os: windows-2019
            config: debug
          - build: windows-release
            os: windows-2019
            config: release
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-dotnet@v1
        with:
          dotnet-version: |
            6.0.x
            7.0.x
      - name: Cache unit test models
        uses: actions/cache@v3
        with:
          key: "unit_test_models"
          path: LLama.Unittest/Models
      # workaround for actions/setup-dotnet#155
      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
      - name: Restore packages
        run: dotnet restore LLamaSharp.sln
      - name: Build
        run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
      - name: Test
        run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}
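A note on how this workflow hangs together: the six matrix jobs (three operating systems × debug/release) all restore `LLama.Unittest/Models` from the cache under the fixed key `unit_test_models`, so the large GGML test model only has to be fetched from HuggingFace on a cache miss; the actual download is driven by the test project's `DownloadFile` target added further below in this commit.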
@@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist
*.xsd

# docs
site/

+/LLama.Unittest/Models/*.bin
@@ -1,11 +1,15 @@
using LLama;
using LLama.Common;

namespace LLama.Unittest
{
    public class BasicTest
    {
        [Fact]
-       public void SimpleQA()
+       public void LoadModel()
        {
            var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            model.Dispose();
        }
    }
}
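The renamed test only verifies that the model loads and unloads cleanly. A minimal sketch of the same idea, assuming the `LLamaModel`/`ModelParams` API exactly as it appears in the hunk (the `using` declaration and the `Assert.NotNull` are illustrative additions, not part of the commit):

using LLama;
using LLama.Common;
using Xunit;

namespace LLama.Unittest
{
    public class BasicTest
    {
        [Fact]
        public void LoadModel()
        {
            // 'using' guarantees Dispose() releases the native handle
            // even if the assertion below throws
            using var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            Assert.NotNull(model);
        }
    }
}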
@@ -23,8 +23,22 @@
    </PackageReference>
  </ItemGroup>

+  <Target Name="DownloadContentFiles" BeforeTargets="Build">
+    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q3_K_S.bin" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.ggmlv3.q3_K_S.bin" SkipUnchangedFiles="true">
+    </DownloadFile>
+  </Target>

  <ItemGroup>
    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
  </ItemGroup>

+  <ItemGroup>
+    <Folder Include="Models\" />
+  </ItemGroup>

+  <ItemGroup>
+    <None Update="Models\llama-2-7b-chat.ggmlv3.q3_K_S.bin">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
</Project>
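`DownloadFile` here is the built-in MSBuild task, not something custom to this repository; with `SkipUnchangedFiles="true"` the multi-gigabyte model is fetched only when the file in `Models` is missing or stale, which is what makes the `actions/cache` step in the CI workflow above pay off on subsequent runs. The `CopyToOutputDirectory` entry then makes the relative path `Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin` used by the unit test resolve at runtime.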
@@ -30,6 +30,7 @@ namespace LLama.Common
        /// <param name="data"></param>
        public FixedSizeQueue(int size, IEnumerable<T> data)
        {
#if NETCOREAPP3_0_OR_GREATER
            // Try an early check on the amount of data supplied (if possible)
#if NETSTANDARD2_0
            var dataCount = data.Count();
@@ -52,7 +53,7 @@ namespace LLama.Common
                throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#endif
        }

        /// <summary>
        /// Replace every item in the queue with the given value
        /// </summary>
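The two `FixedSizeQueue` hunks above juggle preprocessor symbols around a single idea: reject initial data longer than the queue's capacity, counting it as cheaply as the target framework allows. A minimal sketch of that guard under the same `size`/`data` parameters (the `TryGetNonEnumeratedCount` branch is how such a check is typically written on .NET 6+; this is an illustration, not the exact code in the commit):

using System;
using System.Collections.Generic;
using System.Linq;

public class FixedSizeQueue<T>
{
    private readonly int _size;
    private readonly List<T> _storage;

    public FixedSizeQueue(int size, IEnumerable<T> data)
    {
        _size = size;

#if NET6_0_OR_GREATER
        // Cheap early check: only succeeds when the count is known
        // without enumerating (arrays, ICollection<T>, ...)
        if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
            throw new ArgumentException($"The max size set for the queue is {size}, but got {dataCount} initial values.");
#endif

        // Materializing the data enumerates it exactly once
        _storage = data.ToList();
        if (_storage.Count > size)
            throw new ArgumentException($"The max size set for the queue is {size}, but got {_storage.Count} initial values.");
    }
}

On .NET Standard 2.0 the non-enumerating probe is unavailable, so the only option is to enumerate and count, which is why the diff gates `data.Count()` behind a preprocessor branch.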
@@ -84,7 +84,7 @@ namespace LLama.Common
        /// <summary>
        /// how split tensors should be distributed across GPUs
        /// </summary>
-       public float[] TensorSplits { get; set; } = new float[] { 0 };
+       public nint TensorSplits { get; set; }

        /// <summary>
        ///
@@ -47,7 +47,8 @@ namespace LLama.Native
        /// <summary>
        /// how to split layers across multiple GPUs
        /// </summary>
-       public float[] tensor_split;
+       public nint tensor_split;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -78,6 +79,11 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool low_vram;

+       /// <summary>
+       /// if true, use experimental mul_mat_q kernels
+       /// </summary>
+       [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
+
        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
@@ -114,9 +120,5 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool embedding;
    }

-   public struct TensorSplits
-   {
-       public float Item1;
-   }
}
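The switch from `float[]` to `nint` for `tensor_split` (together with removing the single-element `TensorSplits` struct) means the managed side now hands llama.cpp a raw pointer instead of letting the marshaller copy an array. A sketch of how a caller might supply splits under that contract, assuming the array must stay pinned for as long as native code can read it (the `GCHandle` wrapper below is an illustration, not necessarily how LLamaSharp ended up doing it):

using System;
using System.Runtime.InteropServices;

// Hypothetical helper: pin a managed float[] and expose its address
// as the nint that the native tensor_split field now expects.
public sealed class PinnedTensorSplits : IDisposable
{
    private GCHandle _handle;

    public PinnedTensorSplits(float[] splits)
    {
        // Pinned so the GC cannot move the array while native code holds the pointer
        _handle = GCHandle.Alloc(splits, GCHandleType.Pinned);
    }

    public nint Pointer => _handle.AddrOfPinnedObject();

    public void Dispose()
    {
        if (_handle.IsAllocated)
            _handle.Free();
    }
}

A caller would then assign `lparams.tensor_split = pinned.Pointer;` and keep the wrapper alive for the lifetime of the context.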
|
@ -28,12 +28,14 @@ namespace LLama
|
|||
lparams.logits_all = @params.Perplexity;
|
||||
lparams.embedding = @params.EmbeddingMode;
|
||||
lparams.low_vram = @params.LowVram;
|
||||
|
||||
|
||||
/*
|
||||
if (@params.TensorSplits.Length != 1)
|
||||
{
|
||||
throw new ArgumentException("Currently multi-gpu support is not supported by " +
|
||||
"both llama.cpp and LLamaSharp.");
|
||||
}
|
||||
}*/
|
||||
|
||||
lparams.tensor_split = @params.TensorSplits;
|
||||
|
||||
if (!File.Exists(@params.ModelPath))
|
||||
|
|
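With both `ModelParams.TensorSplits` and the native `tensor_split` field now typed as `nint`, the assignment above compiles without any marshalling, and the old length check no longer applies (an `nint` has no `Length`), which is presumably why the multi-GPU guard is commented out rather than updated.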
Binary file not shown.