Revert "Merge branch 'pr/268' into RuntimeDetection"
This reverts commit 091b8d58b3, reversing changes made to 9b2ca9cf8e.

parent 200011e186
commit 5fe721bdbe
@@ -30,7 +30,6 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
     <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta4" />
-    <PackageReference Include="Spectre.Console" Version="0.47.0" />
   </ItemGroup>

   <ItemGroup>
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static Task Run()
+        public static void Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,7 +23,6 @@ namespace LLama.Examples.NewVersion
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
-            return Task.CompletedTask;
         }
     }
 }
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static Task Run()
+        public static void Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,8 +21,6 @@
             {
                 Console.WriteLine("Quantization failed!");
             }
-
-            return Task.CompletedTask;
         }
     }
 }
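The change in this hunk and in the GetEmbeddings hunks above is the same revert from Task-returning to void entry points. A minimal sketch of the two shapes, using a hypothetical example class (names are not from this diff); the methods are renamed here so both can coexist:

    using System;
    using System.Threading.Tasks;

    public static class ExampleEntryPoint
    {
        // Pre-revert shape: a synchronous body exposed as Task so a menu can await it.
        // Task.CompletedTask satisfies the return type without any real async work.
        public static Task RunAsTask()
        {
            Console.WriteLine("doing work");
            return Task.CompletedTask;
        }

        // Post-revert shape: a plain synchronous entry point.
        public static void Run()
        {
            Console.WriteLine("doing work");
        }
    }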
@@ -1,54 +1,109 @@
-using System.Linq.Expressions;
-using Spectre.Console;
-
-namespace LLama.Examples.NewVersion
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
-        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
-        {
-            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
-            {"Run a chat session with the role names stripped.",()=> ChatSessionStripRoleName.Run()},
-            {"Interactive mode chat by using executor.",()=> InteractiveModeExecute.Run()},
-            {"Instruct mode chat by using executor.",()=> InstructModeExecute.Run()},
-            {"Stateless mode chat by using executor.",()=> StatelessModeExecute.Run()},
-            {"Load and save chat session.",()=> SaveAndLoadSession.Run()},
-            {"Load and save state of model and executor.",()=> LoadAndSaveState.Run()},
-            {"Get embeddings from LLama model.",()=> GetEmbeddings.Run()},
-            {"Quantize the model.",()=> QuantizeModel.Run()},
-            {"Automatic conversation.",()=> TalkToYourself.Run()},
-            {"Constrain response to json format using grammar.",()=> GrammarJsonResponse.Run()},
-            {"Semantic Kernel Prompt.",()=> SemanticKernelPrompt.Run()},
-            {"Semantic Kernel Chat.",()=> SemanticKernelChat.Run()},
-            {"Semantic Kernel Memory.",()=> SemanticKernelMemory.Run()},
-            {"Coding Assistant.",()=> CodingAssistant.Run()},
-            {"Batch Decoding.",()=> BatchedDecoding.Run()},
-            {"SK Kernel Memory.",()=> KernelMemory.Run()},
-            {"Exit", ()=> Task.CompletedTask}
-        };
         public static async Task Run()
         {
-            AnsiConsole.Write(new Rule("LLamaSharp Examples"));
+            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");

+            Console.WriteLine("Please input a number to choose an example to run:");
+            Console.WriteLine("0: Run a chat session without stripping the role names.");
+            Console.WriteLine("1: Run a chat session with the role names stripped.");
+            Console.WriteLine("2: Interactive mode chat by using executor.");
+            Console.WriteLine("3: Instruct mode chat by using executor.");
+            Console.WriteLine("4: Stateless mode chat by using executor.");
+            Console.WriteLine("5: Load and save chat session.");
+            Console.WriteLine("6: Load and save state of model and executor.");
+            Console.WriteLine("7: Get embeddings from LLama model.");
+            Console.WriteLine("8: Quantize the model.");
+            Console.WriteLine("9: Automatic conversation.");
+            Console.WriteLine("10: Constrain response to json format using grammar.");
+            Console.WriteLine("11: Semantic Kernel Prompt.");
+            Console.WriteLine("12: Semantic Kernel Chat.");
+            Console.WriteLine("13: Semantic Kernel Memory.");
+            Console.WriteLine("14: Coding Assistant.");
+            Console.WriteLine("15: Batch Decoding.");
+            Console.WriteLine("16: SK Kernel Memory.");

             while (true)
             {
-                var choice = AnsiConsole.Prompt(
-                    new SelectionPrompt<string>()
-                        .Title("Please choose[green] an example[/] to run: ")
-                        .AddChoices(Examples.Keys));
+                Console.Write("\nYour choice: ");
+                int choice = int.Parse(Console.ReadLine());

-                if (Examples.TryGetValue(choice, out var example))
+                if (choice == 0)
                 {
-                    if (choice == "Exit")
-                    {
-                        break;
-                    }
-                    AnsiConsole.Write(new Rule(choice));
-                    await example();
+                    await ChatSessionWithRoleName.Run();
                 }
-
-                AnsiConsole.Clear();
+                else if (choice == 1)
+                {
+                    await ChatSessionStripRoleName.Run();
+                }
+                else if (choice == 2)
+                {
+                    await InteractiveModeExecute.Run();
+                }
+                else if (choice == 3)
+                {
+                    await InstructModeExecute.Run();
+                }
+                else if (choice == 4)
+                {
+                    await StatelessModeExecute.Run();
+                }
+                else if (choice == 5)
+                {
+                    await SaveAndLoadSession.Run();
+                }
+                else if (choice == 6)
+                {
+                    await LoadAndSaveState.Run();
+                }
+                else if (choice == 7)
+                {
+                    GetEmbeddings.Run();
+                }
+                else if (choice == 8)
+                {
+                    QuantizeModel.Run();
+                }
+                else if (choice == 9)
+                {
+                    await TalkToYourself.Run();
+                }
+                else if (choice == 10)
+                {
+                    await GrammarJsonResponse.Run();
+                }
+                else if (choice == 11)
+                {
+                    await SemanticKernelPrompt.Run();
+                }
+                else if (choice == 12)
+                {
+                    await SemanticKernelChat.Run();
+                }
+                else if (choice == 13)
+                {
+                    await SemanticKernelMemory.Run();
+                }
+                else if (choice == 14)
+                {
+                    await CodingAssistant.Run();
+                }
+                else if (choice == 15)
+                {
+                    await BatchedDecoding.Run();
+                }
+                else if (choice == 16)
+                {
+                    await KernelMemory.Run();
+                }
+                else
+                {
+                    Console.WriteLine("Cannot parse your choice. Please select again.");
+                    continue;
+                }
+                break;
             }
         }
     }
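The TestRunner hunk swaps a Spectre.Console SelectionPrompt menu (a dictionary mapping descriptions to Func&lt;Task&gt; handlers) back to a numbered Console menu with an if/else chain. A minimal sketch of the dictionary-driven style being removed, assuming the Spectre.Console package; the entries are placeholders, not from this diff:

    using System;
    using System.Collections.Generic;
    using System.Threading.Tasks;
    using Spectre.Console;

    var examples = new Dictionary<string, Func<Task>>
    {
        { "Say hello", () => { Console.WriteLine("hello"); return Task.CompletedTask; } },
        { "Exit", () => Task.CompletedTask },
    };

    while (true)
    {
        // SelectionPrompt renders an arrow-key menu and returns the chosen key,
        // so there is no int.Parse and no invalid-input branch.
        var choice = AnsiConsole.Prompt(
            new SelectionPrompt<string>()
                .Title("Choose an example:")
                .AddChoices(examples.Keys));

        if (choice == "Exit")
            break;

        await examples[choice]();
    }

Keeping the menu text and the handler in one dictionary is why the numbered WriteLine block and the whole if/else chain disappear in the pre-revert version.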
@@ -17,9 +17,9 @@ namespace LLama.Web.Common
         public int MaxInstances { get; set; }

         /// <summary>
-        /// Model context size (n_ctx). Null to use value from model.
+        /// Model context size (n_ctx)
         /// </summary>
-        public uint? ContextSize { get; set; }
+        public uint ContextSize { get; set; } = 512;

         /// <summary>
         /// the GPU that is used for scratch and small tensors
@@ -8,9 +8,9 @@ namespace LLama.Abstractions;
 public interface IContextParams
 {
     /// <summary>
-    /// Model context size (n_ctx). Null to use value from model file.
+    /// Model context size (n_ctx)
     /// </summary>
-    uint? ContextSize { get; set; }
+    uint ContextSize { get; set; }

     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
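This interface change is the core of the ContextSize revert: the property goes from uint? (null = take n_ctx from the model file) back to a plain uint. A tiny sketch of the nullable convention being removed (not from the diff):

    using System;

    uint? contextSize = null;       // caller did not override the context size
    uint n_ctx = contextSize ?? 0;  // null maps to the native sentinel 0 = "from model"
    Console.WriteLine(n_ctx);       // prints 0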
@@ -43,7 +43,7 @@ namespace LLama.Common
        /// <param name="data"></param>
        public FixedSizeQueue(int size, IEnumerable<T> data)
        {
-#if NET6_0_OR_GREATER
+#if !NETSTANDARD2_0
            // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
            // in which case we'll have to check later
            if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
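TryGetNonEnumeratedCount is a .NET 6 LINQ API that reports a sequence's count only when it is cheaply knowable, so the constructor can reject oversized input without consuming a lazy enumerable; the guard change above only alters which target frameworks compile that fast path. A small demonstration:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    class Demo
    {
        static void Main()
        {
            IEnumerable<int> list = new List<int> { 1, 2, 3 };
            IEnumerable<int> lazy = Enumerable.Range(0, 3).Where(x => x > 0);

            // True: List<T> already knows its count, nothing is enumerated.
            Console.WriteLine(list.TryGetNonEnumeratedCount(out var n1)); // True, n1 == 3

            // False: counting the query would force enumeration, so it declines.
            Console.WriteLine(lazy.TryGetNonEnumeratedCount(out var n2)); // False
        }
    }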
@@ -12,68 +12,105 @@ namespace LLama.Common
     public record ModelParams
         : ILLamaParams
     {
-        /// <inheritdoc />
-        public uint? ContextSize { get; set; }
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Model context size (n_ctx)
+        /// </summary>
+        public uint ContextSize { get; set; } = 512;
+        /// <summary>
+        /// the GPU that is used for scratch and small tensors
+        /// </summary>
         public int MainGpu { get; set; } = 0;

-        /// <inheritdoc />
+        /// <summary>
+        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
+        /// </summary>
         public int GpuLayerCount { get; set; } = 20;

-        /// <inheritdoc />
+        /// <summary>
+        /// Seed for the random number generator (seed)
+        /// </summary>
         public uint Seed { get; set; } = 0xFFFFFFFF;

-        /// <inheritdoc />
+        /// <summary>
+        /// Use f16 instead of f32 for memory kv (memory_f16)
+        /// </summary>
         public bool UseFp16Memory { get; set; } = true;

-        /// <inheritdoc />
+        /// <summary>
+        /// Use mmap for faster loads (use_mmap)
+        /// </summary>
         public bool UseMemorymap { get; set; } = true;

-        /// <inheritdoc />
+        /// <summary>
+        /// Use mlock to keep model in memory (use_mlock)
+        /// </summary>
         public bool UseMemoryLock { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// Compute perplexity over the prompt (perplexity)
+        /// </summary>
         public bool Perplexity { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// Model path (model)
+        /// </summary>
         public string ModelPath { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// List of LoRAs to apply
+        /// </summary>
         public AdapterCollection LoraAdapters { get; set; } = new();

-        /// <inheritdoc />
+        /// <summary>
+        /// base model path for the lora adapter (lora_base)
+        /// </summary>
         public string LoraBase { get; set; } = string.Empty;

-        /// <inheritdoc />
+        /// <summary>
+        /// Number of threads (null = autodetect) (n_threads)
+        /// </summary>
         public uint? Threads { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// Number of threads to use for batch processing (null = autodetect) (n_threads)
+        /// </summary>
         public uint? BatchThreads { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
+        /// </summary>
         public uint BatchSize { get; set; } = 512;

-        /// <inheritdoc />
+        /// <summary>
+        /// Whether to use embedding mode. (embedding) Note that if this is set to true,
+        /// The LLamaModel won't produce text response anymore.
+        /// </summary>
         public bool EmbeddingMode { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// how split tensors should be distributed across GPUs.
+        /// </summary>
+        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
         [JsonConverter(typeof(TensorSplitsCollectionConverter))]
         public TensorSplitsCollection TensorSplits { get; set; } = new();

-        /// <inheritdoc />
-        public float? RopeFrequencyBase { get; set; }
+        /// <summary>
+        /// RoPE base frequency
+        /// </summary>
+        public float? RopeFrequencyBase { get; set; }

-        /// <inheritdoc />
-        public float? RopeFrequencyScale { get; set; }
+        /// <summary>
+        /// RoPE frequency scaling factor
+        /// </summary>
+        public float? RopeFrequencyScale { get; set; }

-        /// <inheritdoc />
-        public bool MulMatQ { get; set; }
+        /// <summary>
+        /// Use experimental mul_mat_q kernels
+        /// </summary>
+        public bool MulMatQ { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// Load vocab only (no weights)
+        /// </summary>
         public bool VocabOnly { get; set; }

-        /// <inheritdoc />
+        /// <summary>
+        /// The encoding to use to convert text for the model
+        /// </summary>
         [JsonConverter(typeof(EncodingConverter))]
         public Encoding Encoding { get; set; } = Encoding.UTF8;

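The pre-revert ModelParams relied on &lt;inheritdoc /&gt;, which tells the compiler's documentation tooling to copy the XML docs from the interface member being implemented, so each description lives only on ILLamaParams/IContextParams; the revert duplicates full &lt;summary&gt; blocks onto the record. A sketch of the mechanism with hypothetical types:

    // Doc text is written once, on the interface.
    public interface IHasContext
    {
        /// <summary>Model context size (n_ctx).</summary>
        uint? ContextSize { get; set; }
    }

    public record Params : IHasContext
    {
        /// <inheritdoc />
        public uint? ContextSize { get; set; }
    }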
@@ -9,8 +9,6 @@ namespace LLama.Extensions
        {
            return GetValueOrDefaultImpl(dictionary, key, defaultValue);
        }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
#endif

        internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
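The removed #elif/#error pair is a compile-time guard shared by several of these extension classes: a polyfill is compiled for old targets, the built-in API is used on new ones, and any other target fails the build loudly instead of silently missing a method. A sketch of the pattern, using GetValueOrDefault as the example (the polyfill body here is illustrative):

    using System.Collections.Generic;

    internal static class DictionaryPolyfill
    {
    #if NETSTANDARD2_0
        // Older targets lack GetValueOrDefault, so supply it.
        public static TValue GetValueOrDefault<TKey, TValue>(
            this IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
        {
            return dictionary.TryGetValue(key, out var value) ? value : defaultValue;
        }
    #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
    #endif
    }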
@@ -15,8 +15,6 @@ internal static class EncodingExtensions
    {
        return GetCharCountImpl(encoding, bytes);
    }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
#endif

    internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
@@ -21,7 +21,7 @@ namespace LLama.Extensions
         public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
         {
             result = NativeApi.llama_context_default_params();
-            result.n_ctx = @params.ContextSize ?? 0;
+            result.n_ctx = @params.ContextSize;
             result.n_batch = @params.BatchSize;
             result.seed = @params.Seed;
             result.f16_kv = @params.UseFp16Memory;
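This hunk is where the nullable property met the native struct: the managed value is copied onto the parameter block returned by llama_context_default_params(). A hypothetical mirror of the mapping (types are made up; the real code uses LLamaContextParams):

    internal struct NativeParams { public uint n_ctx; public uint n_batch; }

    internal static class ParamMapper
    {
        public static NativeParams ToNative(uint? contextSize, uint batchSize)
        {
            var result = default(NativeParams);  // stand-in for the native defaults call
            result.n_ctx = contextSize ?? 0;     // pre-revert: null -> 0, read as "from model"
            result.n_batch = batchSize;
            return result;
        }
    }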
@@ -10,8 +10,6 @@ namespace LLama.Extensions
        {
            return TakeLastImpl(source, count);
        }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
#endif

        internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)
@@ -19,7 +19,5 @@ internal static class KeyValuePairExtensions
        first = pair.Key;
        second = pair.Value;
    }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
#endif
}
@@ -5,7 +5,7 @@ namespace LLama.Extensions
 {
     internal static class ListExtensions
     {
-#if !NET6_0_OR_GREATER
+#if NETSTANDARD2_0
         public static void EnsureCapacity<T>(this List<T> list, int capacity)
         {
             if (list.Capacity < capacity)
@@ -47,7 +47,6 @@
   </ItemGroup>

   <ItemGroup>
-    <PackageReference Include="ManagedCuda" Version="10.0.0" />
     <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.1" />
   </ItemGroup>

@@ -22,7 +22,7 @@ namespace LLama.Native
        public uint seed;

        /// <summary>
-        /// text context, 0 = from model
+        /// text context
        /// </summary>
        public uint n_ctx;

@@ -1,13 +1,8 @@
 using System;
 using System.Buffers;
-using System.Reflection;
 using System.Runtime.InteropServices;
 using System.Text;
 using LLama.Exceptions;
-using ManagedCuda;
-#if NET6_0_OR_GREATER
-using System.Runtime.Intrinsics.X86;
-#endif

 #pragma warning disable IDE1006 // Naming Styles

@@ -29,9 +24,8 @@ namespace LLama.Native
     {
         static NativeApi()
         {
-#if NET6_0_OR_GREATER
-            NativeLibrary.SetDllImportResolver(typeof(NativeApi).Assembly, LLamaImportResolver);
-#endif
+            // Try to load a preferred library, based on CPU feature detection
+            TryLoadLibrary();

             try
             {
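The two loading strategies differ mainly in when the decision happens: SetDllImportResolver registers a callback that runs lazily on the first P/Invoke into the library, while calling TryLoadLibrary() in the static constructor eagerly loads a preferred binary so later DllImports bind to the already-loaded module. A minimal resolver sketch (.NET 6+; the library name and path are placeholders):

    using System;
    using System.Reflection;
    using System.Runtime.InteropServices;

    static class Loader
    {
        static Loader()
        {
            // Registers once; Resolve is invoked on the first P/Invoke per library name.
            NativeLibrary.SetDllImportResolver(typeof(Loader).Assembly, Resolve);
        }

        private static IntPtr Resolve(string name, Assembly assembly, DllImportSearchPath? searchPath)
        {
            if (name != "libllama")
                return IntPtr.Zero;  // IntPtr.Zero falls back to the default search

            // A real resolver would pick a specialised build here (CUDA, AVX level, ...).
            return NativeLibrary.TryLoad("avx2/libllama", assembly, searchPath, out var handle)
                ? handle
                : IntPtr.Zero;
        }
    }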
@@ -50,120 +44,63 @@ namespace LLama.Native
         }

         /// <summary>
-        /// Get the cuda version if possible.
+        /// Try to load libllama, using CPU feature detection to try and load a more specialised DLL if possible
         /// </summary>
-        /// <returns> -1 for no cuda</returns>
-        private static int GetCudaVersion()
+        /// <returns>The library handle to unload later, or IntPtr.Zero if no library was loaded</returns>
+        private static IntPtr TryLoadLibrary()
         {
-            int deviceCount = CudaContext.GetDeviceCount();
-            for (int deviceIndex = 0; deviceIndex < deviceCount; deviceIndex++)
-            {
-                using (CudaContext ctx = new CudaContext(deviceIndex))
-                {
-                    var version = ctx.GetAPIVersionOfCurrentContext();
-                    return version.Major;
-                }
-            }
-            return -1;
-        }
-
-        /// <summary>
-        /// Get the xla flag for native library name.
-        /// </summary>
-        /// <returns></returns>
-        private static string GetAvxFlag()
-        {
-            AvxLevel level = AvxLevel.None;
-#if NET6_0_OR_GREATER
-            if (Avx.IsSupported) level = AvxLevel.Avx;
-            if (Avx2.IsSupported) level = AvxLevel.Avx2;
-#if NET8_0_OR_GREATER
-            if(Avx512F.IsSupported) level = AvxLevel.Avx512;
-#endif
-
-            return level switch
-            {
-                AvxLevel.None => "",
-                AvxLevel.Avx => "-avx",
-                AvxLevel.Avx2 => "-avx2",
-                AvxLevel.Avx512 => "-avx512",
-            };
-#else
-            return string.Empty;
-#endif
-        }
-
 #if NET6_0_OR_GREATER
-        private static IntPtr LLamaImportResolver(string name, Assembly assembly, DllImportSearchPath? searchPath)
-        {
-            IntPtr handle = IntPtr.Zero;
-            if(!name.Equals(libraryName))
-            {
-                return NativeLibrary.Load(name, assembly, searchPath);
-            }
-
-            string libraryPath = string.Empty;
             if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
             {
-                var avxFlag = GetAvxFlag();
-                // check cuda
-                var cudaVersion = GetCudaVersion();
-                if(cudaVersion == 11)
-                {
-                    libraryPath = $"runtimes/win-x64/native/libllama-cuda11{avxFlag}.dll";
-                }
-                else if (cudaVersion == 12)
-                {
-                    libraryPath = $"runtimes/win-x64/native/libllama-cuda12{avxFlag}.dll";
-                }
-                else if(cudaVersion == -1) // cpu version
-                {
-                    libraryPath = $"runtimes/win-x64/native/libllama{avxFlag}.dll";
-                }
-                else
-                {
-                    throw new NotImplementedException($"Cuda version {cudaVersion} has not been supported, please compile dll yourself or open an issue in LLamaSharp.");
-                }
-            }
-            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
-            {
-                var avxFlag = GetAvxFlag();
-                // check cuda
-                var cudaVersion = GetCudaVersion();
-                if (cudaVersion == 11)
-                {
-                    libraryPath = $"runtimes/linux-x64/native/libllama-cuda11{avxFlag}.so";
-                }
-                else if (cudaVersion == 12)
-                {
-                    libraryPath = $"runtimes/linux-x64/native/libllama-cuda12{avxFlag}.so";
-                }
-                else if (cudaVersion == -1) // cpu version
-                {
-                    libraryPath = $"runtimes/linux-x64/native/libllama{avxFlag}.so";
-                }
-                else
-                {
-                    throw new NotImplementedException($"Cuda version {cudaVersion} has not been supported, please compile dll yourself or open an issue in LLamaSharp.");
-                }
-            }
-            else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
-            {
-                if (System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported)
-                {
-                    libraryPath = $"runtimes/osx-arm64/native/libllama.dylib";
-                }
-                else
-                {
-                    libraryPath = $"runtimes/osx-x64/native/libllama.dylib";
-                }
+                // All of the Windows libraries, in order of preference
+                return TryLoad("cu12.1.0/libllama.dll")
+                    ?? TryLoad("cu11.7.1/libllama.dll")
+#if NET8_0_OR_GREATER
+                    ?? TryLoad("avx512/libllama.dll", System.Runtime.Intrinsics.X86.Avx512.IsSupported)
+#endif
+                    ?? TryLoad("avx2/libllama.dll", System.Runtime.Intrinsics.X86.Avx2.IsSupported)
+                    ?? TryLoad("avx/libllama.dll", System.Runtime.Intrinsics.X86.Avx.IsSupported)
+                    ?? IntPtr.Zero;
             }

-            NativeLibrary.TryLoad(libraryPath, assembly, searchPath, out handle);
-            return handle;
-        }
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+            {
+                // All of the Linux libraries, in order of preference
+                return TryLoad("cu12.1.0/libllama.so")
+                    ?? TryLoad("cu11.7.1/libllama.so")
+#if NET8_0_OR_GREATER
+                    ?? TryLoad("avx512/libllama.so", System.Runtime.Intrinsics.X86.Avx512.IsSupported)
+#endif
+                    ?? TryLoad("avx2/libllama.so", System.Runtime.Intrinsics.X86.Avx2.IsSupported)
+                    ?? TryLoad("avx/libllama.so", System.Runtime.Intrinsics.X86.Avx.IsSupported)
+                    ?? IntPtr.Zero;
+            }
+
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+            {
+                return TryLoad("runtimes/macos-arm64/libllama.dylib", System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported)
+                    ?? TryLoad("runtimes/macos-x86_64/libllama.dylib")
+                    ?? IntPtr.Zero;
+            }
 #endif

+            return IntPtr.Zero;

+#if NET6_0_OR_GREATER
+            // Try to load a DLL from the path if supported. Returns null if nothing is loaded.
+            static IntPtr? TryLoad(string path, bool supported = true)
+            {
+                if (!supported)
+                    return null;

+                if (NativeLibrary.TryLoad(path, out var handle))
+                    return handle;

+                return null;
+            }
+#endif
+        }

         private const string libraryName = "libllama";

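One detail worth noting in the restored loader: the local TryLoad helper returns IntPtr? rather than IntPtr, so the ?? chain can distinguish "nothing loaded" (null) from a real handle, and the supported flag lets a CPU-feature check skip a candidate without an extra if. A condensed sketch with made-up paths:

    using System;
    using System.Runtime.InteropServices;

    static class Cascade
    {
        // Returns the first library that loads, most specialised first.
        public static IntPtr LoadPreferred()
        {
            return TryLoad("fancy/lib.so", supported: false)  // skipped: feature check failed
                ?? TryLoad("plain/lib.so")                    // first real attempt
                ?? IntPtr.Zero;                               // nothing loaded

            static IntPtr? TryLoad(string path, bool supported = true)
            {
                if (!supported)
                    return null;
                return NativeLibrary.TryLoad(path, out var handle) ? handle : (IntPtr?)null;
            }
        }
    }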
@@ -637,13 +574,5 @@ namespace LLama.Native
        /// <returns></returns>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_set_n_threads(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch);
-
-        private enum AvxLevel
-        {
-            None = 0,
-            Avx = 1,
-            Avx2 = 2,
-            Avx512 = 3
-        }
    }
}
@@ -11,7 +11,7 @@


 **The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**

 **Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**
