// LLamaSharp/LLama/Extensions/IModelParamsExtensions.cs

using System;
using System.Buffers;
using System.IO;
using LLama.Abstractions;
using LLama.Native;

namespace LLama.Extensions
{
    /// <summary>
    /// Extension methods for the <see cref="IModelParams"/> interface
    /// </summary>
    public static class IModelParamsExtensions
    {
        /// <summary>
        /// Convert the given <see cref="IModelParams"/> into a <see cref="LLamaContextParams"/>
        /// </summary>
        /// <param name="params">The parameters to convert from</param>
        /// <param name="result">The native context parameters, populated from <paramref name="params"/></param>
        /// <returns>A <see cref="MemoryHandle"/> pinning <see cref="IModelParams.TensorSplits"/>; dispose it once the native call using <paramref name="result"/> has completed</returns>
        /// <exception cref="FileNotFoundException">Thrown if the model file does not exist</exception>
        /// <exception cref="ArgumentException">Thrown if <see cref="IModelParams.TensorSplits"/> is non-null and does not contain exactly one element</exception>
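        /// <example>
        /// A minimal usage sketch. The <c>ModelParams</c> type and the model path here are
        /// assumptions for illustration, not part of this file:
        /// <code>
        /// var modelParams = new ModelParams("path/to/model.bin");
        /// using (modelParams.ToLlamaContextParams(out var ctxParams))
        /// {
        ///     // Pass ctxParams to the native API here; the using block keeps
        ///     // TensorSplits pinned until the handle is disposed.
        /// }
        /// </code>
        /// </example>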
        public static MemoryHandle ToLlamaContextParams(this IModelParams @params, out LLamaContextParams result)
        {
            if (!File.Exists(@params.ModelPath))
                throw new FileNotFoundException($"The model file does not exist: {@params.ModelPath}");
            if (@params.TensorSplits != null && @params.TensorSplits.Length != 1)
                throw new ArgumentException("Multi-GPU is not currently supported by llama.cpp or LLamaSharp, so TensorSplits must be null or contain exactly one element.");
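
            // Copy the managed parameters into the native llama_context_params struct.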
            result = NativeApi.llama_context_default_params();
            result.n_ctx = @params.ContextSize;
            result.n_batch = @params.BatchSize;
            result.main_gpu = @params.MainGpu;
            result.n_gpu_layers = @params.GpuLayerCount;
            result.seed = @params.Seed;
            result.f16_kv = @params.UseFp16Memory;
            result.use_mmap = @params.UseMemorymap;
            result.use_mlock = @params.UseMemoryLock;
            result.logits_all = @params.Perplexity;
            result.embedding = @params.EmbeddingMode;
            result.low_vram = @params.LowVram;
            result.rope_freq_base = @params.RopeFrequencyBase;
            result.rope_freq_scale = @params.RopeFrequencyScale;
            result.mul_mat_q = @params.MulMatQ;
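
            // Pin TensorSplits so the native side can hold a stable pointer into the
            // managed array; the caller must dispose the returned handle to unpin it.
            // If TensorSplits is null, AsMemory() yields an empty Memory<float> and
            // the pinned pointer is null.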
            var pin = @params.TensorSplits.AsMemory().Pin();
            unsafe
            {
                result.tensor_split = (nint)pin.Pointer;
            }
            return pin;
        }
    }
}