Removed all setters in `IModelParams` and `IContextParams`, allowing implementations to be immutable.
This commit is contained in:
parent f9a9aaabca
commit 9b995510d6
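
The practical effect for callers: parameter objects typed as these interfaces can no longer be reassigned after creation, so all configuration moves to construction time. A minimal before/after sketch of the calling pattern (the property and constructor shown are taken from the diff below; the old mutating line is reconstructed only for contrast):

    // Before this commit the interfaces exposed setters, so this compiled:
    //     IContextParams p = new ModelParams(Constants.ModelPath);
    //     p.EmbeddingMode = true;   // mutate after construction
    //
    // After it, configuration happens once, via an object initializer:
    var @params = new ModelParams(Constants.ModelPath)
    {
        EmbeddingMode = true,   // fixed for the lifetime of the object
    };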
@@ -9,7 +9,10 @@ public sealed class LLamaEmbedderTests

     public LLamaEmbedderTests()
     {
-        var @params = new ModelParams(Constants.ModelPath);
+        var @params = new ModelParams(Constants.ModelPath)
+        {
+            EmbeddingMode = true,
+        };
         using var weights = LLamaWeights.LoadFromFile(@params);
         _embedder = new(weights, @params);
     }

@@ -11,91 +11,91 @@ public interface IContextParams
     /// <summary>
     /// Model context size (n_ctx)
     /// </summary>
-    uint? ContextSize { get; set; }
+    uint? ContextSize { get; }

     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
     /// </summary>
-    uint BatchSize { get; set; }
+    uint BatchSize { get; }

     /// <summary>
     /// Seed for the random number generator (seed)
     /// </summary>
-    uint Seed { get; set; }
+    uint Seed { get; }

     /// <summary>
     /// Whether to use embedding mode. (embedding) Note that if this is set to true,
     /// The LLamaModel won't produce text response anymore.
     /// </summary>
-    bool EmbeddingMode { get; set; }
+    bool EmbeddingMode { get; }

     /// <summary>
     /// RoPE base frequency (null to fetch from the model)
     /// </summary>
-    float? RopeFrequencyBase { get; set; }
+    float? RopeFrequencyBase { get; }

     /// <summary>
     /// RoPE frequency scaling factor (null to fetch from the model)
     /// </summary>
-    float? RopeFrequencyScale { get; set; }
+    float? RopeFrequencyScale { get; }

     /// <summary>
     /// The encoding to use for models
     /// </summary>
-    Encoding Encoding { get; set; }
+    Encoding Encoding { get; }

     /// <summary>
     /// Number of threads (null = autodetect) (n_threads)
     /// </summary>
-    uint? Threads { get; set; }
+    uint? Threads { get; }

     /// <summary>
     /// Number of threads to use for batch processing (null = autodetect) (n_threads)
     /// </summary>
-    uint? BatchThreads { get; set; }
+    uint? BatchThreads { get; }

     /// <summary>
     /// YaRN extrapolation mix factor (null = from model)
     /// </summary>
-    float? YarnExtrapolationFactor { get; set; }
+    float? YarnExtrapolationFactor { get; }

     /// <summary>
     /// YaRN magnitude scaling factor (null = from model)
     /// </summary>
-    float? YarnAttentionFactor { get; set; }
+    float? YarnAttentionFactor { get; }

     /// <summary>
     /// YaRN low correction dim (null = from model)
     /// </summary>
-    float? YarnBetaFast { get; set; }
+    float? YarnBetaFast { get; }

     /// <summary>
     /// YaRN high correction dim (null = from model)
     /// </summary>
-    float? YarnBetaSlow { get; set; }
+    float? YarnBetaSlow { get; }

     /// <summary>
     /// YaRN original context length (null = from model)
     /// </summary>
-    uint? YarnOriginalContext { get; set; }
+    uint? YarnOriginalContext { get; }

     /// <summary>
     /// YaRN scaling method to use.
     /// </summary>
-    RopeScalingType? YarnScalingType { get; set; }
+    RopeScalingType? YarnScalingType { get; }

     /// <summary>
     /// Override the type of the K cache
     /// </summary>
-    GGMLType? TypeK { get; set; }
+    GGMLType? TypeK { get; }

     /// <summary>
     /// Override the type of the V cache
     /// </summary>
-    GGMLType? TypeV { get; set; }
+    GGMLType? TypeV { get; }

     /// <summary>
     /// Whether to disable offloading the KQV cache to the GPU
     /// </summary>
-    bool NoKqvOffload { get; set; }
+    bool NoKqvOffload { get; }
 }

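Because every member of `IContextParams` is now getter-only, an implementing type is free to be fully immutable. A hypothetical sketch of the pattern (this type is not part of the commit; it only illustrates how a getter-only contract pairs with C# init-only properties):

    // Hypothetical reduced interface mirroring the getter-only shape above.
    public interface IExampleParams
    {
        uint BatchSize { get; }
        bool EmbeddingMode { get; }
    }

    // Init-only properties satisfy the getter-only contract while making
    // instances impossible to mutate after construction.
    public sealed record ExampleParams : IExampleParams
    {
        public uint BatchSize { get; init; } = 512;
        public bool EmbeddingMode { get; init; }
    }
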
@@ -18,37 +18,37 @@ namespace LLama.Abstractions
         /// <summary>
         /// the GPU that is used for scratch and small tensors
         /// </summary>
-        int MainGpu { get; set; }
+        int MainGpu { get; }

         /// <summary>
         /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
         /// </summary>
-        int GpuLayerCount { get; set; }
+        int GpuLayerCount { get; }

         /// <summary>
         /// Use mmap for faster loads (use_mmap)
         /// </summary>
-        bool UseMemorymap { get; set; }
+        bool UseMemorymap { get; }

         /// <summary>
         /// Use mlock to keep model in memory (use_mlock)
         /// </summary>
-        bool UseMemoryLock { get; set; }
+        bool UseMemoryLock { get; }

         /// <summary>
         /// Model path (model)
         /// </summary>
-        string ModelPath { get; set; }
+        string ModelPath { get; }

         /// <summary>
        /// how split tensors should be distributed across GPUs
         /// </summary>
-        TensorSplitsCollection TensorSplits { get; set; }
+        TensorSplitsCollection TensorSplits { get; }

         /// <summary>
         /// Load vocab only (no weights)
         /// </summary>
-        bool VocabOnly { get; set; }
+        bool VocabOnly { get; }

         /// <summary>
         /// List of LoRA adapters to apply

@@ -58,7 +58,7 @@ namespace LLama.Abstractions
         /// <summary>
         /// base model path for the lora adapter (lora_base)
         /// </summary>
-        string LoraBase { get; set; }
+        string LoraBase { get; }

         /// <summary>
         /// Override specific metadata items in the model

@@ -25,14 +25,12 @@ public static class IModelParamsExtensions
                 throw new NotSupportedException("'UseMemoryLock' is not supported (llama_mlock_supported() == false)");
             if (@params.UseMemorymap && !NativeApi.llama_mmap_supported())
                 throw new NotSupportedException("'UseMemorymap' is not supported (llama_mmap_supported() == false)");
-            if (@params.GpuLayerCount < 0)
-                @params.GpuLayerCount = int.MaxValue;

             var disposer = new GroupDisposable();

             result = NativeApi.llama_model_default_params();
             result.main_gpu = @params.MainGpu;
-            result.n_gpu_layers = @params.GpuLayerCount;
+            result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
             result.use_mlock = @params.UseMemoryLock;
             result.use_mmap = @params.UseMemorymap;
             result.vocab_only = @params.VocabOnly;

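With `@params` read-only, the extension can no longer clamp a negative `GpuLayerCount` by writing back into the caller's object; the sentinel is resolved inline in the conversion instead, leaving `@params` untouched. The same idea as a standalone pure function (hypothetical helper name, matching the semantics of the ternary above):

    // A negative layer count is a sentinel for "offload all layers".
    // Resolve it in the output rather than mutating the input.
    static int ResolveGpuLayerCount(int requested)
        => requested < 0 ? int.MaxValue : requested;
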
@@ -30,7 +30,9 @@ namespace LLama
         /// <param name="logger"></param>
         public LLamaEmbedder(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
         {
-            @params.EmbeddingMode = true;
+            if (!@params.EmbeddingMode)
+                throw new ArgumentException("EmbeddingMode must be true", nameof(@params));
+
             Context = weights.CreateContext(@params, logger);
         }

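`LLamaEmbedder` previously forced `EmbeddingMode = true` on the caller's parameters; since it can no longer mutate them, it validates instead and throws if the flag is unset. A usage sketch following the updated test at the top of this diff (`Constants.ModelPath` stands in for a real model file):

    var @params = new ModelParams(Constants.ModelPath)
    {
        EmbeddingMode = true,   // now the caller's responsibility
    };
    using var weights = LLamaWeights.LoadFromFile(@params);
    var embedder = new LLamaEmbedder(weights, @params);
    // Passing parameters with EmbeddingMode == false throws ArgumentException.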