Removed all setters in `IModelParams` and `IContextParams`, allowing implementations to be immutable.

commit 9b995510d6
parent f9a9aaabca
Author: Martin Evans
Date: 2024-01-31 17:51:50 +00:00

5 changed files with 34 additions and 31 deletions
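For downstream code the practical effect is that parameters must be fully specified up front, typically via an object initializer on a concrete type, rather than mutated through the interfaces afterwards. A minimal sketch of the migration, with a placeholder model path:

    // Mutating through the interfaces no longer compiles:
    // IContextParams p = ...;
    // p.EmbeddingMode = true;   // error CS0200: property cannot be assigned to (it is read only)

    // Set everything when the object is created instead:
    var @params = new ModelParams("model.gguf") // placeholder path
    {
        EmbeddingMode = true,
    };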

View File

@@ -9,7 +9,10 @@ public sealed class LLamaEmbedderTests
     public LLamaEmbedderTests()
     {
-        var @params = new ModelParams(Constants.ModelPath);
+        var @params = new ModelParams(Constants.ModelPath)
+        {
+            EmbeddingMode = true,
+        };
         using var weights = LLamaWeights.LoadFromFile(@params);
         _embedder = new(weights, @params);
     }

View File

@@ -11,91 +11,91 @@ public interface IContextParams
     /// <summary>
     /// Model context size (n_ctx)
     /// </summary>
-    uint? ContextSize { get; set; }
+    uint? ContextSize { get; }

     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
     /// </summary>
-    uint BatchSize { get; set; }
+    uint BatchSize { get; }

     /// <summary>
     /// Seed for the random number generator (seed)
     /// </summary>
-    uint Seed { get; set; }
+    uint Seed { get; }

     /// <summary>
     /// Whether to use embedding mode. (embedding) Note that if this is set to true,
     /// the LLamaModel won't produce text responses anymore.
     /// </summary>
-    bool EmbeddingMode { get; set; }
+    bool EmbeddingMode { get; }

     /// <summary>
     /// RoPE base frequency (null to fetch from the model)
     /// </summary>
-    float? RopeFrequencyBase { get; set; }
+    float? RopeFrequencyBase { get; }

     /// <summary>
     /// RoPE frequency scaling factor (null to fetch from the model)
     /// </summary>
-    float? RopeFrequencyScale { get; set; }
+    float? RopeFrequencyScale { get; }

     /// <summary>
     /// The encoding to use for models
     /// </summary>
-    Encoding Encoding { get; set; }
+    Encoding Encoding { get; }

     /// <summary>
     /// Number of threads (null = autodetect) (n_threads)
     /// </summary>
-    uint? Threads { get; set; }
+    uint? Threads { get; }

     /// <summary>
     /// Number of threads to use for batch processing (null = autodetect) (n_threads_batch)
     /// </summary>
-    uint? BatchThreads { get; set; }
+    uint? BatchThreads { get; }

     /// <summary>
     /// YaRN extrapolation mix factor (null = from model)
     /// </summary>
-    float? YarnExtrapolationFactor { get; set; }
+    float? YarnExtrapolationFactor { get; }

     /// <summary>
     /// YaRN magnitude scaling factor (null = from model)
     /// </summary>
-    float? YarnAttentionFactor { get; set; }
+    float? YarnAttentionFactor { get; }

     /// <summary>
     /// YaRN low correction dim (null = from model)
     /// </summary>
-    float? YarnBetaFast { get; set; }
+    float? YarnBetaFast { get; }

     /// <summary>
     /// YaRN high correction dim (null = from model)
     /// </summary>
-    float? YarnBetaSlow { get; set; }
+    float? YarnBetaSlow { get; }

     /// <summary>
     /// YaRN original context length (null = from model)
     /// </summary>
-    uint? YarnOriginalContext { get; set; }
+    uint? YarnOriginalContext { get; }

     /// <summary>
     /// YaRN scaling method to use.
     /// </summary>
-    RopeScalingType? YarnScalingType { get; set; }
+    RopeScalingType? YarnScalingType { get; }

     /// <summary>
     /// Override the type of the K cache
     /// </summary>
-    GGMLType? TypeK { get; set; }
+    GGMLType? TypeK { get; }

     /// <summary>
     /// Override the type of the V cache
     /// </summary>
-    GGMLType? TypeV { get; set; }
+    GGMLType? TypeV { get; }

     /// <summary>
     /// Whether to disable offloading the KQV cache to the GPU
     /// </summary>
-    bool NoKqvOffload { get; set; }
+    bool NoKqvOffload { get; }
 }
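Since every member of IContextParams is now get-only, a conforming implementation can be truly immutable. A minimal sketch (not part of this commit) using a C# record with init-only properties; the LLama.Native namespace for RopeScalingType and GGMLType, and the default values, are assumptions:

    using System.Text;
    using LLama.Native; // assumed home of RopeScalingType and GGMLType

    // Hypothetical immutable implementation: values can only be supplied at
    // construction time, and `init` accessors satisfy the get-only interface.
    public record ImmutableContextParams : IContextParams
    {
        public uint? ContextSize { get; init; }
        public uint BatchSize { get; init; } = 512;   // illustrative default
        public uint Seed { get; init; } = 1337;       // illustrative default
        public bool EmbeddingMode { get; init; }
        public float? RopeFrequencyBase { get; init; }
        public float? RopeFrequencyScale { get; init; }
        public Encoding Encoding { get; init; } = Encoding.UTF8;
        public uint? Threads { get; init; }
        public uint? BatchThreads { get; init; }
        public float? YarnExtrapolationFactor { get; init; }
        public float? YarnAttentionFactor { get; init; }
        public float? YarnBetaFast { get; init; }
        public float? YarnBetaSlow { get; init; }
        public uint? YarnOriginalContext { get; init; }
        public RopeScalingType? YarnScalingType { get; init; }
        public GGMLType? TypeK { get; init; }
        public GGMLType? TypeV { get; init; }
        public bool NoKqvOffload { get; init; }
    }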

View File

@@ -18,37 +18,37 @@ namespace LLama.Abstractions
         /// <summary>
         /// the GPU that is used for scratch and small tensors
         /// </summary>
-        int MainGpu { get; set; }
+        int MainGpu { get; }

         /// <summary>
         /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
         /// </summary>
-        int GpuLayerCount { get; set; }
+        int GpuLayerCount { get; }

         /// <summary>
         /// Use mmap for faster loads (use_mmap)
         /// </summary>
-        bool UseMemorymap { get; set; }
+        bool UseMemorymap { get; }

         /// <summary>
         /// Use mlock to keep model in memory (use_mlock)
         /// </summary>
-        bool UseMemoryLock { get; set; }
+        bool UseMemoryLock { get; }

         /// <summary>
         /// Model path (model)
         /// </summary>
-        string ModelPath { get; set; }
+        string ModelPath { get; }

         /// <summary>
         /// how split tensors should be distributed across GPUs
         /// </summary>
-        TensorSplitsCollection TensorSplits { get; set; }
+        TensorSplitsCollection TensorSplits { get; }

         /// <summary>
         /// Load vocab only (no weights)
         /// </summary>
-        bool VocabOnly { get; set; }
+        bool VocabOnly { get; }

         /// <summary>
         /// List of LoRA adapters to apply
@@ -58,7 +58,7 @@ namespace LLama.Abstractions
         /// <summary>
         /// base model path for the lora adapter (lora_base)
         /// </summary>
-        string LoraBase { get; set; }
+        string LoraBase { get; }

         /// <summary>
         /// Override specific metadata items in the model
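Worth noting: removing the setters from the interface permits immutability without mandating it. A get-only interface member can still be implemented by a settable property, so concrete classes such as ModelParams are free to keep public setters. A self-contained illustration with hypothetical names:

    public interface IReadOnlyParams
    {
        int GpuLayerCount { get; }               // interface exposes only a getter
    }

    public class MutableParams : IReadOnlyParams
    {
        public int GpuLayerCount { get; set; }   // a get/set property still satisfies it
    }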

View File

@@ -25,14 +25,12 @@ public static class IModelParamsExtensions
             throw new NotSupportedException("'UseMemoryLock' is not supported (llama_mlock_supported() == false)");
         if (@params.UseMemorymap && !NativeApi.llama_mmap_supported())
             throw new NotSupportedException("'UseMemorymap' is not supported (llama_mmap_supported() == false)");
-        if (@params.GpuLayerCount < 0)
-            @params.GpuLayerCount = int.MaxValue;

         var disposer = new GroupDisposable();

         result = NativeApi.llama_model_default_params();
         result.main_gpu = @params.MainGpu;
-        result.n_gpu_layers = @params.GpuLayerCount;
+        result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
         result.use_mlock = @params.UseMemoryLock;
         result.use_mmap = @params.UseMemorymap;
         result.vocab_only = @params.VocabOnly;
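The GpuLayerCount normalization can no longer be written back into @params (its setter is gone), so it moves inline into the native struct assignment; as a side benefit, the caller's object is no longer silently mutated. The same logic as a pure helper, purely illustrative:

    // Hypothetical helper expressing the normalization without the old
    // side effect of writing back into the caller's parameters:
    static int NormalizeGpuLayerCount(int gpuLayerCount) =>
        gpuLayerCount < 0 ? int.MaxValue : gpuLayerCount; // negative = offload as many layers as possible

    // result.n_gpu_layers = NormalizeGpuLayerCount(@params.GpuLayerCount);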

View File

@@ -30,7 +30,9 @@ namespace LLama
         /// <param name="logger"></param>
         public LLamaEmbedder(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
         {
-            @params.EmbeddingMode = true;
+            if (!@params.EmbeddingMode)
+                throw new ArgumentException("EmbeddingMode must be true", nameof(@params));
+
             Context = weights.CreateContext(@params, logger);
         }
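Callers that relied on the constructor flipping EmbeddingMode on must now set it themselves, otherwise construction fails fast. A usage sketch mirroring the updated test, with a placeholder model path:

    var @params = new ModelParams("model.gguf") { EmbeddingMode = true }; // placeholder path
    using var weights = LLamaWeights.LoadFromFile(@params);
    var embedder = new LLamaEmbedder(weights, @params);

    // Leaving EmbeddingMode at its default (false) now throws:
    // System.ArgumentException: EmbeddingMode must be true (Parameter 'params')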