// LLama.Native — C# interop bindings for llama.cpp (llama_context_params and progress callback).
using System;
using System.Runtime.InteropServices;
namespace LLama.Native
{
    /// <summary>
    /// Invoked by llama.cpp to report progress of a long-running operation.
    /// </summary>
    /// <param name="progress">Completion fraction between 0 and 1.</param>
    /// <param name="ctx">Opaque user-data pointer supplied when the callback was registered.</param>
    /// <returns>NOTE(review): llama.cpp appears to treat a false return as a cancellation request — confirm against the bound llama.h version.</returns>
    /// <remarks>Managed counterpart of the native `llama_progress_callback` typedef.</remarks>
    public delegate bool LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// Managed mirror of the native llama.cpp `llama_context_params` struct.
    /// Layout is sequential to match the native definition exactly: do NOT reorder,
    /// remove, or retype fields without updating the corresponding llama.h revision.
    /// Native one-byte bools are marshalled via private sbyte backing fields.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed; pass -1 (all bits set, since this is unsigned) for a random seed.
        /// </summary>
        public uint seed;

        /// <summary>
        /// Size of the text context; 0 means "take it from the model".
        /// </summary>
        public uint n_ctx;

        /// <summary>
        /// Batch size used while processing prompts.
        /// </summary>
        public uint n_batch;

        /// <summary>
        /// Thread count used for generation.
        /// </summary>
        public uint n_threads;

        /// <summary>
        /// Thread count used for batch processing.
        /// </summary>
        public uint n_threads_batch;

        /// <summary>
        /// RoPE scaling type; values come from the native `enum llama_rope_scaling_type`.
        /// </summary>
        public RopeScalingType rope_scaling_type;

        /// <summary>
        /// Base frequency for RoPE; 0 means "take it from the model".
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// Frequency scaling factor for RoPE; 0 means "take it from the model".
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// YaRN extrapolation mix factor; negative means "take it from the model".
        /// </summary>
        public float yarn_ext_factor;

        /// <summary>
        /// YaRN magnitude scaling factor.
        /// </summary>
        public float yarn_attn_factor;

        /// <summary>
        /// YaRN low correction dimension.
        /// </summary>
        public float yarn_beta_fast;

        /// <summary>
        /// YaRN high correction dimension.
        /// </summary>
        public float yarn_beta_slow;

        /// <summary>
        /// Original context size for YaRN.
        /// </summary>
        public uint yarn_orig_ctx;

        /// <summary>
        /// Defragment the KV cache when holes/size exceeds this threshold;
        /// any value below 0 disables defragmentation (the default).
        /// </summary>
        public float defrag_threshold;

        /// <summary>
        /// Native function pointer of type `ggml_backend_sched_eval_callback`.
        /// </summary>
        public IntPtr cb_eval;

        /// <summary>
        /// Opaque user data forwarded to <see cref="cb_eval"/>.
        /// </summary>
        public IntPtr cb_eval_user_data;

        /// <summary>
        /// Data type used for the K cache.
        /// </summary>
        public GGMLType type_k;

        /// <summary>
        /// Data type used for the V cache.
        /// </summary>
        public GGMLType type_v;

        /// <summary>
        /// Deprecated in llama.cpp; kept only so the struct layout stays in sync.
        /// </summary>
        private sbyte _logits_all;

        /// <summary>
        /// Embedding-only mode. Marshalled through a one-byte backing field.
        /// </summary>
        public bool embedding
        {
            readonly get => _embedding != 0;
            set => _embedding = value ? (sbyte)1 : (sbyte)0;
        }
        private sbyte _embedding;

        /// <summary>
        /// Whether KQV ops (including the KV cache) are offloaded to the GPU.
        /// Marshalled through a one-byte backing field.
        /// </summary>
        public bool offload_kqv
        {
            readonly get => _offload_kqv != 0;
            set => _offload_kqv = value ? (sbyte)1 : (sbyte)0;
        }
        private sbyte _offload_kqv;

        /// <summary>
        /// Whether embedding results are pooled (summed) by sequence id;
        /// ignored when the model has no pooling layer. Marshalled through a
        /// one-byte backing field.
        /// </summary>
        public bool do_pooling
        {
            readonly get => _do_pooling != 0;
            set => _do_pooling = value ? (sbyte)1 : (sbyte)0;
        }
        private sbyte _do_pooling;
    }
}