115 lines
3.2 KiB
C#
115 lines
3.2 KiB
C#
namespace LLama.Native
{
    /// <summary>
    /// Supported model file types.
    /// </summary>
    /// <remarks>
    /// Every member carries an explicit numeric value; members are listed in ascending
    /// numeric order. Values 5 and 6 are not assigned to any member.
    /// NOTE(review): the values presumably mirror the native <c>llama_ftype</c> enum and
    /// must not be renumbered - confirm against the native header before changing them.
    /// Where present, the "Benchmark@7B" remarks give the file size and perplexity delta
    /// recorded for a 7B-parameter model.
    /// </remarks>
    public enum LLamaFtype
    {
        /// <summary>
        /// All f32
        /// </summary>
        /// <remarks>Benchmark@7B: 26GB</remarks>
        LLAMA_FTYPE_ALL_F32 = 0,

        /// <summary>
        /// Mostly f16
        /// </summary>
        /// <remarks>Benchmark@7B: 13GB</remarks>
        LLAMA_FTYPE_MOSTLY_F16 = 1,

        /// <summary>
        /// Mostly 4 bit (Q4_0)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,

        /// <summary>
        /// Mostly 4 bit (Q4_1)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,

        /// <summary>
        /// Mostly 4 bit (Q4_1), tok_embeddings.weight and output.weight are f16
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,

        /// <summary>
        /// Mostly 8 bit (Q8_0)
        /// </summary>
        /// <remarks>Benchmark@7B: 6.7GB, +0.0004 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,

        /// <summary>
        /// Mostly 5 bit (Q5_0)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.30GB, +0.0796 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_0 = 8,

        /// <summary>
        /// Mostly 5 bit (Q5_1)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_1 = 9,

        /// <summary>
        /// K-Quant 2 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 2.67GB, +0.8698 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q2_K = 10,

        /// <summary>
        /// K-Quant 3 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,

        /// <summary>
        /// K-Quant 3 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,

        /// <summary>
        /// K-Quant 3 bit (Large)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,

        /// <summary>
        /// K-Quant 4 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,

        /// <summary>
        /// K-Quant 4 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,

        /// <summary>
        /// K-Quant 5 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,

        /// <summary>
        /// K-Quant 5 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,

        /// <summary>
        /// K-Quant 6 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q6_K = 18,

        /// <summary>
        /// File type was not specified
        /// </summary>
        LLAMA_FTYPE_GUESSED = 1024
    }
}