From c7103e86e4586886bd475b1a3973eb21f5d386b9 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Tue, 6 Feb 2024 18:06:10 +0000 Subject: [PATCH] Added new file types to quantisation --- LLama/LLamaQuantizer.cs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/LLama/LLamaQuantizer.cs b/LLama/LLamaQuantizer.cs index 54b0ed02..9ffc780e 100644 --- a/LLama/LLamaQuantizer.cs +++ b/LLama/LLamaQuantizer.cs @@ -59,7 +59,7 @@ namespace LLama private static bool ValidateFtype(LLamaFtype ftype) { // Validation copies from here: - // https://github.com/ggerganov/llama.cpp/blob/e59fcb2bc129881f4a269fee748fb38bce0a64de/llama.cpp#L2960 + // https://github.com/ggerganov/llama.cpp/blob/d71ac90985854b0905e1abba778e407e17f9f887/llama.cpp#L9613 switch (ftype) { @@ -70,15 +70,27 @@ namespace LLama case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0: case LLamaFtype.LLAMA_FTYPE_MOSTLY_F16: case LLamaFtype.LLAMA_FTYPE_ALL_F32: + + case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K_S: case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K: + + case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_XS: case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_S: case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_M: case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_L: + case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_S: case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M: + case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_S: case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_M: + case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q6_K: + + case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XXS: + case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XS: + + case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_XXS: return true; case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: