using LLama.Native;
using System;
using System.Collections.Generic;

namespace LLama
{
    /// <summary>
    /// Helper for quantizing a model file through the native quantization API.
    /// </summary>
    public static class LLamaQuantizer
    {
        /// <summary>
        /// Quantize the model.
        /// </summary>
        /// <param name="srcFileName">The model file to be quantized.</param>
        /// <param name="dstFilename">The path to save the quantized model.</param>
        /// <param name="ftype">The type of quantization.</param>
        /// <param name="nthread">
        /// Number of threads to use during the quantization. The value is passed through unchanged to the
        /// native parameters; per the original documentation, the default selects the physical core count.
        /// </param>
        /// <param name="allowRequantize">
        /// Value assigned to the native <c>allow_requantize</c> parameter (presumably permits quantizing
        /// tensors that are already quantized - confirm against the llama.cpp documentation).
        /// </param>
        /// <param name="quantizeOutputTensor">
        /// Value assigned to the native <c>quantize_output_tensor</c> parameter (presumably controls whether
        /// the output tensor is quantized as well - confirm against the llama.cpp documentation).
        /// </param>
        /// <returns>Whether the quantization is successful, i.e. the native call returned 0.</returns>
        /// <exception cref="ArgumentNullException">A file name is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="ftype"/> is not a valid quantization target.</exception>
        public static bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = -1, bool allowRequantize = true,
                                    bool quantizeOutputTensor = false)
        {
            // Fail early in managed code rather than handing a null path to the native library.
            if (srcFileName == null)
            {
                throw new ArgumentNullException(nameof(srcFileName));
            }

            if (dstFilename == null)
            {
                throw new ArgumentNullException(nameof(dstFilename));
            }

            if (!ValidateFtype(ftype))
            {
                throw new ArgumentException($"The type {Enum.GetName(typeof(LLamaFtype), ftype)} is not a valid type " +
                    $"to perform quantization.");
            }

            // Start from the native defaults and override only the values exposed by this method.
            var quantizeParams = NativeApi.llama_model_quantize_default_params();
            quantizeParams.ftype = ftype;
            quantizeParams.nthread = nthread;
            quantizeParams.allow_requantize = allowRequantize;
            quantizeParams.quantize_output_tensor = quantizeOutputTensor;

            unsafe
            {
                return NativeApi.llama_model_quantize(srcFileName, dstFilename, &quantizeParams) == 0;
            }
        }

        /// <summary>
        /// Quantize the model, selecting the quantization type by name.
        /// </summary>
        /// <param name="srcFileName">The model file to be quantized.</param>
        /// <param name="dstFilename">The path to save the quantized model.</param>
        /// <param name="ftype">
        /// The type of quantization, given as (part of) the name of a <see cref="LLamaFtype"/> member,
        /// for example "Q5_K_M". Matching is case-insensitive.
        /// </param>
        /// <param name="nthread">
        /// Number of threads to use during the quantization. The value is passed through unchanged to the
        /// native parameters; per the original documentation, the default selects the physical core count.
        /// </param>
        /// <param name="allowRequantize">Forwarded to the <see cref="LLamaFtype"/>-typed overload of this method.</param>
        /// <param name="quantizeOutputTensor">Forwarded to the <see cref="LLamaFtype"/>-typed overload of this method.</param>
        /// <returns>Whether the quantization is successful.</returns>
        /// <exception cref="ArgumentNullException">A file name or <paramref name="ftype"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="ftype"/> matches no member, matches several members ambiguously,
        /// or resolves to a member that is not a valid quantization target.
        /// </exception>
        public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = -1, bool allowRequantize = true,
                                    bool quantizeOutputTensor = false)
        {
            return Quantize(srcFileName, dstFilename, StringToFtype(ftype), nthread, allowRequantize, quantizeOutputTensor);
        }

        /// <summary>
        /// Check whether the given file type is accepted as a quantization target.
        /// </summary>
        /// <param name="ftype">The file type to check.</param>
        /// <returns>True for the explicitly listed types, false for everything else.</returns>
        private static bool ValidateFtype(LLamaFtype ftype)
        {
            // Validation copied from here:
            // https://github.com/ggerganov/llama.cpp/blob/3ab8b3a92ede46df88bc5a2dfca3777de4a2b2b6/llama.cpp#L10965

            switch (ftype)
            {
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_F16:
                case LLamaFtype.LLAMA_FTYPE_ALL_F32:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_K_XS:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_M:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_L:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_M:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q6_K:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XXS:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XS:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_M:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_XXS:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ1_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ4_NL:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ4_XS:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_S:
                case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_M:
                    return true;

                case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
                case LLamaFtype.LLAMA_FTYPE_GUESSED:
                default:
                    return false;
            }
        }

        /// <summary>
        /// Parse a string into a LLamaFtype. This is a "relaxed" parsing, which allows any string which is contained within
        /// the enum name to be used.
        ///
        /// For example "Q5_K_M" will convert to "LLAMA_FTYPE_MOSTLY_Q5_K_M"
        /// </summary>
        /// <remarks>
        /// Candidates are resolved in three tiers, all case-insensitive: an exact name match wins; otherwise a
        /// single name ending in "_" + <paramref name="str"/> wins; otherwise the name must be the only one
        /// containing <paramref name="str"/>. The first two tiers exist so that a member whose name is a prefix
        /// of another (e.g. "Q2_K" versus "Q2_K_S") can still be selected.
        /// </remarks>
        /// <param name="str">The (partial) enum member name to look up.</param>
        /// <returns>The single enum member identified by <paramref name="str"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentException">No member matches, or several members match ambiguously.</exception>
        private static LLamaFtype StringToFtype(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            // Variants whose name contains the input anywhere (the original, most relaxed rule)
            var matches = new List<LLamaFtype>();

            // Variants whose name ends with "_<input>", used to disambiguate e.g. "Q2_K" from "Q2_K_S"
            var suffixMatches = new List<LLamaFtype>();
            var suffix = "_" + str;

            foreach (LLamaFtype ftype in Enum.GetValues(typeof(LLamaFtype)))
            {
                var name = Enum.GetName(typeof(LLamaFtype), ftype);
                if (name == null)
                {
                    continue;
                }

                // A full-name match is never ambiguous, even if the name is a prefix of another member
                if (name.Equals(str, StringComparison.OrdinalIgnoreCase))
                {
                    return ftype;
                }

                if (name.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
                {
                    suffixMatches.Add(ftype);
                }

                // Note: this is using "IndexOf" instead of "Contains" to be compatible with netstandard2.0
#pragma warning disable CA2249
                if (name.IndexOf(str, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    matches.Add(ftype);
                }
#pragma warning restore CA2249
            }

            // A unique suffix match is more specific than a substring match, prefer it
            if (suffixMatches.Count == 1)
            {
                return suffixMatches[0];
            }

            // If there was just one match, success!
            if (matches.Count == 1)
            {
                return matches[0];
            }

            // If none matched throw a generic error
            if (matches.Count == 0)
            {
                throw new ArgumentException($"Unknown ftype \"{str}\" for quantization.");
            }

            // There were several matches, throw an error asking the user to be more specific
            throw new ArgumentException($"\"{str}\" matches multiple potential ftypes: {string.Join(",", matches)}");
        }
    }
}