Merge pull request #495 from martindevans/quantise_new_formats

Added new file types to quantisation
This commit is contained in:
Martin Evans 2024-02-07 01:58:09 +00:00 committed by GitHub
commit ac7faa0f93
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 13 additions and 1 deletion

View File

@@ -59,7 +59,7 @@ namespace LLama
private static bool ValidateFtype(LLamaFtype ftype)
{
// Validation copies from here:
// https://github.com/ggerganov/llama.cpp/blob/e59fcb2bc129881f4a269fee748fb38bce0a64de/llama.cpp#L2960
// https://github.com/ggerganov/llama.cpp/blob/d71ac90985854b0905e1abba778e407e17f9f887/llama.cpp#L9613
switch (ftype)
{
@@ -70,15 +70,27 @@ namespace LLama
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_F16:
case LLamaFtype.LLAMA_FTYPE_ALL_F32:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_XS:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_M:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_L:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_M:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q6_K:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XXS:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XS:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_XXS:
return true;
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: