feat: run quantization in csharp.

This commit is contained in:
Yaohui Liu 2023-05-11 17:38:28 +08:00
parent bc6ebaaf44
commit 33067f990f
No known key found for this signature in database
GPG Key ID: E86D01E1809BD23E
5 changed files with 98 additions and 3 deletions

View File

@ -2,11 +2,11 @@
using LLama.Examples;
using LLama.Types;
int choice = 0;
int choice = 3;
if(choice == 0)
{
ChatSession chat = new(@"<Your model file path>", "<Your prompt file path>", new string[] { "User:" });
ChatSession chat = new(@"D:\development\llama\weights\LLaMA\7B\ggml-model-q4_0.bin", @"D:\development\llama\llama.cpp\prompts\chat-with-bob.txt", new string[] { "User:" });
chat.Run();
}
else if(choice == 1)
@ -18,4 +18,10 @@ else if(choice == 2)
{
ChatWithLLamaModelV1 chat = new(@"<Your model file path>");
chat.Run();
}
else if (choice == 3) // quantization
{
// Runs the Quantize example: the first argument is the source model file, the second is the
// destination file, and "q4_1" is the type string handed through to the quantizer.
// NOTE(review): both paths are absolute paths on one specific machine; replace them with your own
// model locations before running (the other branches use "<Your model file path>" placeholders).
Quantize q = new Quantize();
q.Run(@"D:\development\llama\weights\LLaMA\7B\ggml-model-f16.bin",
@"D:\development\llama\weights\LLaMA\7B\ggml-model-q4_1.bin", "q4_1");
}

View File

@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LLama.Examples
{
/// <summary>
/// Example wrapper that runs a quantization through <c>Quantizer.Quantize</c> and reports
/// the outcome on the console.
/// </summary>
public class Quantize
{
    public Quantize()
    {
    }

    /// <summary>
    /// Forwards all arguments unchanged to <c>Quantizer.Quantize</c> and prints one line saying
    /// whether that call returned <c>true</c> or <c>false</c>.
    /// </summary>
    /// <param name="srcFileName">Source file name, forwarded as-is.</param>
    /// <param name="dstFilename">Destination file name, forwarded as-is.</param>
    /// <param name="ftype">Quantization type string, forwarded as-is.</param>
    /// <param name="nthread">Thread count argument, forwarded as-is (defaults to 0).</param>
    /// <param name="printInfo">Info-printing flag, forwarded as-is (defaults to true).</param>
    public void Run(string srcFileName, string dstFilename, string ftype, int nthread = 0, bool printInfo = true)
    {
        bool succeeded = Quantizer.Quantize(srcFileName, dstFilename, ftype, nthread, printInfo);

        // Exactly one of the two messages is written, depending on the boolean result.
        string message = succeeded ? "Quantization succeed!" : "Quantization failed!";
        Console.WriteLine(message);
    }
}
}

View File

@ -4,7 +4,7 @@ using System.Text;
namespace LLama.Native
{
internal enum LLamaFtype
public enum LLamaFtype
{
LLAMA_FTYPE_ALL_F32 = 0,
LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors

View File

@ -0,0 +1,14 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
namespace LLama.Native
{
internal partial class NativeApi
{
    /// <summary>
    /// P/Invoke binding for the native <c>ggml_custom_quantize</c> export of the library
    /// named by <c>libraryName</c>.
    /// </summary>
    /// <param name="src_filename">Source file name passed to the native function.</param>
    /// <param name="dst_filename">Destination file name passed to the native function.</param>
    /// <param name="ftype_str">Quantization type string passed to the native function.</param>
    /// <param name="nthread">Thread count argument passed to the native function.</param>
    /// <param name="print_info">Info-printing flag passed to the native function.</param>
    /// <returns>The boolean value returned by the native function.</returns>
    /// <remarks>
    /// Without an explicit <c>MarshalAs</c>, the runtime marshals <c>bool</c> as a 4-byte Win32 BOOL.
    /// A native function returning a 1-byte C/C++ <c>bool</c> only defines the low byte of the return
    /// register, so the default marshalling can read garbage from the upper bytes. The booleans are
    /// therefore pinned to one byte here.
    /// NOTE(review): this assumes the native signature uses C/C++ <c>bool</c> for the return value and
    /// for <c>print_info</c> - confirm against the native header.
    /// </remarks>
    [DllImport(libraryName)]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool ggml_custom_quantize(string src_filename, string dst_filename,
        string ftype_str, int nthread, [MarshalAs(UnmanagedType.I1)] bool print_info);
}
}

47
LLama/Quantizer.cs Normal file
View File

@ -0,0 +1,47 @@
using LLama.Native;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace LLama
{
/// <summary>
/// Entry points for quantizing a model file through the native <c>ggml_custom_quantize</c> call.
/// </summary>
public class Quantizer
{
    // Type strings accepted by the string overload; kept in one place so the array is not
    // re-allocated on every validation call.
    private static readonly string[] s_supportedFtypes = { "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0" };

    /// <summary>
    /// Quantizes using an <see cref="LLamaFtype"/> value, which is first converted to its type string
    /// and then handled by the string overload.
    /// </summary>
    /// <param name="srcFileName">Source file name, forwarded to the native call.</param>
    /// <param name="dstFilename">Destination file name, forwarded to the native call.</param>
    /// <param name="ftype">Target type; must be one of the Q4_0, Q4_1, Q4_2, Q5_0, Q5_1 or Q8_0 members.</param>
    /// <param name="nthread">Thread count argument, forwarded to the native call.</param>
    /// <param name="printInfo">Info-printing flag, forwarded to the native call.</param>
    /// <returns>The boolean returned by the native call.</returns>
    /// <exception cref="ArgumentException"><paramref name="ftype"/> is not one of the supported members.</exception>
    public static bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = 0, bool printInfo = true)
    {
        return Quantize(srcFileName, dstFilename, FtypeToString(ftype), nthread, printInfo);
    }

    /// <summary>
    /// Quantizes using a type string. The string is validated before the native call is made.
    /// </summary>
    /// <param name="srcFileName">Source file name, forwarded to the native call.</param>
    /// <param name="dstFilename">Destination file name, forwarded to the native call.</param>
    /// <param name="ftype">One of "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0".</param>
    /// <param name="nthread">Thread count argument, forwarded to the native call.</param>
    /// <param name="printInfo">Info-printing flag, forwarded to the native call.</param>
    /// <returns>The boolean returned by the native call.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ftype"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="ftype"/> is not a supported type string.</exception>
    public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = 0, bool printInfo = true)
    {
        if (ftype is null)
        {
            throw new ArgumentNullException(nameof(ftype));
        }

        if (!ValidateFtype(ftype))
        {
            // The string is reported directly: Enum.GetName only accepts enum or integral values,
            // so passing the string to it would throw before this message could be built.
            throw new ArgumentException($"The type {ftype} is not a valid type " +
                $"to perform quantization.", nameof(ftype));
        }

        return NativeApi.ggml_custom_quantize(srcFileName, dstFilename, ftype, nthread, printInfo);
    }

    // Returns true when the string is one of the supported type strings (exact match).
    private static bool ValidateFtype(string ftype)
    {
        return s_supportedFtypes.Contains(ftype);
    }

    // Maps a supported enum member to its type string; any other member is rejected.
    private static string FtypeToString(LLamaFtype ftype)
    {
        return ftype switch
        {
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 => "q4_0",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1 => "q4_1",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_2 => "q4_2",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0 => "q5_0",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1 => "q5_1",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0 => "q8_0",
            _ => throw new ArgumentException($"The type {Enum.GetName(typeof(LLamaFtype), ftype)} is not a valid type " +
                $"to perform quantization.")
        };
    }
}
}