feat: run quantization in csharp.
This commit is contained in:
parent
bc6ebaaf44
commit
33067f990f
|
@ -2,11 +2,11 @@
|
|||
using LLama.Examples;
|
||||
using LLama.Types;
|
||||
|
||||
int choice = 0;
|
||||
int choice = 3;
|
||||
|
||||
if(choice == 0)
|
||||
{
|
||||
ChatSession chat = new(@"<Your model file path>", "<Your prompt file path>", new string[] { "User:" });
|
||||
ChatSession chat = new(@"D:\development\llama\weights\LLaMA\7B\ggml-model-q4_0.bin", @"D:\development\llama\llama.cpp\prompts\chat-with-bob.txt", new string[] { "User:" });
|
||||
chat.Run();
|
||||
}
|
||||
else if(choice == 1)
|
||||
|
@ -18,4 +18,10 @@ else if(choice == 2)
|
|||
{
|
||||
ChatWithLLamaModelV1 chat = new(@"<Your model file path>");
|
||||
chat.Run();
|
||||
}
|
||||
else if (choice == 3) // quantization
|
||||
{
|
||||
Quantize q = new Quantize();
|
||||
q.Run(@"D:\development\llama\weights\LLaMA\7B\ggml-model-f16.bin",
|
||||
@"D:\development\llama\weights\LLaMA\7B\ggml-model-q4_1.bin", "q4_1");
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace LLama.Examples
|
||||
{
|
||||
/// <summary>
/// Example that runs model quantization and reports the outcome on the console.
/// </summary>
public class Quantize
{
    /// <summary>
    /// Creates the example. No state is initialized.
    /// </summary>
    public Quantize()
    {
    }

    /// <summary>
    /// Forwards the arguments to <c>Quantizer.Quantize</c> and prints whether it reported success.
    /// </summary>
    /// <param name="srcFileName">Path of the model file to read.</param>
    /// <param name="dstFilename">Path of the quantized model file to write.</param>
    /// <param name="ftype">Quantization type name, passed through unchanged (e.g. "q4_1").</param>
    /// <param name="nthread">Thread count, passed through unchanged.</param>
    /// <param name="printInfo">Info-printing flag, passed through unchanged.</param>
    public void Run(string srcFileName, string dstFilename, string ftype, int nthread = 0, bool printInfo = true)
    {
        bool succeeded = Quantizer.Quantize(srcFileName, dstFilename, ftype, nthread, printInfo);

        // Exactly one of the two messages is written, depending on the returned flag.
        Console.WriteLine(succeeded ? "Quantization succeed!" : "Quantization failed!");
    }
}
|
||||
}
|
|
@ -4,7 +4,7 @@ using System.Text;
|
|||
|
||||
namespace LLama.Native
|
||||
{
|
||||
internal enum LLamaFtype
|
||||
public enum LLamaFtype
|
||||
{
|
||||
LLAMA_FTYPE_ALL_F32 = 0,
|
||||
LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace LLama.Native
|
||||
{
|
||||
internal partial class NativeApi
{
    /// <summary>
    /// P/Invoke declaration for the <c>ggml_custom_quantize</c> export of the native library
    /// identified by <c>libraryName</c>.
    /// </summary>
    /// <param name="src_filename">Source file name handed to the native function.</param>
    /// <param name="dst_filename">Destination file name handed to the native function.</param>
    /// <param name="ftype_str">Quantization type as a string.</param>
    /// <param name="nthread">Thread count handed to the native function.</param>
    /// <param name="print_info">Flag handed to the native function; presumably toggles progress output.</param>
    /// <returns>The boolean result reported by the native function.</returns>
    /// <remarks>
    /// Without an explicit <c>MarshalAs</c>, a P/Invoke <c>bool</c> is marshalled as a 4-byte
    /// Win32 BOOL. A native 1-byte <c>bool</c> return only defines the low byte of the return
    /// register, so the remaining bytes could make a native <c>false</c> read as <c>true</c>.
    /// NOTE(review): assumes the native signature uses C/C++ <c>bool</c> - confirm against the
    /// native header.
    /// </remarks>
    [DllImport(libraryName)]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool ggml_custom_quantize(string src_filename, string dst_filename,
        string ftype_str, int nthread, [MarshalAs(UnmanagedType.I1)] bool print_info);
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
using LLama.Native;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace LLama
|
||||
{
|
||||
/// <summary>
/// Managed entry points for quantizing a model file through the native library.
/// </summary>
public class Quantizer
{
    // Type names accepted by the string overload; allocated once instead of on every call.
    private static readonly string[] s_validFtypes = { "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0" };

    /// <summary>
    /// Quantizes a model file, selecting the target type with an <see cref="LLamaFtype"/> value.
    /// </summary>
    /// <param name="srcFileName">Path of the model file to read.</param>
    /// <param name="dstFilename">Path of the quantized model file to write.</param>
    /// <param name="ftype">Target type; must be one of the Q4_0, Q4_1, Q4_2, Q5_0, Q5_1 or Q8_0 members.</param>
    /// <param name="nthread">Thread count forwarded to the native call.</param>
    /// <param name="printInfo">Flag forwarded to the native call.</param>
    /// <returns>The value returned by the native quantize call.</returns>
    /// <exception cref="ArgumentException"><paramref name="ftype"/> has no quantization type name.</exception>
    public static bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = 0, bool printInfo = true)
    {
        return Quantize(srcFileName, dstFilename, FtypeToString(ftype), nthread, printInfo);
    }

    /// <summary>
    /// Quantizes a model file, selecting the target type by name.
    /// </summary>
    /// <param name="srcFileName">Path of the model file to read.</param>
    /// <param name="dstFilename">Path of the quantized model file to write.</param>
    /// <param name="ftype">One of "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0".</param>
    /// <param name="nthread">Thread count forwarded to the native call.</param>
    /// <param name="printInfo">Flag forwarded to the native call.</param>
    /// <returns>The value returned by the native quantize call.</returns>
    /// <exception cref="ArgumentException"><paramref name="ftype"/> is not one of the accepted names.</exception>
    public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = 0, bool printInfo = true)
    {
        if (!ValidateFtype(ftype))
        {
            // Report the rejected string itself. Enum.GetName cannot be used here: it throws
            // when handed a string (or null), which would mask the intended message.
            throw new ArgumentException($"The type {ftype} is not a valid type " +
                $"to perform quantization.", nameof(ftype));
        }

        return NativeApi.ggml_custom_quantize(srcFileName, dstFilename, ftype, nthread, printInfo);
    }

    /// <summary>
    /// Returns true when <paramref name="ftype"/> is one of the accepted type names.
    /// </summary>
    private static bool ValidateFtype(string ftype)
    {
        return s_validFtypes.Contains(ftype);
    }

    /// <summary>
    /// Maps an <see cref="LLamaFtype"/> value to the type name used by the string overload.
    /// </summary>
    /// <exception cref="ArgumentException">The value has no corresponding type name.</exception>
    private static string FtypeToString(LLamaFtype ftype)
    {
        return ftype switch
        {
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 => "q4_0",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1 => "q4_1",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_2 => "q4_2",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0 => "q5_0",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1 => "q5_1",
            LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0 => "q8_0",
            // Interpolating the enum prints its member name, or the raw number for an
            // undefined value (Enum.GetName would yield null and leave a blank).
            _ => throw new ArgumentException($"The type {ftype} is not a valid type " +
                $"to perform quantization.", nameof(ftype))
        };
    }
}
|
||||
}
|
Loading…
Reference in New Issue