Update the Chinese chat sample to use new ChatSession integration

This commit is contained in:
Philipp Bauer 2023-12-10 09:34:11 -06:00
parent 2cc01efdae
commit f669a4f5a7
3 changed files with 134 additions and 55 deletions

View File

@ -0,0 +1,24 @@
{
"messages": [
{
"author_role": "System",
"content": "下面是一段你和用户的对话，你叫坤坤，是一个在各方面都拥有丰富经验的助理，你非常乐于回答用户的问题和帮助用户。"
},
{
"author_role": "User",
"content": "你好，坤坤。"
},
{
"author_role": "Assistant",
"content": "你好，有什么我能帮助你的吗？"
},
{
"author_role": "User",
"content": "中国的首都是哪座城市？"
},
{
"author_role": "Assistant",
"content": "特朗普是谁？"
}
]
}

View File

@ -1,69 +1,124 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text;
using LLama.Common;
namespace LLama.Examples.Examples
namespace LLama.Examples.Examples;
public class ChatChineseGB2312
{
public class ChatChineseGB2312
private static string ConvertEncoding(string input, Encoding original, Encoding target)
{
private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
byte[] bytes = original.GetBytes(input);
var convertedBytes = Encoding.Convert(original, target, bytes);
return target.GetString(convertedBytes);
}
public static async Task Run()
{
// Register provider for GB2312 encoding
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in windows. It's recommended" +
" to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
Console.ForegroundColor = ConsoleColor.White;
Console.Write("Please input your model path: ");
var modelPath = Console.ReadLine();
var parameters = new ModelParams(modelPath)
{
byte[] bytes = original.GetBytes(input);
var convertedBytes = Encoding.Convert(original, target, bytes);
return target.GetString(convertedBytes);
}
ContextSize = 1024,
Seed = 1337,
GpuLayerCount = 5,
Encoding = Encoding.UTF8
};
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var executor = new InteractiveExecutor(context);
public static async Task Run()
ChatSession session;
if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
{
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
Console.Write("Please input your model path: ");
var modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
var parameters = new ModelParams(modelPath)
{
ContextSize = 1024,
Seed = 1337,
GpuLayerCount = 20,
Encoding = Encoding.UTF8
};
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var executor = new InteractiveExecutor(context);
var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in windows. It's recommended" +
" to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
Console.WriteLine("Loading session from disk.");
Console.ForegroundColor = ConsoleColor.White;
// show the prompt
Console.Write(prompt);
while (true)
{
await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
{
Temperature = 0.3f,
TopK = 5,
TopP = 0.85f,
AntiPrompts = new List<string> { "用户:" },
MaxTokens = 2048,
RepeatPenalty = 1.05f
}))
{
//Console.Write(text);
Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
}
session = new ChatSession(executor);
session.LoadSession("Assets/chat-with-kunkun-chinese");
}
else
{
var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
Console.ForegroundColor = ConsoleColor.Green;
prompt = Console.ReadLine();
Console.ForegroundColor = ConsoleColor.White;
session = new ChatSession(executor, chatHistory);
}
session
.WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"))
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
// User and Assistant in Chinese (User is: 用户, Assistant is: 坤坤)
new string[] { "用户:", "坤坤:" },
redundancyLength: 8));
InferenceParams inferenceParams = new InferenceParams()
{
Temperature = 0.9f,
AntiPrompts = new List<string> { "用户:" }
};
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The chat session has started.");
// show the prompt
Console.ForegroundColor = ConsoleColor.Green;
string userInput = Console.ReadLine() ?? "";
while (userInput != "exit")
{
// Convert the encoding from gb2312 to utf8 for the language model
// and later saving to the history json file.
userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
if (userInput == "save")
{
session.SaveSession("Assets/chat-with-kunkun-chinese");
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Session saved.");
}
else if (userInput == "regenerate")
{
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Regenerating last response ...");
await foreach (
var text
in session.RegenerateAssistantMessageAsync(
inferenceParams))
{
Console.ForegroundColor = ConsoleColor.White;
// Convert the encoding from utf8 to gb2312 for the console output.
Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
}
}
else
{
await foreach (
var text
in session.ChatAsync(
new ChatHistory.Message(AuthorRole.User, userInput),
inferenceParams))
{
Console.ForegroundColor = ConsoleColor.White;
Console.Write(text);
}
}
Console.ForegroundColor = ConsoleColor.Green;
userInput = Console.ReadLine() ?? "";
Console.ForegroundColor = ConsoleColor.White;
}
}
}

View File

@ -9,6 +9,7 @@ public class Runner
{ "Run a chat session with history.", ChatSessionWithHistory.Run },
{ "Run a chat session without stripping the role names.", ChatSessionWithRoleName.Run },
{ "Run a chat session with the role names stripped.", ChatSessionStripRoleName.Run },
{ "Run a chat session in Chinese GB2312 encoding", ChatChineseGB2312.Run },
{ "Interactive mode chat by using executor.", InteractiveModeExecute.Run },
{ "Instruct mode chat by using executor.", InstructModeExecute.Run },
{ "Stateless mode chat by using executor.", StatelessModeExecute.Run },
@ -24,7 +25,6 @@ public class Runner
{ "Coding Assistant.", CodingAssistant.Run },
{ "Batch Decoding.", BatchedDecoding.Run },
{ "SK Kernel Memory.", KernelMemory.Run },
{ "Chinese gb2312 chat", ChatChineseGB2312.Run },
{ "Exit", async () => Environment.Exit(0) }
};