From 9a1d6f99f26ffbe38e3a6c59d1e7d1198e9106be Mon Sep 17 00:00:00 2001
From: Tim Miller
Date: Thu, 31 Aug 2023 17:24:44 +0900
Subject: [PATCH] Add Semantic Kernel support

---
 LLama.Examples/LLama.Examples.csproj          |   5 +
 .../NewVersion/SemanticKernelChat.cs          |  72 ++++++++
 .../NewVersion/SemanticKernelMemorySkill.cs   | 173 ++++++++++++++++++
 .../NewVersion/SemanticKernelPrompt.cs        |  55 ++++++
 LLama.Examples/NewVersion/TestRunner.cs       |  17 +-
 .../ChatCompletion/HistoryTransform.cs        |  17 ++
 .../LLamaSharpChatCompletion.cs               |  74 ++++++++
 .../ChatCompletion/LLamaSharpChatMessage.cs   |  14 ++
 .../ChatCompletion/LLamaSharpChatResult.cs    |  38 ++++
 LLama.SemanticKernel/ExtensionMethods.cs      |  72 ++++++++
 .../LLamaSharp.SemanticKernel.csproj          |  22 +++
 .../LLamaSharpTextCompletion.cs               |  27 +++
 .../TextCompletion/LLamaTextResult.cs         |  37 ++++
 .../LLamaSharpEmbeddingGeneration.cs          |  21 +++
 LLama/LLamaEmbedder.cs                        |   5 +
 LLama/OldVersion/LLamaEmbedder.cs             |   1 +
 LLamaSharp.sln                                |  16 +-
 17 files changed, 664 insertions(+), 2 deletions(-)
 create mode 100644 LLama.Examples/NewVersion/SemanticKernelChat.cs
 create mode 100644 LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
 create mode 100644 LLama.Examples/NewVersion/SemanticKernelPrompt.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
 create mode 100644 LLama.SemanticKernel/ExtensionMethods.cs
 create mode 100644 LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
 create mode 100644 LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
 create mode 100644 LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
 create mode 100644 LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs

diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index 6a1685ed..a8abe3ae 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -27,6 +27,11 @@
+
+
+
+
+
diff --git a/LLama.Examples/NewVersion/SemanticKernelChat.cs b/LLama.Examples/NewVersion/SemanticKernelChat.cs
new file mode 100644
index 00000000..feca8d7f
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelChat.cs
@@ -0,0 +1,72 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelChat
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+            //var builder = new KernelBuilder();
+            //builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
+            //var kernel = builder.Build();
+
+            var chatGPT = new LLamaSharpChatCompletion(ex);
+
+            var chatHistory = chatGPT.CreateNewChat("You are a librarian, expert about books");
+
+            Console.WriteLine("Chat content:");
+            Console.WriteLine("------------------------");
+
+            chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+            await MessageOutputAsync(chatHistory);
+
+            // First bot assistant message
+            string reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+
+            // Second user message
+            chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
+            await MessageOutputAsync(chatHistory);
+
+            // Second bot assistant message
+            reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+        }
+
+        /// <summary>
+        /// Outputs the last message of the chat history
+        /// </summary>
+        private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+        {
+            var message = chatHistory.Messages.Last();
+
+            Console.WriteLine($"{message.Role}: {message.Content}");
+            Console.WriteLine("------------------------");
+
+            return Task.CompletedTask;
+        }
+    }
+}
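Note on wiring: the commented-out KernelBuilder lines in the example above hint at how LLamaSharpChatCompletion slots into a kernel instead of being called directly. A minimal sketch of that path, assuming the Semantic Kernel 0.x builder API this patch targets ("<model path>" and the service id are placeholders):

    using LLama;
    using LLama.Common;
    using Microsoft.SemanticKernel;
    using Microsoft.SemanticKernel.AI.ChatCompletion;
    using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;

    // Build an interactive executor over a local model, as in the example above.
    var parameters = new ModelParams("<model path>");
    using var model = LLamaWeights.LoadFromFile(parameters);
    using var context = model.CreateContext(parameters);
    var executor = new InteractiveExecutor(context);

    // Register the connector as the kernel's default chat completion service.
    var kernel = new KernelBuilder()
        .WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(executor), true)
        .Build();

    // Resolve it back out of the kernel and run one exchange.
    var chat = kernel.GetService<IChatCompletion>();
    var history = chat.CreateNewChat("You are a helpful assistant.");
    history.AddUserMessage("Hello!");
    Console.WriteLine(await chat.GenerateMessageAsync(history));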
diff --git a/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs b/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
new file mode 100644
index 00000000..df22d9eb
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
@@ -0,0 +1,173 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.Extensions.Logging;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+using Microsoft.SemanticKernel.Memory;
+using Microsoft.SemanticKernel.Skills.Core;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelMemorySkill
+    {
+        private const string MemoryCollectionName = "aboutMe";
+
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example15_MemorySkill.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+            var ex2 = new StatelessExecutor(model, parameters);
+            var builder = new KernelBuilder();
+
+            var embedding = new LLamaEmbedder(context);
+
+            builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
+            builder.WithAIService<ITextCompletion>("local-llama-text", new LLamaSharpTextCompletion(ex), true);
+            builder.WithAIService<ITextEmbeddingGeneration>("local-llama-embed", new LLamaSharpEmbeddingGeneration(embedding), true);
+            builder.WithMemoryStorage(new VolatileMemoryStore());
+            var kernel = builder.Build();
+
+            // ========= Store memories using the kernel =========
+
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info1", text: "My name is Andrea");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info2", text: "I work as a tourist operator");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info3", text: "I've been living in Seattle since 2005");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info4", text: "I visited France and Italy five times since 2015");
+
+            // ========= Store memories using semantic function =========
+
+            // Add Memory as a skill for other functions
+            var memorySkill = new TextMemorySkill(kernel.Memory);
+            kernel.ImportSkill(memorySkill);
+
+            // Build a semantic function that saves info to memory
+            const string SaveFunctionDefinition = "{{save $info}}";
+            var memorySaver = kernel.CreateSemanticFunction(SaveFunctionDefinition);
+
+            await kernel.RunAsync(memorySaver, new()
+            {
+                [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+                [TextMemorySkill.KeyParam] = "info5",
+                ["info"] = "My family is from New York"
+            });
+
+            // ========= Test memory remember =========
+            Console.WriteLine("========= Example: Recalling a Memory =========");
+
+            var answer = await memorySkill.RetrieveAsync(MemoryCollectionName, "info1", null);
+            Console.WriteLine("Memory associated with 'info1': {0}", answer);
+            /*
+            Output:
+            "Memory associated with 'info1': My name is Andrea
+            */
+
+            // ========= Test memory recall =========
+            Console.WriteLine("========= Example: Recalling an Idea =========");
+
+            answer = await memorySkill.RecallAsync("where did I grow up?", MemoryCollectionName, relevance: null, limit: 2, null);
+            Console.WriteLine("Ask: where did I grow up?");
+            Console.WriteLine("Answer:\n{0}", answer);
+
+            answer = await memorySkill.RecallAsync("where do I live?", MemoryCollectionName, relevance: null, limit: 2, null);
+            Console.WriteLine("Ask: where do I live?");
+            Console.WriteLine("Answer:\n{0}", answer);
+
+            /*
+            Output:
+
+            Ask: where did I grow up?
+            Answer:
+            ["My family is from New York","I\u0027ve been living in Seattle since 2005"]
+
+            Ask: where do I live?
+            Answer:
+            ["I\u0027ve been living in Seattle since 2005","My family is from New York"]
+            */
+
+            // ========= Use memory in a semantic function =========
+            Console.WriteLine("========= Example: Using Recall in a Semantic Function =========");
+
+            // Build a semantic function that uses memory to find facts
+            const string RecallFunctionDefinition = @"
+Consider only the facts below when answering questions.
+
+About me: {{recall 'where did I grow up?'}}
+About me: {{recall 'where do I live?'}}
+
+Question: {{$input}}
+
+Answer:
+";
+
+            var aboutMeOracle = kernel.CreateSemanticFunction(RecallFunctionDefinition, maxTokens: 100);
+
+            var result = await kernel.RunAsync(aboutMeOracle, new("Do I live in the same town where I grew up?")
+            {
+                [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+                [TextMemorySkill.RelevanceParam] = "0.8"
+            });
+
+            Console.WriteLine("Do I live in the same town where I grew up?\n");
+            Console.WriteLine(result);
+
+            /*
+            Output:
+
+            Do I live in the same town where I grew up?
+
+            No, I do not live in the same town where I grew up since my family is from New York and I have been living in Seattle since 2005.
+            */
+
+            // ========= Remove a memory =========
+            Console.WriteLine("========= Example: Forgetting a Memory =========");
+
+            result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself")
+            {
+                ["fact1"] = "What is my name?",
+                ["fact2"] = "What do I do for a living?",
+                [TextMemorySkill.RelevanceParam] = ".75"
+            });
+
+            Console.WriteLine("Tell me a bit about myself\n");
+            Console.WriteLine(result);
+
+            /*
+            Approximate Output:
+            Tell me a bit about myself
+
+            My name is Andrea and my family is from New York. I work as a tourist operator.
+            */
+
+            await memorySkill.RemoveAsync(MemoryCollectionName, "info1", null);
+
+            result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself"));
+
+            Console.WriteLine("Tell me a bit about myself\n");
+            Console.WriteLine(result);
+
+            /*
+            Approximate Output:
+            Tell me a bit about myself
+
+            I'm from a family originally from New York and I work as a tourist operator. I've been living in Seattle since 2005.
+            */
+        }
+    }
+}
diff --git a/LLama.Examples/NewVersion/SemanticKernelPrompt.cs b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
new file mode 100644
index 00000000..40336b22
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
@@ -0,0 +1,55 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelPrompt
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            var ex = new StatelessExecutor(model, parameters);
+
+            var builder = new KernelBuilder();
+            builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+
+            var kernel = builder.Build();
+
+            var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+            var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+
+            string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+            string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";
+
+            Console.WriteLine(await summarize.InvokeAsync(text1));
+
+            Console.WriteLine(await summarize.InvokeAsync(text2));
+        }
+    }
+}
diff --git a/LLama.Examples/NewVersion/TestRunner.cs b/LLama.Examples/NewVersion/TestRunner.cs
index f5a10ef4..c8a7bd31 100644
--- a/LLama.Examples/NewVersion/TestRunner.cs
+++ b/LLama.Examples/NewVersion/TestRunner.cs
@@ -8,7 +8,7 @@
         Console.WriteLine("Please input a number to choose an example to run:");
         Console.WriteLine("0: Run a chat session without stripping the role names.");
-        Console.WriteLine("1: Run a chat session with the role names strippped.");
+        Console.WriteLine("1: Run a chat session with the role names stripped.");
         Console.WriteLine("2: Interactive mode chat by using executor.");
         Console.WriteLine("3: Instruct mode chat by using executor.");
         Console.WriteLine("4: Stateless mode chat by using executor.");
@@ -18,6 +18,9 @@
         Console.WriteLine("8: Quantize the model.");
         Console.WriteLine("9: Automatic conversation.");
         Console.WriteLine("10: Constrain response to json format using grammar.");
+        Console.WriteLine("11: Semantic Kernel Prompt.");
+        Console.WriteLine("12: Semantic Kernel Chat.");
+        Console.WriteLine("13: Semantic Kernel Memory Skill.");
 
         while (true)
         {
@@ -68,6 +71,18 @@
             {
                 GrammarJsonResponse.Run();
             }
+            else if (choice == 11)
+            {
+                await SemanticKernelPrompt.Run();
+            }
+            else if (choice == 12)
+            {
+                await SemanticKernelChat.Run();
+            }
+            else if (choice == 13)
+            {
+                await SemanticKernelMemorySkill.Run();
+            }
             else
             {
                 Console.WriteLine("Cannot parse your choice. Please select again.");
diff --git a/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
new file mode 100644
index 00000000..1b72d89e
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
@@ -0,0 +1,17 @@
+using static LLama.LLamaTransforms;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// Default HistoryTransform Patch
+/// </summary>
+public class HistoryTransform : DefaultHistoryTransform
+{
+    /// <inheritdoc/>
+    public override string HistoryToText(global::LLama.Common.ChatHistory history)
+    {
+        var prompt = base.HistoryToText(history);
+        return prompt + "\nAssistant:";
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
new file mode 100644
index 00000000..51dee59e
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
@@ -0,0 +1,74 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp ChatCompletion
+/// </summary>
+public sealed class LLamaSharpChatCompletion : IChatCompletion
+{
+    private const string UserRole = "user:";
+    private const string AssistantRole = "assistant:";
+    private ChatSession session;
+
+    public LLamaSharpChatCompletion(InteractiveExecutor model)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+    }
+
+    /// <inheritdoc/>
+    public ChatHistory CreateNewChat(string? instructions = "")
+    {
+        var history = new ChatHistory();
+
+        if (instructions != null && !string.IsNullOrEmpty(instructions))
+        {
+            history.AddSystemMessage(instructions);
+        }
+
+        return history;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        yield return new LLamaSharpChatResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
new file mode 100644
index 00000000..a10314fe
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
@@ -0,0 +1,14 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp Chat Message
+/// </summary>
+public class LLamaSharpChatMessage : ChatMessageBase
+{
+    /// <inheritdoc/>
+    public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
+    {
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
new file mode 100644
index 00000000..8a8b2ef3
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
@@ -0,0 +1,38 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+internal sealed class LLamaSharpChatResult : IChatStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _stream;
+
+    /// <summary>
+    /// Constructs a chat result from a stream of response tokens.
+    /// </summary>
+    /// <param name="stream">The token stream produced by the chat session.</param>
+    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
+    {
+        _stream = stream;
+    }
+
+    /// <inheritdoc/>
+    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _stream)
+        {
+            sb.Append(token);
+        }
+        return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (var token in _stream)
+        {
+            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
+        }
+    }
+}
diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs
new file mode 100644
index 00000000..ebfc1c37
--- /dev/null
+++ b/LLama.SemanticKernel/ExtensionMethods.cs
@@ -0,0 +1,72 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama;
+
+internal static class ExtensionMethods
+{
+    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
+    {
+        if (chatHistory is null)
+        {
+            throw new ArgumentNullException(nameof(chatHistory));
+        }
+
+        var history = new global::LLama.Common.ChatHistory();
+
+        foreach (var chat in chatHistory)
+        {
+            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, out var _role) ? _role : global::LLama.Common.AuthorRole.Unknown;
+            history.AddMessage(role, chat.Content);
+        }
+
+        return history;
+    }
+
+    /// <summary>
+    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings">The request settings to convert.</param>
+    /// <returns>The equivalent LLamaSharp inference parameters.</returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":" };
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = antiPrompts,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+
+    /// <summary>
+    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings">The request settings to convert.</param>
+    /// <returns>The equivalent LLamaSharp inference parameters.</returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = requestSettings.StopSequences,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+}
diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
new file mode 100644
index 00000000..7b2a0780
--- /dev/null
+++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0;net6.0;net7.0</TargetFrameworks>
+    <RootNamespace>Microsoft.SemanticKernel.Connectors.AI.LLama</RootNamespace>
+    <Nullable>enable</Nullable>
+    <LangVersion>10</LangVersion>
+    <Platforms>AnyCPU;x64;Arm64</Platforms>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
new file mode 100644
index 00000000..d6ce9362
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
@@ -0,0 +1,27 @@
+using LLama;
+using LLama.Abstractions;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+public sealed class LLamaSharpTextCompletion : ITextCompletion
+{
+    public ILLamaExecutor executor;
+
+    public LLamaSharpTextCompletion(ILLamaExecutor executor)
+    {
+        this.executor = executor;
+    }
+
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        yield return new LLamaTextResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
new file mode 100644
index 00000000..9ff2d6e4
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
@@ -0,0 +1,37 @@
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Orchestration;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+internal sealed class LLamaTextResult : ITextStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _text;
+
+    public LLamaTextResult(IAsyncEnumerable<string> text)
+    {
+        _text = text;
+        ModelResult = new(text);
+    }
+
+    public ModelResult ModelResult { get; }
+
+    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _text)
+        {
+            sb.Append(token);
+        }
+        return await Task.FromResult(sb.ToString()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (string word in _text)
+        {
+            yield return word;
+        }
+    }
+}
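The text-completion connector above can also be driven directly, without a kernel. A small sketch under the same assumptions (executor is any ILLamaExecutor, e.g. the StatelessExecutor from SemanticKernelPrompt.cs; the prompt strings are placeholders):

    var completion = new LLamaSharpTextCompletion(executor);
    var settings = new CompleteRequestSettings { MaxTokens = 64, Temperature = 0.2 };

    // Non-streaming: wait for the full completion, then print it.
    var results = await completion.GetCompletionsAsync("The capital of France is", settings);
    Console.WriteLine(await results[0].GetCompletionAsync());

    // Streaming: print tokens as soon as the executor yields them.
    await foreach (var streamed in completion.GetStreamingCompletionsAsync("Count to five:", settings))
    {
        await foreach (var token in streamed.GetCompletionStreamingAsync())
        {
            Console.Write(token);
        }
    }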
diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs
new file mode 100644
index 00000000..f2bd2886
--- /dev/null
+++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs
@@ -0,0 +1,21 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.Embeddings;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+{
+    private LLamaEmbedder _embedder;
+
+    public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
+    {
+        _embedder = embedder;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+    {
+        var result = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
+        return await Task.FromResult(result).ConfigureAwait(false);
+    }
+}
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index 5980d17c..5f7e6c12 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -29,6 +29,11 @@ namespace LLama
             _ctx = weights.CreateContext(@params);
         }
 
+        public LLamaEmbedder(LLamaContext ctx)
+        {
+            _ctx = ctx;
+        }
+
         /// <summary>
         /// Get the embeddings of the text.
         /// </summary>
diff --git a/LLama/OldVersion/LLamaEmbedder.cs b/LLama/OldVersion/LLamaEmbedder.cs
index 7b6aedb6..662aa61a 100644
--- a/LLama/OldVersion/LLamaEmbedder.cs
+++ b/LLama/OldVersion/LLamaEmbedder.cs
@@ -54,6 +54,7 @@ namespace LLama.OldVersion
             int n_embed = NativeApi.llama_n_embd(_ctx);
             var embeddings = NativeApi.llama_get_embeddings(_ctx);
+
             if (embeddings == null)
             {
                 return new float[0];
diff --git a/LLamaSharp.sln b/LLamaSharp.sln
index 2e00196c..2a039d41 100644
--- a/LLamaSharp.sln
+++ b/LLamaSharp.sln
@@ -11,7 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp", "LLama\LLamaSh
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.WebAPI", "LLama.WebAPI\LLama.WebAPI.csproj", "{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", "LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj", "{D98F93E3-B344-4F9D-86BB-FDBF6768B587}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -83,6 +85,18 @@ Global
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
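To round out the embedding path added above, a hedged sketch of comparing two embeddings produced by LLamaSharpEmbeddingGeneration. The CosineSimilarity helper is hypothetical, written out here only for illustration (embedder is a LLamaEmbedder over an existing context, as in SemanticKernelMemorySkill.cs):

    var generation = new LLamaSharpEmbeddingGeneration(embedder);
    var embeddings = await generation.GenerateEmbeddingsAsync(
        new List<string> { "I like dogs", "I like puppies" });

    // Hypothetical helper: cosine similarity of two equal-length vectors.
    static float CosineSimilarity(ReadOnlySpan<float> a, ReadOnlySpan<float> b)
    {
        float dot = 0, na = 0, nb = 0;
        for (int i = 0; i < a.Length; i++)
        {
            dot += a[i] * b[i];
            na += a[i] * a[i];
            nb += b[i] * b[i];
        }
        return dot / (MathF.Sqrt(na) * MathF.Sqrt(nb));
    }

    // Semantically similar sentences should score close to 1.0.
    Console.WriteLine(CosineSimilarity(embeddings[0].Span, embeddings[1].Span));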