From 9a1d6f99f26ffbe38e3a6c59d1e7d1198e9106be Mon Sep 17 00:00:00 2001
From: Tim Miller
Date: Thu, 31 Aug 2023 17:24:44 +0900
Subject: [PATCH] Add Semantic Kernel support

---
 LLama.Examples/LLama.Examples.csproj          |   5 +
 .../NewVersion/SemanticKernelChat.cs          |  72 ++++++++
 .../NewVersion/SemanticKernelMemorySkill.cs   | 173 ++++++++++++++++++
 .../NewVersion/SemanticKernelPrompt.cs        |  55 ++++++
 LLama.Examples/NewVersion/TestRunner.cs       |  17 +-
 .../ChatCompletion/HistoryTransform.cs        |  17 ++
 .../LLamaSharpChatCompletion.cs               |  74 ++++++++
 .../ChatCompletion/LLamaSharpChatMessage.cs   |  14 ++
 .../ChatCompletion/LLamaSharpChatResult.cs    |  38 ++++
 LLama.SemanticKernel/ExtensionMethods.cs      |  72 ++++++++
 .../LLamaSharp.SemanticKernel.csproj          |  22 +++
 .../LLamaSharpTextCompletion.cs               |  27 +++
 .../TextCompletion/LLamaTextResult.cs         |  37 ++++
 .../LLamaSharpEmbeddingGeneration.cs          |  21 +++
 LLama/LLamaEmbedder.cs                        |   5 +
 LLama/OldVersion/LLamaEmbedder.cs             |   1 +
 LLamaSharp.sln                                |  16 +-
 17 files changed, 664 insertions(+), 2 deletions(-)
 create mode 100644 LLama.Examples/NewVersion/SemanticKernelChat.cs
 create mode 100644 LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
 create mode 100644 LLama.Examples/NewVersion/SemanticKernelPrompt.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
 create mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
 create mode 100644 LLama.SemanticKernel/ExtensionMethods.cs
 create mode 100644 LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
 create mode 100644 LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
 create mode 100644 LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
 create mode 100644 LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs

diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index 6a1685ed..a8abe3ae 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -27,6 +27,11 @@
+
+
+
+
+
diff --git a/LLama.Examples/NewVersion/SemanticKernelChat.cs b/LLama.Examples/NewVersion/SemanticKernelChat.cs
new file mode 100644
index 00000000..feca8d7f
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelChat.cs
@@ -0,0 +1,72 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelChat
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+            //var builder = new KernelBuilder();
+            //builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
+            //var kernel = builder.Build();
+
+            var chatGPT = new LLamaSharpChatCompletion(ex);
+
+            var chatHistory = chatGPT.CreateNewChat("You are a librarian, expert about books");
+
+            Console.WriteLine("Chat content:");
+            Console.WriteLine("------------------------");
+
+            chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+            await MessageOutputAsync(chatHistory);
+
+            // First bot assistant message
+            string reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+
+            // Second user message
+            chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
+            await MessageOutputAsync(chatHistory);
+
+            // Second bot assistant message
+            reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+        }
+
+        /// <summary>
+        /// Outputs the last message of the chat history
+        /// </summary>
+        private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+        {
+            var message = chatHistory.Messages.Last();
+
+            Console.WriteLine($"{message.Role}: {message.Content}");
+            Console.WriteLine("------------------------");
+
+            return Task.CompletedTask;
+        }
+    }
+}
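Note on wiring: the commented-out KernelBuilder lines in the example above hint at how LLamaSharpChatCompletion slots into a kernel instead of being called directly. A minimal sketch of that path, assuming the Semantic Kernel 0.x builder API this patch targets ("<model path>" and the service id are placeholders):

    using LLama;
    using LLama.Common;
    using Microsoft.SemanticKernel;
    using Microsoft.SemanticKernel.AI.ChatCompletion;
    using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;

    // Build an interactive executor over a local model, as in the example above.
    var parameters = new ModelParams("<model path>");
    using var model = LLamaWeights.LoadFromFile(parameters);
    using var context = model.CreateContext(parameters);
    var executor = new InteractiveExecutor(context);

    // Register the connector as the kernel's default chat completion service.
    var kernel = new KernelBuilder()
        .WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(executor), true)
        .Build();

    // Resolve it back out of the kernel and run one exchange.
    var chat = kernel.GetService<IChatCompletion>();
    var history = chat.CreateNewChat("You are a helpful assistant.");
    history.AddUserMessage("Hello!");
    Console.WriteLine(await chat.GenerateMessageAsync(history));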
diff --git a/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs b/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
new file mode 100644
index 00000000..df22d9eb
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
@@ -0,0 +1,173 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.Extensions.Logging;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+using Microsoft.SemanticKernel.Memory;
+using Microsoft.SemanticKernel.Skills.Core;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelMemorySkill
+    {
+        private const string MemoryCollectionName = "aboutMe";
+
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example15_MemorySkill.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+            var ex2 = new StatelessExecutor(model, parameters);
+            var builder = new KernelBuilder();
+
+            var embedding = new LLamaEmbedder(context);
+
+            builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
+            builder.WithAIService<ITextCompletion>("local-llama-text", new LLamaSharpTextCompletion(ex), true);
+            builder.WithAIService<ITextEmbeddingGeneration>("local-llama-embed", new LLamaSharpEmbeddingGeneration(embedding), true);
+            builder.WithMemoryStorage(new VolatileMemoryStore());
+            var kernel = builder.Build();
+
+            // ========= Store memories using the kernel =========
+
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info1", text: "My name is Andrea");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info2", text: "I work as a tourist operator");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info3", text: "I've been living in Seattle since 2005");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info4", text: "I visited France and Italy five times since 2015");
+
+            // ========= Store memories using semantic function =========
+
+            // Add Memory as a skill for other functions
+            var memorySkill = new TextMemorySkill(kernel.Memory);
+            kernel.ImportSkill(memorySkill);
+
+            // Build a semantic function that saves info to memory
+            const string SaveFunctionDefinition = "{{save $info}}";
+            var memorySaver = kernel.CreateSemanticFunction(SaveFunctionDefinition);
+
+            await kernel.RunAsync(memorySaver, new()
+            {
+                [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+                [TextMemorySkill.KeyParam] = "info5",
+                ["info"] = "My family is from New York"
+            });
+
+            // ========= Test memory remember =========
+            Console.WriteLine("========= Example: Recalling a Memory =========");
+
+            var answer = await memorySkill.RetrieveAsync(MemoryCollectionName, "info1", null);
+            Console.WriteLine("Memory associated with 'info1': {0}", answer);
+            /*
+            Output:
+            "Memory associated with 'info1': My name is Andrea
+            */
+
+            // ========= Test memory recall =========
+            Console.WriteLine("========= Example: Recalling an Idea =========");
+
+            answer = await memorySkill.RecallAsync("where did I grow up?", MemoryCollectionName, relevance: null, limit: 2, null);
+            Console.WriteLine("Ask: where did I grow up?");
+            Console.WriteLine("Answer:\n{0}", answer);
+
+            answer = await memorySkill.RecallAsync("where do I live?", MemoryCollectionName, relevance: null, limit: 2, null);
+            Console.WriteLine("Ask: where do I live?");
+            Console.WriteLine("Answer:\n{0}", answer);
+
+            /*
+            Output:
+
+            Ask: where did I grow up?
+            Answer:
+            ["My family is from New York","I\u0027ve been living in Seattle since 2005"]
+
+            Ask: where do I live?
+            Answer:
+            ["I\u0027ve been living in Seattle since 2005","My family is from New York"]
+            */
+
+            // ========= Use memory in a semantic function =========
+            Console.WriteLine("========= Example: Using Recall in a Semantic Function =========");
+
+            // Build a semantic function that uses memory to find facts
+            const string RecallFunctionDefinition = @"
+Consider only the facts below when answering questions.
+
+About me: {{recall 'where did I grow up?'}}
+About me: {{recall 'where do I live?'}}
+
+Question: {{$input}}
+
+Answer:
+";
+
+            var aboutMeOracle = kernel.CreateSemanticFunction(RecallFunctionDefinition, maxTokens: 100);
+
+            var result = await kernel.RunAsync(aboutMeOracle, new("Do I live in the same town where I grew up?")
+            {
+                [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+                [TextMemorySkill.RelevanceParam] = "0.8"
+            });
+
+            Console.WriteLine("Do I live in the same town where I grew up?\n");
+            Console.WriteLine(result);
+
+            /*
+            Output:
+
+            Do I live in the same town where I grew up?
+
+            No, I do not live in the same town where I grew up since my family is from New York and I have been living in Seattle since 2005.
+            */
+
+            // ========= Remove a memory =========
+            Console.WriteLine("========= Example: Forgetting a Memory =========");
+
+            result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself")
+            {
+                ["fact1"] = "What is my name?",
+                ["fact2"] = "What do I do for a living?",
+                [TextMemorySkill.RelevanceParam] = ".75"
+            });
+
+            Console.WriteLine("Tell me a bit about myself\n");
+            Console.WriteLine(result);
+
+            /*
+            Approximate Output:
+            Tell me a bit about myself
+
+            My name is Andrea and my family is from New York. I work as a tourist operator.
+            */
+
+            await memorySkill.RemoveAsync(MemoryCollectionName, "info1", null);
+
+            result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself"));
+
+            Console.WriteLine("Tell me a bit about myself\n");
+            Console.WriteLine(result);
+
+            /*
+            Approximate Output:
+            Tell me a bit about myself
+
+            I'm from a family originally from New York and I work as a tourist operator. I've been living in Seattle since 2005.
+            */
+        }
+    }
+}
diff --git a/LLama.Examples/NewVersion/SemanticKernelPrompt.cs b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
new file mode 100644
index 00000000..40336b22
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
@@ -0,0 +1,55 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelPrompt
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            var ex = new StatelessExecutor(model, parameters);
+
+            var builder = new KernelBuilder();
+            builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+
+            var kernel = builder.Build();
+
+            var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+            var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+
+            string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+            string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";
+
+            Console.WriteLine(await summarize.InvokeAsync(text1));
+
+            Console.WriteLine(await summarize.InvokeAsync(text2));
+        }
+    }
+}
diff --git a/LLama.Examples/NewVersion/TestRunner.cs b/LLama.Examples/NewVersion/TestRunner.cs
index f5a10ef4..c8a7bd31 100644
--- a/LLama.Examples/NewVersion/TestRunner.cs
+++ b/LLama.Examples/NewVersion/TestRunner.cs
@@ -8,7 +8,7 @@
         Console.WriteLine("Please input a number to choose an example to run:");
         Console.WriteLine("0: Run a chat session without stripping the role names.");
-        Console.WriteLine("1: Run a chat session with the role names strippped.");
+        Console.WriteLine("1: Run a chat session with the role names stripped.");
         Console.WriteLine("2: Interactive mode chat by using executor.");
         Console.WriteLine("3: Instruct mode chat by using executor.");
         Console.WriteLine("4: Stateless mode chat by using executor.");
@@ -18,6 +18,9 @@
         Console.WriteLine("8: Quantize the model.");
         Console.WriteLine("9: Automatic conversation.");
         Console.WriteLine("10: Constrain response to json format using grammar.");
+        Console.WriteLine("11: Semantic Kernel Prompt.");
+        Console.WriteLine("12: Semantic Kernel Chat.");
+        Console.WriteLine("13: Semantic Kernel Memory Skill.");
 
         while (true)
         {
@@ -68,6 +71,18 @@
             {
                 GrammarJsonResponse.Run();
             }
+            else if (choice == 11)
+            {
+                await SemanticKernelPrompt.Run();
+            }
+            else if (choice == 12)
+            {
+                await SemanticKernelChat.Run();
+            }
+            else if (choice == 13)
+            {
+                await SemanticKernelMemorySkill.Run();
+            }
             else
             {
                 Console.WriteLine("Cannot parse your choice. Please select again.");
diff --git a/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
new file mode 100644
index 00000000..1b72d89e
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
@@ -0,0 +1,17 @@
+using static LLama.LLamaTransforms;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// Default HistoryTransform Patch
+/// </summary>
+public class HistoryTransform : DefaultHistoryTransform
+{
+    /// <inheritdoc/>
+    public override string HistoryToText(global::LLama.Common.ChatHistory history)
+    {
+        var prompt = base.HistoryToText(history);
+        return prompt + "\nAssistant:";
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
new file mode 100644
index 00000000..51dee59e
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
@@ -0,0 +1,74 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp ChatCompletion
+/// </summary>
+public sealed class LLamaSharpChatCompletion : IChatCompletion
+{
+    private const string UserRole = "user:";
+    private const string AssistantRole = "assistant:";
+    private ChatSession session;
+
+    public LLamaSharpChatCompletion(InteractiveExecutor model)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+    }
+
+    /// <inheritdoc/>
+    public ChatHistory CreateNewChat(string? instructions = "")
+    {
+        var history = new ChatHistory();
+
+        if (instructions != null && !string.IsNullOrEmpty(instructions))
+        {
+            history.AddSystemMessage(instructions);
+        }
+
+        return history;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        yield return new LLamaSharpChatResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
new file mode 100644
index 00000000..a10314fe
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
@@ -0,0 +1,14 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp Chat Message
+/// </summary>
+public class LLamaSharpChatMessage : ChatMessageBase
+{
+    /// <inheritdoc/>
+    public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
+    {
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
new file mode 100644
index 00000000..8a8b2ef3
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
@@ -0,0 +1,38 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+internal sealed class LLamaSharpChatResult : IChatStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _stream;
+
+    /// <summary>
+    /// Constructs a chat result from a stream of response tokens.
+    /// </summary>
+    /// <param name="stream">The token stream produced by the chat session.</param>
+    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
+    {
+        _stream = stream;
+    }
+
+    /// <inheritdoc/>
+    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _stream)
+        {
+            sb.Append(token);
+        }
+        return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (var token in _stream)
+        {
+            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
+        }
+    }
+}
diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs
new file mode 100644
index 00000000..ebfc1c37
--- /dev/null
+++ b/LLama.SemanticKernel/ExtensionMethods.cs
@@ -0,0 +1,72 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama;
+
+internal static class ExtensionMethods
+{
+    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
+    {
+        if (chatHistory is null)
+        {
+            throw new ArgumentNullException(nameof(chatHistory));
+        }
+
+        var history = new global::LLama.Common.ChatHistory();
+
+        foreach (var chat in chatHistory)
+        {
+            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, out var _role) ? _role : global::LLama.Common.AuthorRole.Unknown;
+            history.AddMessage(role, chat.Content);
+        }
+
+        return history;
+    }
+
+    /// <summary>
+    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings">The request settings to convert.</param>
+    /// <returns>The equivalent LLamaSharp inference parameters.</returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":" };
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = antiPrompts,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+
+    /// <summary>
+    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings">The request settings to convert.</param>
+    /// <returns>The equivalent LLamaSharp inference parameters.</returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = requestSettings.StopSequences,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+}
diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
new file mode 100644
index 00000000..7b2a0780
--- /dev/null
+++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0;net6.0;net7.0</TargetFrameworks>
+    <RootNamespace>Microsoft.SemanticKernel.Connectors.AI.LLama</RootNamespace>
+    <Nullable>enable</Nullable>
+    <LangVersion>10</LangVersion>
+    <Platforms>AnyCPU;x64;Arm64</Platforms>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
new file mode 100644
index 00000000..d6ce9362
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
@@ -0,0 +1,27 @@
+using LLama;
+using LLama.Abstractions;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+public sealed class LLamaSharpTextCompletion : ITextCompletion
+{
+    public ILLamaExecutor executor;
+
+    public LLamaSharpTextCompletion(ILLamaExecutor executor)
+    {
+        this.executor = executor;
+    }
+
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        yield return new LLamaTextResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
new file mode 100644
index 00000000..9ff2d6e4
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
@@ -0,0 +1,37 @@
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Orchestration;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+internal sealed class LLamaTextResult : ITextStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _text;
+
+    public LLamaTextResult(IAsyncEnumerable<string> text)
+    {
+        _text = text;
+        ModelResult = new(text);
+    }
+
+    public ModelResult ModelResult { get; }
+
+    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _text)
+        {
+            sb.Append(token);
+        }
+        return await Task.FromResult(sb.ToString()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (string word in _text)
+        {
+            yield return word;
+        }
+    }
+}
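The text-completion connector above can also be driven directly, without a kernel. A small sketch under the same assumptions (executor is any ILLamaExecutor, e.g. the StatelessExecutor from SemanticKernelPrompt.cs; the prompt strings are placeholders):

    var completion = new LLamaSharpTextCompletion(executor);
    var settings = new CompleteRequestSettings { MaxTokens = 64, Temperature = 0.2 };

    // Non-streaming: wait for the full completion, then print it.
    var results = await completion.GetCompletionsAsync("The capital of France is", settings);
    Console.WriteLine(await results[0].GetCompletionAsync());

    // Streaming: print tokens as soon as the executor yields them.
    await foreach (var streamed in completion.GetStreamingCompletionsAsync("Count to five:", settings))
    {
        await foreach (var token in streamed.GetCompletionStreamingAsync())
        {
            Console.Write(token);
        }
    }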
diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs
new file mode 100644
index 00000000..f2bd2886
--- /dev/null
+++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs
@@ -0,0 +1,21 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.Embeddings;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+{
+    private LLamaEmbedder _embedder;
+
+    public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
+    {
+        _embedder = embedder;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+    {
+        var result = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
+        return await Task.FromResult(result).ConfigureAwait(false);
+    }
+}
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index 5980d17c..5f7e6c12 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -29,6 +29,11 @@ namespace LLama
             _ctx = weights.CreateContext(@params);
         }
 
+        public LLamaEmbedder(LLamaContext ctx)
+        {
+            _ctx = ctx;
+        }
+
         /// <summary>
         /// Get the embeddings of the text.
         /// </summary>
diff --git a/LLama/OldVersion/LLamaEmbedder.cs b/LLama/OldVersion/LLamaEmbedder.cs
index 7b6aedb6..662aa61a 100644
--- a/LLama/OldVersion/LLamaEmbedder.cs
+++ b/LLama/OldVersion/LLamaEmbedder.cs
@@ -54,6 +54,7 @@ namespace LLama.OldVersion
             int n_embed = NativeApi.llama_n_embd(_ctx);
             var embeddings = NativeApi.llama_get_embeddings(_ctx);
+
             if (embeddings == null)
             {
                 return new float[0];
diff --git a/LLamaSharp.sln b/LLamaSharp.sln
index 2e00196c..2a039d41 100644
--- a/LLamaSharp.sln
+++ b/LLamaSharp.sln
@@ -11,7 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp", "LLama\LLamaSh
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.WebAPI", "LLama.WebAPI\LLama.WebAPI.csproj", "{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", "LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj", "{D98F93E3-B344-4F9D-86BB-FDBF6768B587}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -83,6 +85,18 @@ Global
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
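To round out the embedding path added above, a hedged sketch of comparing two embeddings produced by LLamaSharpEmbeddingGeneration. The CosineSimilarity helper is hypothetical, written out here only for illustration (embedder is a LLamaEmbedder over an existing context, as in SemanticKernelMemorySkill.cs):

    var generation = new LLamaSharpEmbeddingGeneration(embedder);
    var embeddings = await generation.GenerateEmbeddingsAsync(
        new List<string> { "I like dogs", "I like puppies" });

    // Hypothetical helper: cosine similarity of two equal-length vectors.
    static float CosineSimilarity(ReadOnlySpan<float> a, ReadOnlySpan<float> b)
    {
        float dot = 0, na = 0, nb = 0;
        for (int i = 0; i < a.Length; i++)
        {
            dot += a[i] * b[i];
            na += a[i] * a[i];
            nb += b[i] * b[i];
        }
        return dot / (MathF.Sqrt(na) * MathF.Sqrt(nb));
    }

    // Semantically similar sentences should score close to 1.0.
    Console.WriteLine(CosineSimilarity(embeddings[0].Span, embeddings[1].Span));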