diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index 6a1685ed..a8abe3ae 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -27,6 +27,11 @@
+
+
+
+
+
diff --git a/LLama.Examples/NewVersion/SemanticKernelChat.cs b/LLama.Examples/NewVersion/SemanticKernelChat.cs
new file mode 100644
index 00000000..feca8d7f
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelChat.cs
@@ -0,0 +1,72 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+ public class SemanticKernelChat // Console example: a scripted two-turn chat driven through LLamaSharpChatCompletion.
+ {
+ public static async Task Run() // Prompts for a model path, loads the model, then prints each message of the conversation.
+ {
+ Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+ Console.Write("Please input your model path: ");
+ var modelPath = Console.ReadLine(); // NOTE(review): ReadLine can return null at end of input; the value is passed on unchecked.
+
+ // Load weights into memory
+ var parameters = new ModelParams(modelPath)
+ {
+ Seed = RandomNumberGenerator.GetInt32(int.MaxValue), // a new random seed is drawn on every run
+ };
+ using var model = LLamaWeights.LoadFromFile(parameters);
+ using var context = model.CreateContext(parameters);
+ var ex = new InteractiveExecutor(context);
+ //var builder = new KernelBuilder();
+ //builder.WithAIService("local-llama", new LLamaSharpChatCompletion(ex), true);
+ //var kernel = builder.Build();
+
+ var chatGPT = new LLamaSharpChatCompletion(ex); // the chat service is used directly; the kernel wiring above is left commented out
+
+ var chatHistory = chatGPT.CreateNewChat("You are a librarian, expert about books"); // the argument is the system instruction for the chat
+
+ Console.WriteLine("Chat content:");
+ Console.WriteLine("------------------------");
+
+ chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+ await MessageOutputAsync(chatHistory);
+
+ // First bot assistant message
+ string reply = await chatGPT.GenerateMessageAsync(chatHistory);
+ chatHistory.AddAssistantMessage(reply); // the reply is appended so the next turn sees the whole conversation
+ await MessageOutputAsync(chatHistory);
+
+ // Second user message
+ chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
+ await MessageOutputAsync(chatHistory);
+
+ // Second bot assistant message
+ reply = await chatGPT.GenerateMessageAsync(chatHistory);
+ chatHistory.AddAssistantMessage(reply);
+ await MessageOutputAsync(chatHistory);
+ }
+
+ /// <summary>
+ /// Outputs the last message of the chat history
+ /// </summary>
+ private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+ {
+ var message = chatHistory.Messages.Last(); // Last() throws on an empty history; every caller above adds a message first
+
+ Console.WriteLine($"{message.Role}: {message.Content}");
+ Console.WriteLine("------------------------");
+
+ return Task.CompletedTask; // the work is synchronous; a completed task is returned so callers can still await
+ }
+ }
+}
diff --git a/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs b/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
new file mode 100644
index 00000000..df22d9eb
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelMemorySkill.cs
@@ -0,0 +1,173 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.Extensions.Logging;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+using Microsoft.SemanticKernel.Memory;
+using Microsoft.SemanticKernel.Skills.Core;
+
+namespace LLama.Examples.NewVersion
+{
+ public class SemanticKernelMemorySkill // Console example: saving, recalling and removing memories through a Semantic Kernel built on LLamaSharp services.
+ {
+ private const string MemoryCollectionName = "aboutMe"; // collection name used for every save/recall/remove below
+
+ public static async Task Run() // Prompts for a model path, builds the kernel, then walks through the memory scenarios in order.
+ {
+ Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example15_MemorySkill.cs");
+ Console.Write("Please input your model path: ");
+ var modelPath = Console.ReadLine(); // NOTE(review): ReadLine can return null at end of input; the value is passed on unchecked.
+
+ // Load weights into memory
+ var parameters = new ModelParams(modelPath)
+ {
+ Seed = RandomNumberGenerator.GetInt32(int.MaxValue), // a new random seed is drawn on every run
+ };
+ using var model = LLamaWeights.LoadFromFile(parameters);
+ using var context = model.CreateContext(parameters);
+ var ex = new InteractiveExecutor(context);
+ var ex2 = new StatelessExecutor(model, parameters); // NOTE(review): ex2 is not referenced again in this method
+ var builder = new KernelBuilder();
+
+ var embedding = new LLamaEmbedder(context); // the embedder is given the same context instance as the interactive executor
+
+ builder.WithAIService("local-llama", new LLamaSharpChatCompletion(ex), true); // third argument presumably marks the default service - TODO confirm
+ builder.WithAIService("local-llama-text", new LLamaSharpTextCompletion(ex), true); // text completion also runs on the interactive executor `ex`
+ builder.WithAIService("local-llama-embed", new LLamaSharpEmbeddingGeneration(embedding), true);
+ builder.WithMemoryStorage(new VolatileMemoryStore());
+ var kernel = builder.Build();
+ // ========= Store memories using the kernel =========
+
+ await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info1", text: "My name is Andrea");
+ await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info2", text: "I work as a tourist operator");
+ await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info3", text: "I've been living in Seattle since 2005");
+ await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info4", text: "I visited France and Italy five times since 2015");
+
+ // ========= Store memories using semantic function =========
+
+ // Add Memory as a skill for other functions
+ var memorySkill = new TextMemorySkill(kernel.Memory);
+ kernel.ImportSkill(memorySkill);
+
+ // Build a semantic function that saves info to memory
+ const string SaveFunctionDefinition = "{{save $info}}";
+ var memorySaver = kernel.CreateSemanticFunction(SaveFunctionDefinition);
+
+ await kernel.RunAsync(memorySaver, new() // context variables: target collection, key "info5", and the text bound to $info
+ {
+ [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+ [TextMemorySkill.KeyParam] = "info5",
+ ["info"] = "My family is from New York"
+ });
+
+ // ========= Test memory remember =========
+ Console.WriteLine("========= Example: Recalling a Memory =========");
+
+ var answer = await memorySkill.RetrieveAsync(MemoryCollectionName, "info1", null); // lookup by key "info1"
+ Console.WriteLine("Memory associated with 'info1': {0}", answer);
+ /*
+ Output:
+ "Memory associated with 'info1': My name is Andrea
+ */
+
+ // ========= Test memory recall =========
+ Console.WriteLine("========= Example: Recalling an Idea =========");
+
+ answer = await memorySkill.RecallAsync("where did I grow up?", MemoryCollectionName, relevance: null, limit: 2, null); // recall by question text, limit 2
+ Console.WriteLine("Ask: where did I grow up?");
+ Console.WriteLine("Answer:\n{0}", answer);
+
+ answer = await memorySkill.RecallAsync("where do I live?", MemoryCollectionName, relevance: null, limit: 2, null);
+ Console.WriteLine("Ask: where do I live?");
+ Console.WriteLine("Answer:\n{0}", answer);
+
+ /*
+ Output:
+
+ Ask: where did I grow up?
+ Answer:
+ ["My family is from New York","I\u0027ve been living in Seattle since 2005"]
+
+ Ask: where do I live?
+ Answer:
+ ["I\u0027ve been living in Seattle since 2005","My family is from New York"]
+ */
+
+ // ========= Use memory in a semantic function =========
+ Console.WriteLine("========= Example: Using Recall in a Semantic Function =========");
+
+ // Build a semantic function that uses memory to find facts
+ const string RecallFunctionDefinition = @"
+Consider only the facts below when answering questions.
+
+About me: {{recall 'where did I grow up?'}}
+About me: {{recall 'where do I live?'}}
+
+Question: {{$input}}
+
+Answer:
+";
+
+ var aboutMeOracle = kernel.CreateSemanticFunction(RecallFunctionDefinition, maxTokens: 100);
+
+ var result = await kernel.RunAsync(aboutMeOracle, new("Do I live in the same town where I grew up?") // the constructor argument is the $input question
+ {
+ [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+ [TextMemorySkill.RelevanceParam] = "0.8"
+ });
+
+ Console.WriteLine("Do I live in the same town where I grew up?\n");
+ Console.WriteLine(result);
+
+ /*
+ Output:
+
+ Do I live in the same town where I grew up?
+
+ No, I do not live in the same town where I grew up since my family is from New York and I have been living in Seattle since 2005.
+ */
+
+ // ========= Remove a memory =========
+ Console.WriteLine("========= Example: Forgetting a Memory =========");
+
+ result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself") // NOTE(review): no CollectionParam is set here, unlike the call above
+ {
+ ["fact1"] = "What is my name?", // NOTE(review): RecallFunctionDefinition references only $input; fact1/fact2 are not used by that template as written
+ ["fact2"] = "What do I do for a living?",
+ [TextMemorySkill.RelevanceParam] = ".75"
+ });
+
+ Console.WriteLine("Tell me a bit about myself\n");
+ Console.WriteLine(result);
+
+ /*
+ Approximate Output:
+ Tell me a bit about myself
+
+ My name is Andrea and my family is from New York. I work as a tourist operator.
+ */
+
+ await memorySkill.RemoveAsync(MemoryCollectionName, "info1", null); // removes the entry stored under key "info1"
+
+ result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself")); // same question again, this time with no extra context variables
+
+ Console.WriteLine("Tell me a bit about myself\n");
+ Console.WriteLine(result);
+
+ /*
+ Approximate Output:
+ Tell me a bit about myself
+
+ I'm from a family originally from New York and I work as a tourist operator. I've been living in Seattle since 2005.
+ */
+ }
+ }
+}
diff --git a/LLama.Examples/NewVersion/SemanticKernelPrompt.cs b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
new file mode 100644
index 00000000..40336b22
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
@@ -0,0 +1,55 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+ public class SemanticKernelPrompt // Console example: a one-line "TLDR" semantic function run against a local LLama model.
+ {
+ public static async Task Run() // Prompts for a model path, builds a kernel, then summarizes two fixed texts.
+ {
+ Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+ Console.Write("Please input your model path: ");
+ var modelPath = Console.ReadLine(); // NOTE(review): ReadLine can return null at end of input; the value is passed on unchecked.
+
+ // Load weights into memory
+ var parameters = new ModelParams(modelPath)
+ {
+ Seed = RandomNumberGenerator.GetInt32(int.MaxValue), // a new random seed is drawn on every run
+ };
+ using var model = LLamaWeights.LoadFromFile(parameters);
+ var ex = new StatelessExecutor(model, parameters); // built from the weights and parameters; no context is created explicitly here
+
+ var builder = new KernelBuilder();
+ builder.WithAIService("local-llama", new LLamaSharpTextCompletion(ex), true); // third argument presumably marks the default service - TODO confirm
+
+ var kernel = builder.Build();
+
+ var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+ var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100); // {{$input}} in the template is replaced by the text passed to InvokeAsync
+
+ string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+ string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";
+
+ Console.WriteLine(await summarize.InvokeAsync(text1)); // prints the result of the first invocation
+
+ Console.WriteLine(await summarize.InvokeAsync(text2)); // prints the result of the second invocation
+ }
+ }
+}
diff --git a/LLama.Examples/NewVersion/TestRunner.cs b/LLama.Examples/NewVersion/TestRunner.cs
index f5a10ef4..c8a7bd31 100644
--- a/LLama.Examples/NewVersion/TestRunner.cs
+++ b/LLama.Examples/NewVersion/TestRunner.cs
@@ -8,7 +8,7 @@
Console.WriteLine("Please input a number to choose an example to run:");
Console.WriteLine("0: Run a chat session without stripping the role names.");
- Console.WriteLine("1: Run a chat session with the role names strippped.");
+ Console.WriteLine("1: Run a chat session with the role names stripped.");
Console.WriteLine("2: Interactive mode chat by using executor.");
Console.WriteLine("3: Instruct mode chat by using executor.");
Console.WriteLine("4: Stateless mode chat by using executor.");
@@ -18,6 +18,9 @@
Console.WriteLine("8: Quantize the model.");
Console.WriteLine("9: Automatic conversation.");
Console.WriteLine("10: Constrain response to json format using grammar.");
+ Console.WriteLine("11: Semantic Kernel Prompt.");
+ Console.WriteLine("12: Semantic Kernel Chat.");
+ Console.WriteLine("13: Semantic Kernel Memory Skill.");
while (true)
{
@@ -68,6 +71,18 @@
{
GrammarJsonResponse.Run();
}
+ else if (choice == 11)
+ {
+ await SemanticKernelPrompt.Run();
+ }
+ else if (choice == 12)
+ {
+ await SemanticKernelChat.Run();
+ }
+ else if (choice == 13)
+ {
+ await SemanticKernelMemorySkill.Run();
+ }
else
{
Console.WriteLine("Cannot parse your choice. Please select again.");
diff --git a/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
new file mode 100644
index 00000000..1b72d89e
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
@@ -0,0 +1,17 @@
+using static LLama.LLamaTransforms;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// History transform that renders the history with the default transform and ends the prompt with an assistant cue.
+/// </summary>
+public class HistoryTransform : DefaultHistoryTransform
+{
+    /// <inheritdoc/>
+    public override string HistoryToText(global::LLama.Common.ChatHistory history)
+    {
+        // Render with the base transform, then append the cue that marks where the assistant's reply starts.
+        var rendered = base.HistoryToText(history);
+        return string.Concat(rendered, "\nAssistant:");
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
new file mode 100644
index 00000000..51dee59e
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
@@ -0,0 +1,74 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp ChatCompletion
+/// </summary>
+public sealed class LLamaSharpChatCompletion : IChatCompletion
+{
+    private const string UserRole = "user:";
+    private const string AssistantRole = "assistant:";
+    private readonly ChatSession session;
+    /// <summary>Creates a chat session on <paramref name="model"/> with the connector's history and output transforms.</summary>
+    public LLamaSharpChatCompletion(InteractiveExecutor model)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+    }
+
+    /// <inheritdoc/>
+    public ChatHistory CreateNewChat(string? instructions = "")
+    {
+        var history = new ChatHistory();
+
+        if (instructions is { Length: > 0 }) // null and "" both mean "no system message"; the pattern also narrows nullability on every target framework
+        {
+            history.AddSystemMessage(instructions);
+        }
+
+        return history;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= CreateDefaultRequestSettings();
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= CreateDefaultRequestSettings();
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        yield return new LLamaSharpChatResult(result);
+    }
+
+    /// <summary>Settings used when the caller passes none; shared by both completion methods so they cannot drift apart.</summary>
+    private static ChatRequestSettings CreateDefaultRequestSettings()
+    {
+        return new ChatRequestSettings
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string>()
+        };
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
new file mode 100644
index 00000000..a10314fe
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
@@ -0,0 +1,14 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp Chat Message
+/// </summary>
+public class LLamaSharpChatMessage : ChatMessageBase
+{
+ /// <summary>Creates a message by forwarding <paramref name="role"/> and <paramref name="content"/> to the base constructor.</summary>
+ public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
+ {
+ }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
new file mode 100644
index 00000000..8a8b2ef3
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
@@ -0,0 +1,38 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+internal sealed class LLamaSharpChatResult : IChatStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _stream;
+
+    /// <summary>
+    /// Wraps a stream of generated text fragments as a chat result.
+    /// </summary>
+    /// <param name="stream">Text fragments in generation order; each call below enumerates this stream.</param>
+    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
+    {
+        _stream = stream;
+    }
+    /// <summary>Collects every fragment of the stream into one assistant message; enumeration observes <paramref name="cancellationToken"/>.</summary>
+    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _stream.WithCancellation(cancellationToken).ConfigureAwait(false))
+        {
+            sb.Append(token);
+        }
+        return new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString());
+    }
+
+    /// <summary>Yields one assistant message per fragment of the stream; enumeration observes <paramref name="cancellationToken"/>.</summary>
+    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (var token in _stream.WithCancellation(cancellationToken).ConfigureAwait(false))
+        {
+            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
+        }
+    }
+}
diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs
new file mode 100644
index 00000000..ebfc1c37
--- /dev/null
+++ b/LLama.SemanticKernel/ExtensionMethods.cs
@@ -0,0 +1,72 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama;
+
+internal static class ExtensionMethods
+{
+    /// <summary>Copies a Semantic Kernel chat history into a new LLamaSharp chat history.</summary>
+    /// <param name="chatHistory">History to convert; must not be null.</param>
+    /// <returns>The converted history. Role labels are matched to enum names ignoring case; unmatched labels become Unknown.</returns>
+    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
+    {
+        if (chatHistory is null)
+        {
+            throw new ArgumentNullException(nameof(chatHistory));
+        }
+        var history = new global::LLama.Common.ChatHistory();
+        foreach (var chat in chatHistory)
+        {
+            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, ignoreCase: true, out var parsedRole) ? parsedRole : global::LLama.Common.AuthorRole.Unknown;
+            history.AddMessage(role, chat.Content);
+        }
+        return history;
+    }
+
+    /// <summary>
+    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings">Chat settings to convert; must not be null.</param>
+    /// <returns>Inference parameters carrying the same sampling values, with user-turn markers added to the stop sequences.</returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+        // Stop on the user marker in both spellings. NOTE(review): the default history transform presumably emits the enum spelling ("User:") - confirm.
+        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":", global::LLama.Common.AuthorRole.User + ":" };
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = antiPrompts,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+
+    /// <summary>
+    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings">Completion settings to convert; must not be null.</param>
+    /// <returns>Inference parameters carrying the same sampling values and stop sequences; a missing MaxTokens becomes -1.</returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = requestSettings.StopSequences,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+}
diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
new file mode 100644
index 00000000..7b2a0780
--- /dev/null
+++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
@@ -0,0 +1,22 @@
+
+
+
+ netstandard2.0;net6.0;net7.0
+ Microsoft.SemanticKernel.Connectors.AI.LLama
+ enable
+ 10
+ AnyCPU;x64;Arm64
+ True
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
new file mode 100644
index 00000000..d6ce9362
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
@@ -0,0 +1,27 @@
+using LLama;
+using LLama.Abstractions;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+public sealed class LLamaSharpTextCompletion : ITextCompletion
+{
+    public ILLamaExecutor executor; // executor that runs inference for every request; kept as a public field for compatibility
+
+    /// <summary>Creates a text completion service backed by <paramref name="executor"/>.</summary>
+    public LLamaSharpTextCompletion(ILLamaExecutor executor) => this.executor = executor;
+
+    /// <summary>Starts inference for <paramref name="text"/> and returns a single result wrapping the generated stream.</summary>
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+    }
+
+    /// <summary>Starts inference for <paramref name="text"/> and yields a single streaming result wrapping the generated stream.</summary>
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        yield return new LLamaTextResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
new file mode 100644
index 00000000..9ff2d6e4
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
@@ -0,0 +1,37 @@
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Orchestration;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+internal sealed class LLamaTextResult : ITextStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _text;
+    /// <summary>Wraps the stream of generated fragments; the same stream object is also exposed through <see cref="ModelResult"/>.</summary>
+    public LLamaTextResult(IAsyncEnumerable<string> text)
+    {
+        _text = text;
+        ModelResult = new(text);
+    }
+    /// <summary>Model result built from the fragment stream passed to the constructor.</summary>
+    public ModelResult ModelResult { get; }
+    /// <summary>Collects every fragment of the stream into one string; enumeration observes <paramref name="cancellationToken"/>.</summary>
+    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _text.WithCancellation(cancellationToken).ConfigureAwait(false))
+        {
+            sb.Append(token);
+        }
+        return sb.ToString();
+    }
+    /// <summary>Yields the fragments of the stream one by one; enumeration observes <paramref name="cancellationToken"/>.</summary>
+    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (string word in _text.WithCancellation(cancellationToken).ConfigureAwait(false))
+        {
+            yield return word;
+        }
+    }
+}
diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs
new file mode 100644
index 00000000..f2bd2886
--- /dev/null
+++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs
@@ -0,0 +1,21 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.Embeddings;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+{
+    private readonly LLamaEmbedder _embedder;
+
+    /// <summary>Creates an embedding service that delegates to <paramref name="embedder"/>.</summary>
+    public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder) => _embedder = embedder;
+
+    /// <summary>Computes one embedding per entry of <paramref name="data"/>, in input order.</summary>
+    public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+    {
+        // The embedder call is synchronous, so the token can only be checked before the work starts.
+        cancellationToken.ThrowIfCancellationRequested();
+        var result = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
+        return await Task.FromResult<IList<ReadOnlyMemory<float>>>(result).ConfigureAwait(false);
+    }
+}
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index 5980d17c..5f7e6c12 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -29,6 +29,11 @@ namespace LLama
_ctx = weights.CreateContext(@params);
}
+ public LLamaEmbedder(LLamaContext ctx) // Creates an embedder over an existing, caller-supplied context.
+ {
+ _ctx = ctx; // stored as-is rather than created from weights; NOTE(review): confirm who disposes ctx when the embedder is disposed
+ }
+
///
/// Get the embeddings of the text.
///
diff --git a/LLama/OldVersion/LLamaEmbedder.cs b/LLama/OldVersion/LLamaEmbedder.cs
index 7b6aedb6..662aa61a 100644
--- a/LLama/OldVersion/LLamaEmbedder.cs
+++ b/LLama/OldVersion/LLamaEmbedder.cs
@@ -54,6 +54,7 @@ namespace LLama.OldVersion
int n_embed = NativeApi.llama_n_embd(_ctx);
var embeddings = NativeApi.llama_get_embeddings(_ctx);
+
if (embeddings == null)
{
return new float[0];
diff --git a/LLamaSharp.sln b/LLamaSharp.sln
index 2e00196c..2a039d41 100644
--- a/LLamaSharp.sln
+++ b/LLamaSharp.sln
@@ -11,7 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp", "LLama\LLamaSh
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.WebAPI", "LLama.WebAPI\LLama.WebAPI.csproj", "{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", "LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj", "{D98F93E3-B344-4F9D-86BB-FDBF6768B587}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -83,6 +85,18 @@ Global
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU
+ {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE