Add Semantic Kernel support

parent 40e76a70c5
commit 9a1d6f99f2
@@ -27,6 +27,11 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <PackageReference Include="Microsoft.SemanticKernel" Version="0.21.230828.2-preview" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj" />
     <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
   </ItemGroup>
 
@@ -0,0 +1,72 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelChat
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+            //var builder = new KernelBuilder();
+            //builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
+            //var kernel = builder.Build();
+
+            var chatGPT = new LLamaSharpChatCompletion(ex);
+
+            var chatHistory = chatGPT.CreateNewChat("You are a librarian, expert about books");
+
+            Console.WriteLine("Chat content:");
+            Console.WriteLine("------------------------");
+
+            chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+            await MessageOutputAsync(chatHistory);
+
+            // First bot assistant message
+            string reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+
+            // Second user message
+            chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
+            await MessageOutputAsync(chatHistory);
+
+            // Second bot assistant message
+            reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+        }
+
+        /// <summary>
+        /// Outputs the last message of the chat history
+        /// </summary>
+        private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+        {
+            var message = chatHistory.Messages.Last();
+
+            Console.WriteLine($"{message.Role}: {message.Content}");
+            Console.WriteLine("------------------------");
+
+            return Task.CompletedTask;
+        }
+    }
+}
 
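Note: the example above drains each reply with the blocking GenerateMessageAsync helper. The connector added later in this commit also exposes a streaming interface; here is a minimal sketch of consuming it (not part of the commit), reusing the chatGPT and chatHistory variables from the example:

    // Sketch: stream the assistant reply token by token instead of waiting
    // for the full message.
    await foreach (var result in chatGPT.GetStreamingChatCompletionsAsync(chatHistory))
    {
        await foreach (var message in result.GetStreamingChatMessageAsync())
        {
            Console.Write(message.Content);
        }
    }
    Console.WriteLine();
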
@@ -0,0 +1,173 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.Extensions.Logging;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+using Microsoft.SemanticKernel.Memory;
+using Microsoft.SemanticKernel.Skills.Core;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelMemorySkill
+    {
+        private const string MemoryCollectionName = "aboutMe";
+
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example15_MemorySkill.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+            var ex2 = new StatelessExecutor(model, parameters);
+            var builder = new KernelBuilder();
+
+            var embedding = new LLamaEmbedder(context);
+
+            builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
+            builder.WithAIService<ITextCompletion>("local-llama-text", new LLamaSharpTextCompletion(ex), true);
+            builder.WithAIService<ITextEmbeddingGeneration>("local-llama-embed", new LLamaSharpEmbeddingGeneration(embedding), true);
+            builder.WithMemoryStorage(new VolatileMemoryStore());
+            var kernel = builder.Build();
+            // ========= Store memories using the kernel =========
+
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info1", text: "My name is Andrea");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info2", text: "I work as a tourist operator");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info3", text: "I've been living in Seattle since 2005");
+            await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info4", text: "I visited France and Italy five times since 2015");
+
+            // ========= Store memories using semantic function =========
+
+            // Add Memory as a skill for other functions
+            var memorySkill = new TextMemorySkill(kernel.Memory);
+            kernel.ImportSkill(memorySkill);
+
+            // Build a semantic function that saves info to memory
+            const string SaveFunctionDefinition = "{{save $info}}";
+            var memorySaver = kernel.CreateSemanticFunction(SaveFunctionDefinition);
+
+            await kernel.RunAsync(memorySaver, new()
+            {
+                [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+                [TextMemorySkill.KeyParam] = "info5",
+                ["info"] = "My family is from New York"
+            });
+
+            // ========= Test memory remember =========
+            Console.WriteLine("========= Example: Recalling a Memory =========");
+
+            var answer = await memorySkill.RetrieveAsync(MemoryCollectionName, "info1", null);
+            Console.WriteLine("Memory associated with 'info1': {0}", answer);
+            /*
+            Output:
+            "Memory associated with 'info1': My name is Andrea
+            */
+
+            // ========= Test memory recall =========
+            Console.WriteLine("========= Example: Recalling an Idea =========");
+
+            answer = await memorySkill.RecallAsync("where did I grow up?", MemoryCollectionName, relevance: null, limit: 2, null);
+            Console.WriteLine("Ask: where did I grow up?");
+            Console.WriteLine("Answer:\n{0}", answer);
+
+            answer = await memorySkill.RecallAsync("where do I live?", MemoryCollectionName, relevance: null, limit: 2, null);
+            Console.WriteLine("Ask: where do I live?");
+            Console.WriteLine("Answer:\n{0}", answer);
+
+            /*
+            Output:
+
+            Ask: where did I grow up?
+            Answer:
+            ["My family is from New York","I\u0027ve been living in Seattle since 2005"]
+
+            Ask: where do I live?
+            Answer:
+            ["I\u0027ve been living in Seattle since 2005","My family is from New York"]
+            */
+
+            // ========= Use memory in a semantic function =========
+            Console.WriteLine("========= Example: Using Recall in a Semantic Function =========");
+
+            // Build a semantic function that uses memory to find facts
+            const string RecallFunctionDefinition = @"
+Consider only the facts below when answering questions.
+
+About me: {{recall 'where did I grow up?'}}
+About me: {{recall 'where do I live?'}}
+
+Question: {{$input}}
+
+Answer:
+";
+
+            var aboutMeOracle = kernel.CreateSemanticFunction(RecallFunctionDefinition, maxTokens: 100);
+
+            var result = await kernel.RunAsync(aboutMeOracle, new("Do I live in the same town where I grew up?")
+            {
+                [TextMemorySkill.CollectionParam] = MemoryCollectionName,
+                [TextMemorySkill.RelevanceParam] = "0.8"
+            });
+
+            Console.WriteLine("Do I live in the same town where I grew up?\n");
+            Console.WriteLine(result);
+
+            /*
+            Output:
+
+            Do I live in the same town where I grew up?
+
+            No, I do not live in the same town where I grew up since my family is from New York and I have been living in Seattle since 2005.
+            */
+
+            // ========= Remove a memory =========
+            Console.WriteLine("========= Example: Forgetting a Memory =========");
+
+            result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself")
+            {
+                ["fact1"] = "What is my name?",
+                ["fact2"] = "What do I do for a living?",
+                [TextMemorySkill.RelevanceParam] = ".75"
+            });
+
+            Console.WriteLine("Tell me a bit about myself\n");
+            Console.WriteLine(result);
+
+            /*
+            Approximate Output:
+            Tell me a bit about myself
+
+            My name is Andrea and my family is from New York. I work as a tourist operator.
+            */
+
+            await memorySkill.RemoveAsync(MemoryCollectionName, "info1", null);
+
+            result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself"));
+
+            Console.WriteLine("Tell me a bit about myself\n");
+            Console.WriteLine(result);
+
+            /*
+            Approximate Output:
+            Tell me a bit about myself
+
+            I'm from a family originally from New York and I work as a tourist operator. I've been living in Seattle since 2005.
+            */
+        }
+    }
+}
 
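Note: besides TextMemorySkill, the memories stored above can be queried directly through kernel.Memory. A minimal sketch (not part of the commit), assuming the same Semantic Kernel 0.21-preview memory API that SaveInformationAsync above belongs to, including its SearchAsync method:

    // Sketch: semantic search over the volatile store, bypassing the skill.
    await foreach (var hit in kernel.Memory.SearchAsync(MemoryCollectionName, "what do I do for work?", limit: 1))
    {
        Console.WriteLine($"{hit.Metadata.Text} (relevance: {hit.Relevance})");
    }
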
@@ -0,0 +1,55 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelPrompt
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            var ex = new StatelessExecutor(model, parameters);
+
+            var builder = new KernelBuilder();
+            builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+
+            var kernel = builder.Build();
+
+            var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+            var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+
+            string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+            string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";
+
+            Console.WriteLine(await summarize.InvokeAsync(text1));
+
+            Console.WriteLine(await summarize.InvokeAsync(text2));
+        }
+    }
+}
 
@@ -8,7 +8,7 @@
 
             Console.WriteLine("Please input a number to choose an example to run:");
             Console.WriteLine("0: Run a chat session without stripping the role names.");
-            Console.WriteLine("1: Run a chat session with the role names strippped.");
+            Console.WriteLine("1: Run a chat session with the role names stripped.");
             Console.WriteLine("2: Interactive mode chat by using executor.");
             Console.WriteLine("3: Instruct mode chat by using executor.");
             Console.WriteLine("4: Stateless mode chat by using executor.");
 
@@ -18,6 +18,9 @@
             Console.WriteLine("8: Quantize the model.");
             Console.WriteLine("9: Automatic conversation.");
             Console.WriteLine("10: Constrain response to json format using grammar.");
+            Console.WriteLine("11: Semantic Kernel Prompt.");
+            Console.WriteLine("12: Semantic Kernel Chat.");
+            Console.WriteLine("13: Semantic Kernel Memory Skill.");
 
             while (true)
             {
 
@@ -68,6 +71,18 @@
                 {
                     GrammarJsonResponse.Run();
                 }
+                else if (choice == 11)
+                {
+                    await SemanticKernelPrompt.Run();
+                }
+                else if (choice == 12)
+                {
+                    await SemanticKernelChat.Run();
+                }
+                else if (choice == 13)
+                {
+                    await SemanticKernelMemorySkill.Run();
+                }
                 else
                 {
                     Console.WriteLine("Cannot parse your choice. Please select again.");
 
@@ -0,0 +1,17 @@
+using static LLama.LLamaTransforms;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// Default HistoryTransform Patch
+/// </summary>
+public class HistoryTransform : DefaultHistoryTransform
+{
+    /// <inheritdoc/>
+    public override string HistoryToText(global::LLama.Common.ChatHistory history)
+    {
+        var prompt = base.HistoryToText(history);
+        return prompt + "\nAssistant:";
+
+    }
+}
 
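Note: the trailing "\nAssistant:" is the point of this patch. After the history is rendered as text, it cues the model to continue in the assistant role instead of inventing another user turn. A minimal sketch (not part of the commit) of what the transform produces, using the ChatHistory.AddMessage call seen in this commit's ExtensionMethods:

    // Sketch: the rendered prompt ends with "\nAssistant:".
    var history = new global::LLama.Common.ChatHistory();
    history.AddMessage(global::LLama.Common.AuthorRole.System, "You are a librarian, expert about books");
    history.AddMessage(global::LLama.Common.AuthorRole.User, "Hi, I'm looking for book suggestions");
    var prompt = new HistoryTransform().HistoryToText(history);
    // prompt now ends with "\nAssistant:", so generation starts as the assistant.
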
@@ -0,0 +1,74 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp ChatCompletion
+/// </summary>
+public sealed class LLamaSharpChatCompletion : IChatCompletion
+{
+    private const string UserRole = "user:";
+    private const string AssistantRole = "assistant:";
+    private ChatSession session;
+
+    public LLamaSharpChatCompletion(InteractiveExecutor model)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+    }
+
+    /// <inheritdoc/>
+    public ChatHistory CreateNewChat(string? instructions = "")
+    {
+        var history = new ChatHistory();
+
+        if (instructions != null && !string.IsNullOrEmpty(instructions))
+        {
+            history.AddSystemMessage(instructions);
+        }
+
+        return history;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        yield return new LLamaSharpChatResult(result);
+    }
+}
 
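Note: this class is what the commented-out KernelBuilder lines in SemanticKernelChat wire up. A minimal sketch (not part of the commit) of registering it with a kernel and resolving it back out, assuming ex is an InteractiveExecutor as in the examples and that IKernel.GetService<T>() is available as in the SK 0.21-preview samples:

    // Sketch: register the connector, then resolve it from the kernel.
    var builder = new KernelBuilder();
    builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
    var kernel = builder.Build();
    var chat = kernel.GetService<IChatCompletion>();
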
@@ -0,0 +1,14 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp Chat Message
+/// </summary>
+public class LLamaSharpChatMessage : ChatMessageBase
+{
+    /// <inheritdoc/>
+    public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
+    {
+    }
+}
 
@@ -0,0 +1,38 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
+
+internal sealed class LLamaSharpChatResult : IChatStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _stream;
+
+    /// <summary>
+    ///
+    /// </summary>
+    /// <param name="stream"></param>
+    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
+    {
+        _stream = stream;
+    }
+    /// <inheritdoc/>
+    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _stream)
+        {
+            sb.Append(token);
+        }
+        return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (var token in _stream)
+        {
+            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
+        }
+    }
+}
 
@@ -0,0 +1,72 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama;
+
+internal static class ExtensionMethods
+{
+    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
+    {
+        if (chatHistory is null)
+        {
+            throw new ArgumentNullException(nameof(chatHistory));
+        }
+
+        var history = new global::LLama.Common.ChatHistory();
+
+        foreach (var chat in chatHistory)
+        {
+            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, out var _role) ? _role : global::LLama.Common.AuthorRole.Unknown;
+            history.AddMessage(role, chat.Content);
+        }
+
+        return history;
+    }
+
+    /// <summary>
+    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings"></param>
+    /// <returns></returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":" };
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = antiPrompts,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+
+    /// <summary>
+    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings"></param>
+    /// <returns></returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = requestSettings.StopSequences,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+}
 
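Note the AntiPrompts handling in the chat overload: on top of any caller-supplied stop sequences it appends "User:", so inference halts as soon as the model starts to write the next user turn; the text-completion overload passes StopSequences through unchanged. A minimal sketch (not part of the commit; these extensions are internal, so this only compiles inside the connector assembly) of what the conversion yields:

    // Sketch: default chat settings map to LLamaSharp inference params.
    var settings = new ChatRequestSettings { MaxTokens = 256, Temperature = 0, TopP = 0, StopSequences = new List<string>() };
    var inferenceParams = settings.ToLLamaSharpInferenceParams();
    // inferenceParams.AntiPrompts now contains "User:", and inferenceParams.MaxTokens is 256.
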
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0;net6.0;net7.0</TargetFrameworks>
+    <RootNamespace>Microsoft.SemanticKernel.Connectors.AI.LLama</RootNamespace>
+    <Nullable>enable</Nullable>
+    <LangVersion>10</LangVersion>
+    <Platforms>AnyCPU;x64;Arm64</Platforms>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="0.21.230828.2-preview" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
+  </ItemGroup>
+
+</Project>
 
@@ -0,0 +1,27 @@
+using LLama;
+using LLama.Abstractions;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+public sealed class LLamaSharpTextCompletion : ITextCompletion
+{
+    public ILLamaExecutor executor;
+
+    public LLamaSharpTextCompletion(ILLamaExecutor executor)
+    {
+        this.executor = executor;
+    }
+
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        yield return new LLamaTextResult(result);
+    }
+}
 
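Note: a minimal sketch (not part of the commit) of using this class directly, outside a kernel, assuming ex is a StatelessExecutor as in the SemanticKernelPrompt example:

    // Sketch: one-shot completion without going through a semantic function.
    var completion = new LLamaSharpTextCompletion(ex);
    var settings = new CompleteRequestSettings { MaxTokens = 32 };
    var results = await completion.GetCompletionsAsync("The three laws of thermodynamics are", settings, CancellationToken.None);
    Console.WriteLine(await results[0].GetCompletionAsync());
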
@@ -0,0 +1,37 @@
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Orchestration;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
+
+internal sealed class LLamaTextResult : ITextStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _text;
+
+    public LLamaTextResult(IAsyncEnumerable<string> text)
+    {
+        _text = text;
+        ModelResult = new(text);
+    }
+
+    public ModelResult ModelResult { get; }
+
+    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _text)
+        {
+            sb.Append(token);
+        }
+        return await Task.FromResult(sb.ToString()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (string word in _text)
+        {
+            yield return word;
+        }
+    }
+}
 
@@ -0,0 +1,21 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.Embeddings;
+
+namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
+
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+{
+    private LLamaEmbedder _embedder;
+
+    public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
+    {
+        _embedder = embedder;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+    {
+        var result = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
+        return await Task.FromResult(result).ConfigureAwait(false);
+    }
+}
 
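Note: a minimal sketch (not part of the commit) of the generator on its own, assuming embedding is the LLamaEmbedder created in the memory-skill example above:

    // Sketch: embed a string and inspect the vector dimension.
    var generator = new LLamaSharpEmbeddingGeneration(embedding);
    var vectors = await generator.GenerateEmbeddingsAsync(new List<string> { "I've been living in Seattle since 2005" });
    Console.WriteLine($"embedding dimension: {vectors[0].Length}");
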
@@ -29,6 +29,11 @@ namespace LLama
             _ctx = weights.CreateContext(@params);
         }
 
+        public LLamaEmbedder(LLamaContext ctx)
+        {
+            _ctx = ctx;
+        }
+
         /// <summary>
         /// Get the embeddings of the text.
         /// </summary>
 
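Note: this new constructor is what lets the memory-skill example share one LLamaContext between the executor and the embedder instead of loading the model twice; a minimal sketch (not part of the commit), following that example:

    // Sketch: one context reused for both inference and embeddings.
    using var model = LLamaWeights.LoadFromFile(parameters);
    using var context = model.CreateContext(parameters);
    var executor = new InteractiveExecutor(context);
    var embedder = new LLamaEmbedder(context);
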
@@ -54,6 +54,7 @@ namespace LLama.OldVersion
 
             int n_embed = NativeApi.llama_n_embd(_ctx);
             var embeddings = NativeApi.llama_get_embeddings(_ctx);
+
             if (embeddings == null)
             {
                 return new float[0];
 
@@ -11,7 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp", "LLama\LLamaSh
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.WebAPI", "LLama.WebAPI\LLama.WebAPI.csproj", "{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
 EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", "LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj", "{D98F93E3-B344-4F9D-86BB-FDBF6768B587}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 
@@ -83,6 +85,18 @@ Global
 	{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU
 	{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU
 	{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU
+	{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE