Add Semantic Kernel support
This commit is contained in:
parent
40e76a70c5
commit
9a1d6f99f2
|
@ -27,6 +27,11 @@
|
|||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="0.21.230828.2-preview" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj" />
|
||||
<ProjectReference Include="..\LLama\LLamaSharp.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
using System.Reflection.Metadata;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Common;
|
||||
using Microsoft.SemanticKernel;
|
||||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.AI.TextCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
|
||||
|
||||
namespace LLama.Examples.NewVersion
|
||||
{
|
||||
/// <summary>
/// Example: a short scripted conversation driven through the Semantic Kernel
/// chat-completion abstraction, backed by a locally loaded LLamaSharp model.
/// </summary>
public class SemanticKernelChat
{
    /// <summary>
    /// Asks for a model path on the console, loads the model, then plays two
    /// user/assistant turns, printing every message as it is appended to the history.
    /// </summary>
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var modelParams = new ModelParams(modelPath)
        {
            Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
        };
        using var weights = LLamaWeights.LoadFromFile(modelParams);
        using var llamaContext = weights.CreateContext(modelParams);
        var executor = new InteractiveExecutor(llamaContext);

        var chatService = new LLamaSharpChatCompletion(executor);
        var history = chatService.CreateNewChat("You are a librarian, expert about books");

        Console.WriteLine("Chat content:");
        Console.WriteLine("------------------------");

        // The scripted user side of the conversation, in order.
        var userTurns = new[]
        {
            "Hi, I'm looking for book suggestions",
            "I love history and philosophy, I'd like to learn something new about Greece, any suggestion",
        };

        foreach (var userMessage in userTurns)
        {
            // User message
            history.AddUserMessage(userMessage);
            await MessageOutputAsync(history);

            // Assistant reply generated from the whole history so far
            string reply = await chatService.GenerateMessageAsync(history);
            history.AddAssistantMessage(reply);
            await MessageOutputAsync(history);
        }
    }

    /// <summary>
    /// Prints the most recent message of <paramref name="chatHistory"/> followed by a separator line.
    /// </summary>
    /// <param name="chatHistory">History whose last message is written to the console.</param>
    /// <returns>An already completed task; the method performs no asynchronous work.</returns>
    private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
    {
        var latest = chatHistory.Messages.Last();
        Console.WriteLine($"{latest.Role}: {latest.Content}");
        Console.WriteLine("------------------------");
        return Task.CompletedTask;
    }
}
|
||||
}
|
|
@ -0,0 +1,173 @@
|
|||
using System.Reflection.Metadata;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Common;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.SemanticKernel;
|
||||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.AI.Embeddings;
|
||||
using Microsoft.SemanticKernel.AI.TextCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
|
||||
using Microsoft.SemanticKernel.Memory;
|
||||
using Microsoft.SemanticKernel.Skills.Core;
|
||||
|
||||
namespace LLama.Examples.NewVersion
|
||||
{
|
||||
/// <summary>
/// Example: Semantic Kernel's text-memory skill running on a local LLamaSharp model
/// (chat, text completion and embeddings all served by the same loaded weights).
/// </summary>
public class SemanticKernelMemorySkill
{
    // Name of the memory collection every fact in this example is stored in.
    private const string MemoryCollectionName = "aboutMe";

    /// <summary>
    /// Asks for a model path, builds a kernel with a volatile memory store, saves a few
    /// facts, then retrieves, recalls, uses and finally removes them, printing each step.
    /// </summary>
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example15_MemorySkill.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var parameters = new ModelParams(modelPath)
        {
            Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var ex = new InteractiveExecutor(context);
        var builder = new KernelBuilder();

        // NOTE(review): the embedder shares the executor's context — confirm that is intended.
        var embedding = new LLamaEmbedder(context);

        builder.WithAIService<IChatCompletion>("local-llama", new LLamaSharpChatCompletion(ex), true);
        builder.WithAIService<ITextCompletion>("local-llama-text", new LLamaSharpTextCompletion(ex), true);
        builder.WithAIService<ITextEmbeddingGeneration>("local-llama-embed", new LLamaSharpEmbeddingGeneration(embedding), true);
        builder.WithMemoryStorage(new VolatileMemoryStore());
        var kernel = builder.Build();

        // ========= Store memories using the kernel =========

        await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info1", text: "My name is Andrea");
        await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info2", text: "I work as a tourist operator");
        await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info3", text: "I've been living in Seattle since 2005");
        await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: "info4", text: "I visited France and Italy five times since 2015");

        // ========= Store memories using semantic function =========

        // Add Memory as a skill for other functions
        var memorySkill = new TextMemorySkill(kernel.Memory);
        kernel.ImportSkill(memorySkill);

        // Build a semantic function that saves info to memory
        const string SaveFunctionDefinition = "{{save $info}}";
        var memorySaver = kernel.CreateSemanticFunction(SaveFunctionDefinition);

        await kernel.RunAsync(memorySaver, new()
        {
            [TextMemorySkill.CollectionParam] = MemoryCollectionName,
            [TextMemorySkill.KeyParam] = "info5",
            ["info"] = "My family is from New York"
        });

        // ========= Test memory remember =========
        Console.WriteLine("========= Example: Recalling a Memory =========");

        var answer = await memorySkill.RetrieveAsync(MemoryCollectionName, "info1", null);
        Console.WriteLine("Memory associated with 'info1': {0}", answer);
        /*
        Output:
        "Memory associated with 'info1': My name is Andrea
        */

        // ========= Test memory recall =========
        Console.WriteLine("========= Example: Recalling an Idea =========");

        answer = await memorySkill.RecallAsync("where did I grow up?", MemoryCollectionName, relevance: null, limit: 2, null);
        Console.WriteLine("Ask: where did I grow up?");
        Console.WriteLine("Answer:\n{0}", answer);

        answer = await memorySkill.RecallAsync("where do I live?", MemoryCollectionName, relevance: null, limit: 2, null);
        Console.WriteLine("Ask: where do I live?");
        Console.WriteLine("Answer:\n{0}", answer);

        /*
        Output:

            Ask: where did I grow up?
            Answer:
                ["My family is from New York","I\u0027ve been living in Seattle since 2005"]

            Ask: where do I live?
            Answer:
                ["I\u0027ve been living in Seattle since 2005","My family is from New York"]
        */

        // ========= Use memory in a semantic function =========
        Console.WriteLine("========= Example: Using Recall in a Semantic Function =========");

        // Build a semantic function that uses memory to find facts
        const string RecallFunctionDefinition = @"
Consider only the facts below when answering questions.

About me: {{recall 'where did I grow up?'}}
About me: {{recall 'where do I live?'}}

Question: {{$input}}

Answer:
";

        var aboutMeOracle = kernel.CreateSemanticFunction(RecallFunctionDefinition, maxTokens: 100);

        var result = await kernel.RunAsync(aboutMeOracle, new("Do I live in the same town where I grew up?")
        {
            [TextMemorySkill.CollectionParam] = MemoryCollectionName,
            [TextMemorySkill.RelevanceParam] = "0.8"
        });

        Console.WriteLine("Do I live in the same town where I grew up?\n");
        Console.WriteLine(result);

        /*
        Output:

            Do I live in the same town where I grew up?

            No, I do not live in the same town where I grew up since my family is from New York and I have been living in Seattle since 2005.
        */

        // ========= Remove a memory =========
        Console.WriteLine("========= Example: Forgetting a Memory =========");

        result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself")
        {
            ["fact1"] = "What is my name?",
            ["fact2"] = "What do I do for a living?",
            [TextMemorySkill.RelevanceParam] = ".75"
        });

        Console.WriteLine("Tell me a bit about myself\n");
        Console.WriteLine(result);

        /*
        Approximate Output:
            Tell me a bit about myself

            My name is Andrea and my family is from New York. I work as a tourist operator.
        */

        await memorySkill.RemoveAsync(MemoryCollectionName, "info1", null);

        result = await kernel.RunAsync(aboutMeOracle, new("Tell me a bit about myself"));

        Console.WriteLine("Tell me a bit about myself\n");
        Console.WriteLine(result);

        /*
        Approximate Output:
            Tell me a bit about myself

            I'm from a family originally from New York and I work as a tourist operator. I've been living in Seattle since 2005.
        */
    }
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
using System.Reflection.Metadata;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Common;
|
||||
using Microsoft.SemanticKernel;
|
||||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.AI.TextCompletion;
|
||||
using Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
|
||||
|
||||
namespace LLama.Examples.NewVersion
|
||||
{
|
||||
/// <summary>
/// Example: running a Semantic Kernel prompt template ("semantic function")
/// against a local LLamaSharp model through the text-completion connector.
/// </summary>
public class SemanticKernelPrompt
{
    /// <summary>
    /// Asks for a model path, registers a stateless executor as the kernel's text
    /// completion service, and prints a one-line summary for each sample text.
    /// </summary>
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var modelParams = new ModelParams(modelPath)
        {
            Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
        };
        using var weights = LLamaWeights.LoadFromFile(modelParams);
        var executor = new StatelessExecutor(weights, modelParams);

        var kernelBuilder = new KernelBuilder();
        kernelBuilder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(executor), true);
        var kernel = kernelBuilder.Build();

        // Template: the input text followed by the summarisation instruction.
        var template = @"{{$input}}

One line TLDR with the fewest words.";

        var summarize = kernel.CreateSemanticFunction(template, maxTokens: 100);

        // Sample inputs, summarised one after another in this order.
        var samples = new[]
        {
            @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.",
            @"
1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.",
        };

        foreach (var sample in samples)
        {
            Console.WriteLine(await summarize.InvokeAsync(sample));
        }
    }
}
|
||||
}
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
Console.WriteLine("Please input a number to choose an example to run:");
|
||||
Console.WriteLine("0: Run a chat session without stripping the role names.");
|
||||
Console.WriteLine("1: Run a chat session with the role names strippped.");
|
||||
Console.WriteLine("1: Run a chat session with the role names stripped.");
|
||||
Console.WriteLine("2: Interactive mode chat by using executor.");
|
||||
Console.WriteLine("3: Instruct mode chat by using executor.");
|
||||
Console.WriteLine("4: Stateless mode chat by using executor.");
|
||||
|
@ -18,6 +18,9 @@
|
|||
Console.WriteLine("8: Quantize the model.");
|
||||
Console.WriteLine("9: Automatic conversation.");
|
||||
Console.WriteLine("10: Constrain response to json format using grammar.");
|
||||
Console.WriteLine("11: Semantic Kernel Prompt.");
|
||||
Console.WriteLine("12: Semantic Kernel Chat.");
|
||||
Console.WriteLine("13: Semantic Kernel Memory Skill.");
|
||||
|
||||
while (true)
|
||||
{
|
||||
|
@ -68,6 +71,18 @@
|
|||
{
|
||||
GrammarJsonResponse.Run();
|
||||
}
|
||||
else if (choice == 11)
|
||||
{
|
||||
await SemanticKernelPrompt.Run();
|
||||
}
|
||||
else if (choice == 12)
|
||||
{
|
||||
await SemanticKernelChat.Run();
|
||||
}
|
||||
else if (choice == 13)
|
||||
{
|
||||
await SemanticKernelMemorySkill.Run();
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine("Cannot parse your choice. Please select again.");
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
using static LLama.LLamaTransforms;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
|
||||
|
||||
/// <summary>
/// History transform that renders the chat history with the default transform and
/// then appends an <c>Assistant:</c> cue on a new line.
/// </summary>
public class HistoryTransform : DefaultHistoryTransform
{
    /// <inheritdoc/>
    public override string HistoryToText(global::LLama.Common.ChatHistory history)
        => $"{base.HistoryToText(history)}\nAssistant:";
}
|
|
@ -0,0 +1,74 @@
|
|||
using LLama;
|
||||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
|
||||
|
||||
/// <summary>
/// Semantic Kernel <see cref="IChatCompletion"/> implementation that forwards chat
/// requests to a LLamaSharp <see cref="ChatSession"/>.
/// </summary>
public sealed class LLamaSharpChatCompletion : IChatCompletion
{
    private const string UserRole = "user:";
    private const string AssistantRole = "assistant:";

    // Created once in the constructor and reused for every request.
    private readonly ChatSession session;

    /// <summary>
    /// Creates the service around <paramref name="model"/>, configuring the session with
    /// <see cref="HistoryTransform"/> and a keyword output transform for the role prefixes.
    /// </summary>
    /// <param name="model">Interactive executor used to run inference.</param>
    public LLamaSharpChatCompletion(InteractiveExecutor model)
    {
        this.session = new ChatSession(model)
            .WithHistoryTransform(new HistoryTransform())
            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
    }

    /// <inheritdoc/>
    public ChatHistory CreateNewChat(string? instructions = "")
    {
        var history = new ChatHistory();

        // Pattern match rejects both null and empty and narrows the type to non-null
        // on every target framework (netstandard2.0 lacks nullable annotations).
        if (instructions is { Length: > 0 })
        {
            history.AddSystemMessage(instructions);
        }

        return history;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Inference is not started here; the returned result wraps the token stream,
    /// which is produced when the result is read.
    /// </remarks>
    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
    {
        requestSettings ??= CreateDefaultRequestSettings();

        var stream = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);

        // Nothing is awaited, so a completed task is returned instead of an async state machine.
        return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(stream) }.AsReadOnly());
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        requestSettings ??= CreateDefaultRequestSettings();

        var stream = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);

        yield return new LLamaSharpChatResult(stream);
    }

    /// <summary>
    /// Settings applied when the caller passes no <see cref="ChatRequestSettings"/>.
    /// </summary>
    private static ChatRequestSettings CreateDefaultRequestSettings()
    {
        return new ChatRequestSettings()
        {
            MaxTokens = 256,
            Temperature = 0,
            TopP = 0,
            StopSequences = new List<string>()
        };
    }
}
|
|
@ -0,0 +1,14 @@
|
|||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
|
||||
|
||||
/// <summary>
/// Chat message produced by the LLamaSharp connector; adds nothing to
/// <see cref="ChatMessageBase"/> beyond a public constructor.
/// </summary>
public class LLamaSharpChatMessage : ChatMessageBase
{
    /// <summary>
    /// Creates a message, forwarding both arguments to the base class.
    /// </summary>
    /// <param name="role">Author of the message.</param>
    /// <param name="content">Text of the message.</param>
    public LLamaSharpChatMessage(AuthorRole role, string content)
        : base(role, content)
    {
    }
}
|
|
@ -0,0 +1,38 @@
|
|||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.ChatCompletion;
|
||||
|
||||
/// <summary>
/// Chat result that wraps a stream of generated text fragments and exposes it
/// either as one assembled message or fragment by fragment.
/// </summary>
internal sealed class LLamaSharpChatResult : IChatStreamingResult
{
    private readonly IAsyncEnumerable<string> _stream;

    /// <summary>
    /// Wraps <paramref name="stream"/>; the stream is not enumerated until a message is requested.
    /// </summary>
    /// <param name="stream">Fragments of the assistant reply, in generation order.</param>
    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
    {
        _stream = stream;
    }

    /// <inheritdoc/>
    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder();

        // Hand the caller's token to the enumerator so the wait can be cancelled.
        await foreach (var token in _stream.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            sb.Append(token);
        }

        return new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString());
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        await foreach (var token in _stream.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            // Each fragment is surfaced as its own assistant message.
            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
        }
    }
}
|
|
@ -0,0 +1,72 @@
|
|||
using Microsoft.SemanticKernel.AI.ChatCompletion;
|
||||
using Microsoft.SemanticKernel.AI.TextCompletion;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama;
|
||||
|
||||
/// <summary>
/// Conversions from Semantic Kernel request types to their LLamaSharp counterparts.
/// </summary>
internal static class ExtensionMethods
{
    /// <summary>
    /// Convert a Semantic Kernel ChatHistory to a LLamaSharp ChatHistory
    /// </summary>
    /// <param name="chatHistory">History to copy, message by message.</param>
    /// <returns>A new LLamaSharp history with one entry per source message.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="chatHistory"/> is null.</exception>
    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
    {
        if (chatHistory is null)
        {
            throw new ArgumentNullException(nameof(chatHistory));
        }

        var history = new global::LLama.Common.ChatHistory();

        foreach (var chat in chatHistory)
        {
            // Semantic Kernel role labels are lower-case ("user", "assistant", ...) while the
            // LLamaSharp enum members are Pascal-cased, so the parse must ignore case;
            // a case-sensitive parse would map every message to Unknown.
            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, ignoreCase: true, out var parsedRole)
                ? parsedRole
                : global::LLama.Common.AuthorRole.Unknown;
            history.AddMessage(role, chat.Content);
        }

        return history;
    }

    /// <summary>
    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
    /// </summary>
    /// <param name="requestSettings">Settings to translate.</param>
    /// <returns>Inference parameters carrying the same sampling values, with the user role prefix added as an extra anti-prompt.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="requestSettings"/> is null.</exception>
    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
    {
        if (requestSettings is null)
        {
            throw new ArgumentNullException(nameof(requestSettings));
        }

        // Copy the caller's stop sequences (leaving their list untouched) and add the user role prefix.
        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":" };
        return new global::LLama.Common.InferenceParams
        {
            Temperature = (float)requestSettings.Temperature,
            TopP = (float)requestSettings.TopP,
            PresencePenalty = (float)requestSettings.PresencePenalty,
            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
            AntiPrompts = antiPrompts,
            // A missing token limit is passed on as -1.
            MaxTokens = requestSettings.MaxTokens ?? -1
        };
    }

    /// <summary>
    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
    /// </summary>
    /// <param name="requestSettings">Settings to translate.</param>
    /// <returns>Inference parameters carrying the same sampling values and stop sequences.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="requestSettings"/> is null.</exception>
    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
    {
        if (requestSettings is null)
        {
            throw new ArgumentNullException(nameof(requestSettings));
        }

        return new global::LLama.Common.InferenceParams
        {
            Temperature = (float)requestSettings.Temperature,
            TopP = (float)requestSettings.TopP,
            PresencePenalty = (float)requestSettings.PresencePenalty,
            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
            AntiPrompts = requestSettings.StopSequences,
            // A missing token limit is passed on as -1.
            MaxTokens = requestSettings.MaxTokens ?? -1
        };
    }
}
|
|
@ -0,0 +1,22 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Semantic Kernel connector for LLamaSharp. -->
  <PropertyGroup>
    <TargetFrameworks>netstandard2.0;net6.0;net7.0</TargetFrameworks>
    <RootNamespace>Microsoft.SemanticKernel.Connectors.AI.LLama</RootNamespace>
    <Nullable>enable</Nullable>
    <LangVersion>10</LangVersion>
    <Platforms>AnyCPU;x64;Arm64</Platforms>
    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="0.21.230828.2-preview" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
  </ItemGroup>

</Project>
|
|
@ -0,0 +1,27 @@
|
|||
using LLama;
|
||||
using LLama.Abstractions;
|
||||
using Microsoft.SemanticKernel.AI.TextCompletion;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
|
||||
|
||||
/// <summary>
/// Semantic Kernel <see cref="ITextCompletion"/> implementation that forwards
/// prompts to a LLamaSharp executor.
/// </summary>
public sealed class LLamaSharpTextCompletion : ITextCompletion
{
    /// <summary>
    /// Executor that runs inference for every request.
    /// </summary>
    public ILLamaExecutor executor;

    /// <summary>
    /// Creates the service around <paramref name="executor"/>.
    /// </summary>
    /// <param name="executor">Executor used to run inference.</param>
    /// <exception cref="ArgumentNullException"><paramref name="executor"/> is null.</exception>
    public LLamaSharpTextCompletion(ILLamaExecutor executor)
    {
        this.executor = executor ?? throw new ArgumentNullException(nameof(executor));
    }

    /// <inheritdoc/>
    /// <remarks>
    /// The returned result wraps the token stream; the text is produced when the result is read.
    /// </remarks>
    public Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
    {
        var stream = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);

        // Nothing is awaited, so a completed task is returned instead of an async state machine.
        return Task.FromResult<IReadOnlyList<ITextResult>>(new List<ITextResult> { new LLamaTextResult(stream) }.AsReadOnly());
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        // The attribute above lets a token supplied through WithCancellation(...) reach this iterator.
        var stream = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
        yield return new LLamaTextResult(stream);
    }
}
|
|
@ -0,0 +1,37 @@
|
|||
using Microsoft.SemanticKernel.AI.TextCompletion;
|
||||
using Microsoft.SemanticKernel.Orchestration;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextCompletion;
|
||||
|
||||
/// <summary>
/// Text-completion result that wraps a stream of generated text fragments and exposes
/// it either as one assembled string or fragment by fragment.
/// </summary>
internal sealed class LLamaTextResult : ITextStreamingResult
{
    private readonly IAsyncEnumerable<string> _text;

    /// <summary>
    /// Wraps <paramref name="text"/>; the stream is not enumerated until a completion is requested.
    /// </summary>
    /// <param name="text">Fragments of the completion, in generation order.</param>
    public LLamaTextResult(IAsyncEnumerable<string> text)
    {
        _text = text;
        ModelResult = new(text);
    }

    /// <summary>
    /// Model result constructed from the fragment stream passed to the constructor.
    /// </summary>
    public ModelResult ModelResult { get; }

    /// <inheritdoc/>
    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder();

        // Hand the caller's token to the enumerator so the wait can be cancelled.
        await foreach (var token in _text.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            sb.Append(token);
        }

        return sb.ToString();
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        await foreach (string word in _text.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            yield return word;
        }
    }
}
|
|
@ -0,0 +1,21 @@
|
|||
using LLama;
|
||||
using Microsoft.SemanticKernel.AI.Embeddings;
|
||||
|
||||
namespace Microsoft.SemanticKernel.Connectors.AI.LLama.TextEmbedding;
|
||||
|
||||
/// <summary>
/// Semantic Kernel <see cref="ITextEmbeddingGeneration"/> implementation that
/// computes embeddings with a LLamaSharp <see cref="LLamaEmbedder"/>.
/// </summary>
public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
{
    private readonly LLamaEmbedder _embedder;

    /// <summary>
    /// Creates the service around <paramref name="embedder"/>.
    /// </summary>
    /// <param name="embedder">Embedder used for every input text.</param>
    /// <exception cref="ArgumentNullException"><paramref name="embedder"/> is null.</exception>
    public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
    {
        _embedder = embedder ?? throw new ArgumentNullException(nameof(embedder));
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested before all texts were embedded.</exception>
    public Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        var embeddings = new List<ReadOnlyMemory<float>>(data.Count);
        foreach (var text in data)
        {
            // The embedder call is synchronous, so cancellation is only observed between texts.
            cancellationToken.ThrowIfCancellationRequested();
            embeddings.Add(new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text)));
        }

        // All work is already done; wrap the result in a completed task.
        return Task.FromResult<IList<ReadOnlyMemory<float>>>(embeddings);
    }
}
|
|
@ -29,6 +29,11 @@ namespace LLama
|
|||
_ctx = weights.CreateContext(@params);
|
||||
}
|
||||
|
||||
/// <summary>
/// Create an embedder that uses an existing context.
/// </summary>
/// <param name="ctx">Context the embedder will operate on.</param>
/// <remarks>NOTE(review): the embedder stores this context directly — confirm who is expected to dispose it.</remarks>
public LLamaEmbedder(LLamaContext ctx) => _ctx = ctx;
|
||||
|
||||
/// <summary>
|
||||
/// Get the embeddings of the text.
|
||||
/// </summary>
|
||||
|
|
|
@ -54,6 +54,7 @@ namespace LLama.OldVersion
|
|||
|
||||
int n_embed = NativeApi.llama_n_embd(_ctx);
|
||||
var embeddings = NativeApi.llama_get_embeddings(_ctx);
|
||||
|
||||
if (embeddings == null)
|
||||
{
|
||||
return new float[0];
|
||||
|
|
|
@ -11,7 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp", "LLama\LLamaSh
|
|||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.WebAPI", "LLama.WebAPI\LLama.WebAPI.csproj", "{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", "LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj", "{D98F93E3-B344-4F9D-86BB-FDBF6768B587}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
|
@ -83,6 +85,18 @@ Global
|
|||
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
|
Loading…
Reference in New Issue