Merge branch 'master' of github.com:AsakusaRinne/LLamaSharp into cuda_detection
This commit is contained in:
commit
d7675f7936
|
@ -22,13 +22,19 @@ fi
|
|||
|
||||
mkdir ./temp;
|
||||
mkdir ./temp/runtimes;
|
||||
cp ./LLama/runtimes/*.* ./temp/runtimes/;
|
||||
# For sure it could be done better but cp -R did not work on osx
|
||||
mkdir ./temp/runtimes/osx-arm64
|
||||
mkdir ./temp/runtimes/osx-x64
|
||||
cp ./LLama/runtimes/*.* ./temp/runtimes/;
|
||||
cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
|
||||
cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
|
||||
cp ./LLama/runtimes/build/*.* ./temp/;
|
||||
|
||||
# get the current version
|
||||
cd temp;
|
||||
dotnet add package LLamaSharp;
|
||||
version=$(dotnet list temp.csproj package | grep LLamaSharp);
|
||||
# TODO: This didn´t work on osx...we need a solution
|
||||
read -ra arr <<< "$version"
|
||||
version="${arr[-1]}"
|
||||
echo "The latest version: $version";
|
||||
|
|
|
@ -6,9 +6,9 @@ on:
|
|||
cublas:
|
||||
type: boolean
|
||||
description: Build CUBLAS binaries
|
||||
macos:
|
||||
osx:
|
||||
type: boolean
|
||||
description: Build MacOS binaries
|
||||
description: Build OSX binaries
|
||||
push:
|
||||
branches: [cron_job]
|
||||
#schedule:
|
||||
|
@ -147,7 +147,7 @@ jobs:
|
|||
include:
|
||||
- build: 'arm64'
|
||||
defines: '-DCMAKE_OSX_ARCHITECTURES=arm64'
|
||||
- build: 'x86_64'
|
||||
- build: 'x64'
|
||||
defines: '-DLLAMA_METAL=OFF -DCMAKE_OSX_ARCHITECTURES=x86_64'
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
|
@ -169,7 +169,7 @@ jobs:
|
|||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
path: ./build/libllama.dylib
|
||||
name: llama-bin-macos-${{ matrix.build }}.dylib
|
||||
name: llama-bin-osx-${{ matrix.build }}.dylib
|
||||
- name: Upload Metal
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
@ -212,12 +212,12 @@ jobs:
|
|||
- name: Rearrange MacOS files
|
||||
if: ${{ github.event.inputs.macos }}
|
||||
run: |
|
||||
mkdir deps/macos-arm64
|
||||
mkdir deps/macos-x86_64
|
||||
mkdir deps/osx-arm64
|
||||
mkdir deps/osx-x64
|
||||
|
||||
cp artifacts/llama-bin-macos-arm64.dylib/libllama.dylib deps/macos-arm64/libllama.dylib
|
||||
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/macos-arm64/ggml-metal.metal
|
||||
cp artifacts/llama-bin-macos-x86_64.dylib/libllama.dylib deps/macos-x86_64/libllama.dylib
|
||||
cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
|
||||
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
|
||||
cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
|
||||
|
||||
|
||||
- name: Rearrange CUDA files
|
||||
|
|
|
@ -12,12 +12,12 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build: [linux-release, windows-release, macos-release]
|
||||
build: [linux-release, windows-release, osx-release]
|
||||
include:
|
||||
- build: linux-release
|
||||
os: ubuntu-latest
|
||||
config: release
|
||||
- build: macos-release
|
||||
- build: osx-release
|
||||
os: macos-latest
|
||||
config: release
|
||||
- build: windows-release
|
||||
|
|
|
@ -29,7 +29,8 @@
|
|||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta4" />
|
||||
<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta1" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.47.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
@ -4,6 +4,27 @@
|
|||
<TargetFramework>net6.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
|
||||
<Version>0.7.1</Version>
|
||||
<Authors>Xbotter</Authors>
|
||||
<Company>SciSharp STACK</Company>
|
||||
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
||||
<Copyright>MIT, SciSharp STACK $([System.DateTime]::UtcNow.ToString(yyyy))</Copyright>
|
||||
<RepositoryUrl>https://github.com/SciSharp/LLamaSharp</RepositoryUrl>
|
||||
<RepositoryType>git</RepositoryType>
|
||||
<PackageIconUrl>https://avatars3.githubusercontent.com/u/44989469?s=200&v=4</PackageIconUrl>
|
||||
<PackageTags>LLama, LLM, GPT, ChatGPT, kernel-memory, vector search, SciSharp</PackageTags>
|
||||
<Description>
|
||||
The integration of LLamaSharp and Microsoft kernel-memory. It could make it easy to support document search for LLamaSharp model inference.
|
||||
</Description>
|
||||
<PackageReleaseNotes>
|
||||
Support integration with kernel-memory
|
||||
</PackageReleaseNotes>
|
||||
<PackageLicenseExpression>MIT</PackageLicenseExpression>
|
||||
<PackageOutputPath>packages</PackageOutputPath>
|
||||
<Platforms>AnyCPU;x64;Arm64</Platforms>
|
||||
<PackageId>LLamaSharp.kernel-memory</PackageId>
|
||||
<Configurations>Debug;Release;GPU</Configurations>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
@ -3,9 +3,9 @@ using Microsoft.SemanticKernel.AI.ChatCompletion;
|
|||
|
||||
namespace LLamaSharp.SemanticKernel;
|
||||
|
||||
internal static class ExtensionMethods
|
||||
public static class ExtensionMethods
|
||||
{
|
||||
internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
|
||||
public static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory, bool ignoreCase = true)
|
||||
{
|
||||
if (chatHistory is null)
|
||||
{
|
||||
|
@ -16,7 +16,7 @@ internal static class ExtensionMethods
|
|||
|
||||
foreach (var chat in chatHistory)
|
||||
{
|
||||
var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, out var _role) ? _role : global::LLama.Common.AuthorRole.Unknown;
|
||||
var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, ignoreCase, out var _role) ? _role : global::LLama.Common.AuthorRole.Unknown;
|
||||
history.AddMessage(role, chat.Content);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
<PackageIconUrl>https://avatars3.githubusercontent.com/u/44989469?s=200&v=4</PackageIconUrl>
|
||||
<PackageTags>LLama, LLM, GPT, ChatGPT, semantic-kernel, SciSharp</PackageTags>
|
||||
<Description>
|
||||
The integration of LLamaSharp ans semantic-kernel.
|
||||
The integration of LLamaSharp and Microsoft semantic-kernel.
|
||||
</Description>
|
||||
<PackageReleaseNotes>
|
||||
Support integration with semantic-kernel
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
using System.Text;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Native;
|
||||
|
||||
namespace LLama.Web.Common
|
||||
{
|
||||
|
@ -118,6 +119,24 @@ namespace LLama.Web.Common
|
|||
/// </summary>
|
||||
public float? RopeFrequencyScale { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnExtrapolationFactor { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnAttentionFactor { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnBetaFast { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnBetaSlow { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public uint? YarnOriginalContext { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public RopeScalingType? YarnScalingType { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Use experimental mul_mat_q kernels
|
||||
/// </summary>
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using System.Text;
|
||||
using LLama.Native;
|
||||
|
||||
namespace LLama.Abstractions;
|
||||
|
||||
|
@ -67,4 +68,34 @@ public interface IContextParams
|
|||
/// Number of threads to use for batch processing (null = autodetect) (n_threads)
|
||||
/// </summary>
|
||||
uint? BatchThreads { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// YaRN extrapolation mix factor
|
||||
/// </summary>
|
||||
float? YarnExtrapolationFactor { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// YaRN magnitude scaling factor
|
||||
/// </summary>
|
||||
float? YarnAttentionFactor { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// YaRN low correction dim
|
||||
/// </summary>
|
||||
float? YarnBetaFast { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// YaRN high correction dim
|
||||
/// </summary>
|
||||
float? YarnBetaSlow { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// YaRN original context length
|
||||
/// </summary>
|
||||
uint? YarnOriginalContext { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// YaRN scaling method to use.
|
||||
/// </summary>
|
||||
RopeScalingType? YarnScalingType { get; set; }
|
||||
}
|
|
@ -1,11 +1,14 @@
|
|||
using LLama.Abstractions;
|
||||
using LLama.Common;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using static LLama.InteractiveExecutor;
|
||||
|
||||
namespace LLama
|
||||
{
|
||||
|
@ -95,11 +98,11 @@ namespace LLama
|
|||
Directory.CreateDirectory(path);
|
||||
}
|
||||
_executor.Context.SaveState(Path.Combine(path, _modelStateFilename));
|
||||
if(Executor is StatelessExecutor)
|
||||
if (Executor is StatelessExecutor)
|
||||
{
|
||||
|
||||
}
|
||||
else if(Executor is StatefulExecutorBase statefulExecutor)
|
||||
else if (Executor is StatefulExecutorBase statefulExecutor)
|
||||
{
|
||||
statefulExecutor.SaveState(Path.Combine(path, _executorStateFilename));
|
||||
}
|
||||
|
@ -135,46 +138,90 @@ namespace LLama
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get the response from the LLama model. Note that prompt could not only be the preset words,
|
||||
/// but also the question you want to ask.
|
||||
/// Generates a response for a given user prompt and manages history state for the user.
|
||||
/// This will always pass the whole history to the model. Don't pass a whole history
|
||||
/// to this method as the user prompt will be appended to the history of the current session.
|
||||
/// If more control is needed, use the other overload of this method that accepts a ChatHistory object.
|
||||
/// </summary>
|
||||
/// <param name="prompt"></param>
|
||||
/// <param name="inferenceParams"></param>
|
||||
/// <param name="cancellationToken"></param>
|
||||
/// <returns></returns>
|
||||
/// <returns>Returns generated text of the assistant message.</returns>
|
||||
public async IAsyncEnumerable<string> ChatAsync(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
|
||||
{
|
||||
foreach(var inputTransform in InputTransformPipeline)
|
||||
foreach (var inputTransform in InputTransformPipeline)
|
||||
prompt = inputTransform.Transform(prompt);
|
||||
|
||||
History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.User, prompt).Messages);
|
||||
|
||||
History.Messages.Add(new ChatHistory.Message(AuthorRole.User, prompt));
|
||||
|
||||
if (_executor is InteractiveExecutor executor)
|
||||
{
|
||||
InteractiveExecutorState state = (InteractiveExecutorState)executor.GetStateData();
|
||||
prompt = state.IsPromptRun
|
||||
? HistoryTransform.HistoryToText(History)
|
||||
: prompt;
|
||||
}
|
||||
|
||||
StringBuilder sb = new();
|
||||
|
||||
await foreach (var result in ChatAsyncInternal(prompt, inferenceParams, cancellationToken))
|
||||
{
|
||||
yield return result;
|
||||
sb.Append(result);
|
||||
}
|
||||
History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.Assistant, sb.ToString()).Messages);
|
||||
|
||||
string assistantMessage = sb.ToString();
|
||||
|
||||
// Remove end tokens from the assistant message
|
||||
// if defined in inferenceParams.AntiPrompts.
|
||||
// We only want the response that was generated and not tokens
|
||||
// that are delimiting the beginning or end of the response.
|
||||
if (inferenceParams?.AntiPrompts != null)
|
||||
{
|
||||
foreach (var stopToken in inferenceParams.AntiPrompts)
|
||||
{
|
||||
assistantMessage = assistantMessage.Replace(stopToken, "");
|
||||
}
|
||||
}
|
||||
|
||||
History.Messages.Add(new ChatHistory.Message(AuthorRole.Assistant, assistantMessage));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get the response from the LLama model with chat histories.
|
||||
/// Generates a response for a given chat history. This method does not manage history state for the user.
|
||||
/// If you want to e.g. truncate the history of a session to fit into the model's context window,
|
||||
/// use this method and pass the truncated history to it. If you don't need this control, use the other
|
||||
/// overload of this method that accepts a user prompt instead.
|
||||
/// </summary>
|
||||
/// <param name="history"></param>
|
||||
/// <param name="inferenceParams"></param>
|
||||
/// <param name="cancellationToken"></param>
|
||||
/// <returns></returns>
|
||||
/// <returns>Returns generated text of the assistant message.</returns>
|
||||
public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
|
||||
{
|
||||
var prompt = HistoryTransform.HistoryToText(history);
|
||||
History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.User, prompt).Messages);
|
||||
StringBuilder sb = new();
|
||||
if (history.Messages.Count == 0)
|
||||
{
|
||||
throw new ArgumentException("History must contain at least one message.");
|
||||
}
|
||||
|
||||
string prompt;
|
||||
if (_executor is InteractiveExecutor executor)
|
||||
{
|
||||
InteractiveExecutorState state = (InteractiveExecutorState)executor.GetStateData();
|
||||
|
||||
prompt = state.IsPromptRun
|
||||
? HistoryTransform.HistoryToText(History)
|
||||
: history.Messages.Last().Content;
|
||||
}
|
||||
else
|
||||
{
|
||||
prompt = history.Messages.Last().Content;
|
||||
}
|
||||
|
||||
await foreach (var result in ChatAsyncInternal(prompt, inferenceParams, cancellationToken))
|
||||
{
|
||||
yield return result;
|
||||
sb.Append(result);
|
||||
}
|
||||
History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.Assistant, sb.ToString()).Messages);
|
||||
}
|
||||
|
||||
private async IAsyncEnumerable<string> ChatAsyncInternal(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
|
||||
|
|
|
@ -3,6 +3,7 @@ using System;
|
|||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using LLama.Native;
|
||||
|
||||
namespace LLama.Common
|
||||
{
|
||||
|
@ -98,10 +99,30 @@ namespace LLama.Common
|
|||
/// </summary>
|
||||
public float? RopeFrequencyScale { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Use experimental mul_mat_q kernels
|
||||
/// </summary>
|
||||
public bool MulMatQ { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnExtrapolationFactor { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnAttentionFactor { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnBetaFast { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public float? YarnBetaSlow { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public uint? YarnOriginalContext { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public RopeScalingType? YarnScalingType { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Use experimental mul_mat_q kernels
|
||||
/// </summary>
|
||||
public bool MulMatQ { get; set; }
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Load vocab only (no weights)
|
||||
|
|
|
@ -29,6 +29,15 @@ namespace LLama.Extensions
|
|||
result.embedding = @params.EmbeddingMode;
|
||||
result.rope_freq_base = @params.RopeFrequencyBase ?? 0;
|
||||
result.rope_freq_scale = @params.RopeFrequencyScale ?? 0;
|
||||
|
||||
// Default YaRN values copied from here: https://github.com/ggerganov/llama.cpp/blob/381efbf480959bb6d1e247a8b0c2328f22e350f8/common/common.h#L67
|
||||
result.yarn_ext_factor = @params.YarnExtrapolationFactor ?? -1f;
|
||||
result.yarn_attn_factor = @params.YarnAttentionFactor ?? 1f;
|
||||
result.yarn_beta_fast = @params.YarnBetaFast ?? 32f;
|
||||
result.yarn_beta_slow = @params.YarnBetaSlow ?? 1f;
|
||||
result.yarn_orig_ctx = @params.YarnOriginalContext ?? 0;
|
||||
result.rope_scaling_type = @params.YarnScalingType ?? RopeScalingType.LLAMA_ROPE_SCALING_UNSPECIFIED;
|
||||
|
||||
result.mul_mat_q = @params.MulMatQ;
|
||||
|
||||
result.n_threads = Threads(@params.Threads);
|
||||
|
|
|
@ -27,15 +27,15 @@
|
|||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
<Link>runtimes/linux-x64/native/cuda12/libllama.so</Link>
|
||||
</None>
|
||||
<None Include="$(MSBuildThisFileDirectory)runtimes/macos-arm64/libllama.dylib">
|
||||
<None Include="$(MSBuildThisFileDirectory)runtimes/osx-arm64/libllama.dylib">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
<Link>runtimes/osx-arm64/native/libllama.dylib</Link>
|
||||
</None>
|
||||
<None Include="$(MSBuildThisFileDirectory)runtimes/macos-arm64/ggml-metal.metal">
|
||||
<None Include="$(MSBuildThisFileDirectory)runtimes/osx-arm64/ggml-metal.metal">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
<Link>runtimes/osx-arm64/native/ggml-metal.metal</Link>
|
||||
</None>
|
||||
<None Include="$(MSBuildThisFileDirectory)runtimes/macos-x86_64/libllama.dylib">
|
||||
<None Include="$(MSBuildThisFileDirectory)runtimes/osx-x64/libllama.dylib">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
<Link>runtimes/osx-x64/native/libllama.dylib</Link>
|
||||
</None>
|
||||
|
|
|
@ -44,13 +44,13 @@ namespace LLama.Native
|
|||
/// <summary>
|
||||
/// RoPE scaling type, from `enum llama_rope_scaling_type`
|
||||
/// </summary>
|
||||
public sbyte rope_scaling_type;
|
||||
public RopeScalingType rope_scaling_type;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// RoPE base frequency, 0 = from model
|
||||
/// </summary>
|
||||
public float rope_freq_base;
|
||||
public float rope_freq_base;
|
||||
/// <summary>
|
||||
/// RoPE frequency scaling factor, 0 = from model
|
||||
/// </summary>
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
namespace LLama.Native
|
||||
{
|
||||
/// <summary>
|
||||
/// RoPE scaling type. C# equivalent of llama_rope_scaling_type
|
||||
/// </summary>
|
||||
public enum RopeScalingType
|
||||
: sbyte
|
||||
{
|
||||
LLAMA_ROPE_SCALING_UNSPECIFIED = -1,
|
||||
|
||||
LLAMA_ROPE_SCALING_NONE = 0,
|
||||
|
||||
LLAMA_ROPE_SCALING_LINEAR = 1,
|
||||
|
||||
LLAMA_ROPE_SCALING_YARN = 2,
|
||||
}
|
||||
}
|
|
@ -19,9 +19,9 @@
|
|||
<file src="LLamaSharpBackend.props" target="build/netstandard2.0/LLamaSharp.Backend.Cpu.props" />
|
||||
<file src="runtimes/libllama.dll" target="runtimes\win-x64\native\libllama.dll" />
|
||||
<file src="runtimes/libllama.so" target="runtimes\linux-x64\native\libllama.so" />
|
||||
<file src="runtimes/macos-x86_64/libllama.dylib" target="runtimes\osx-x64\native\libllama.dylib" />
|
||||
<file src="runtimes/macos-arm64/libllama.dylib" target="runtimes\osx-arm64\native\libllama.dylib" />
|
||||
<file src="runtimes/macos-arm64/ggml-metal.metal" target="runtimes\osx-arm64\native\ggml-metal.metal" />
|
||||
<file src="runtimes/osx-x64/libllama.dylib" target="runtimes\osx-x64\native\libllama.dylib" />
|
||||
<file src="runtimes/osx-arm64/libllama.dylib" target="runtimes\osx-arm64\native\libllama.dylib" />
|
||||
<file src="runtimes/osx-arm64/ggml-metal.metal" target="runtimes\osx-arm64\native\ggml-metal.metal" />
|
||||
<file src="icon512.png" target="icon512.png" />
|
||||
</files>
|
||||
</package>
|
||||
|
|
|
@ -54,6 +54,12 @@ For [microsoft semantic-kernel](https://github.com/microsoft/semantic-kernel) in
|
|||
LLamaSharp.semantic-kernel
|
||||
```
|
||||
|
||||
For [microsoft kernel-memory](https://github.com/microsoft/kernel-memory) integration, please search and install the following package (currently kernel-memory only supports net6.0):
|
||||
|
||||
```
|
||||
LLamaSharp.kernel-memory
|
||||
```
|
||||
|
||||
### Tips for choosing a version
|
||||
|
||||
In general, there may be some break changes between two minor releases, for example 0.5.1 and 0.6.0. On the contrary, we don't introduce API break changes in patch release. Therefore it's recommended to keep the highest patch version of a minor release. For example, keep 0.5.6 instead of 0.5.3.
|
||||
|
@ -196,7 +202,7 @@ Another choice is generate gguf format file yourself with a pytorch weight (or a
|
|||
|
||||
🔳 Fine-tune
|
||||
|
||||
⚠️ Local document search (enabled by kernel-memory now)
|
||||
✅ Local document search (enabled by kernel-memory now)
|
||||
|
||||
🔳 MAUI Integration
|
||||
|
||||
|
|
Loading…
Reference in New Issue