From de31a06a4a8a87e9f3b7db7b1201aaee6b422cbe Mon Sep 17 00:00:00 2001
From: Rinne
Date: Mon, 29 Apr 2024 18:07:13 +0800
Subject: [PATCH 1/2] ci: add workflow to check the spelling.

---
 .github/_typos.toml | 14 ++++++++++++++
 .github/workflows/spell_check.yml | 31 +++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 .github/_typos.toml
 create mode 100644 .github/workflows/spell_check.yml

diff --git a/.github/_typos.toml b/.github/_typos.toml
new file mode 100644
index 00000000..874ad82a
--- /dev/null
+++ b/.github/_typos.toml
@@ -0,0 +1,14 @@
+# Typos configuration file
+#
+# Info: https://github.com/marketplace/actions/typos-action
+# Install: brew install typos-cli
+# Install: conda install typos
+# Run: typos -c .github/_typos.toml
+
+[files]
+extend-exclude = [
+    "_typos.toml",
+    "docs/xmldocs/",
+    "LLama.Web/wwwroot/",
+    "LLama/runtimes/deps/"
+]
diff --git a/.github/workflows/spell_check.yml b/.github/workflows/spell_check.yml
new file mode 100644
index 00000000..b6fab829
--- /dev/null
+++ b/.github/workflows/spell_check.yml
@@ -0,0 +1,31 @@
+# Check pull requests for typos.
+#
+# Configuration: .github/_typos.toml
+#
+# Info: https://github.com/marketplace/actions/typos-action
+# Local install: brew install typos-cli
+# Local install: conda install typos
+# Local run: typos -c .github/_typos.toml
+
+name: Spell Check
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  run:
+    name: Spell check
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Use custom config file
+        uses: crate-ci/typos@master
+        with:
+          config: .github/_typos.toml
+          write_changes: false
+          quiet: true
\ No newline at end of file

From 495177fd0fec99d3ad8530b9911e12c05aa4912c Mon Sep 17 00:00:00 2001
From: Rinne
Date: Mon, 29 Apr 2024 18:19:20 +0800
Subject: [PATCH 2/2] fix: typos.

---
 LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs | 2 +-
 LLama.Examples/Examples/LlavaInteractiveModeExecute.cs | 2 +-
 LLama.Examples/Examples/SpeechChat.cs | 4 ++--
 LLama.Web/Async/AsyncLock.cs | 2 +-
 LLama.Web/Extensions.cs | 6 +++---
 LLama.Web/Hubs/SessionConnectionHub.cs | 2 +-
 LLama.Web/README.md | 6 +++---
 LLama.Web/Services/ModelService.cs | 2 +-
 LLama/Abstractions/IInferenceParams.cs | 2 +-
 LLama/Abstractions/ILLamaExecutor.cs | 2 +-
 LLama/Abstractions/IModelParams.cs | 2 +-
 LLama/ChatSession.cs | 2 +-
 LLama/Common/InferenceParams.cs | 2 +-
 LLama/Extensions/IContextParamsExtensions.cs | 2 +-
 LLama/Extensions/IModelParamsExtensions.cs | 2 +-
 LLama/LLamaContext.cs | 2 +-
 LLama/LLamaExecutorBase.cs | 2 +-
 LLama/LLamaInstructExecutor.cs | 4 ++--
 LLama/LLamaInteractExecutor.cs | 2 +-
 LLama/LLamaStatelessExecutor.cs | 2 +-
 LLama/Native/NativeApi.LLava.cs | 4 ++--
 LLama/Native/NativeApi.Load.cs | 2 +-
 LLama/Native/NativeLibraryConfig.cs | 4 ++--
 README.md | 2 +-
 docs/Examples/LLavaInteractiveModeExecute.md | 2 +-
 docs/FAQ.md | 4 ++--
 docs/QuickStart.md | 2 +-
 docs/Tutorials/Executors.md | 6 +++---
 docs/Tutorials/NativeLibraryConfig.md | 2 +-
 docs/index.md | 2 +-
 docs/xmldocs/llama.abstractions.iinferenceparams.md | 2 +-
 docs/xmldocs/llama.common.inferenceparams.md | 2 +-
 32 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs b/LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs
index 0ec903eb..48d96f73 100644
--- a/LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs
+++ b/LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs
@@ -80,7 +80,7 @@ public class BatchedExecutorSaveAndLoad
         // Continue generating text
         await GenerateTokens(executor, conversation, sampler, decoder, n_len);
 
-        // Display final ouput
+        // Display final output
         AnsiConsole.MarkupLine($"[red]{prompt}{decoder.Read()}[/]");
     }
 
diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
index 89b4ae41..9d396ebf 100644
--- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
+++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
@@ -95,7 +95,7 @@ namespace LLama.Examples.Examples
 
                 Console.WriteLine();
 
-                // Initilize Images in executor
+                // Initialize Images in executor
                 //
                 foreach (var image in imagePaths)
                 {
diff --git a/LLama.Examples/Examples/SpeechChat.cs b/LLama.Examples/Examples/SpeechChat.cs
index 7cbea734..82ae1dcd 100644
--- a/LLama.Examples/Examples/SpeechChat.cs
+++ b/LLama.Examples/Examples/SpeechChat.cs
@@ -124,7 +124,7 @@ In case of inaudible sentences that might be, assume they're saying 'stop'.
         int totalNonBlankClips;   // ..but for example's sake they work on a
         int nonIdleTime;          // ..clip-based quant-length (1 = clipLength).
         // Default detection settings: A speech of 750ms, followed by pause of 500ms. (2x250ms)
-        public (int minBlanksPerSeperation, int minNonBlanksForValidMessages) detectionSettings = (2, 3);
+        public (int minBlanksPerSeparation, int minNonBlanksForValidMessages) detectionSettings = (2, 3);
 
         public HashSet ServiceUsers = [];
 
@@ -156,7 +156,7 @@ In case of inaudible sentences that might be, assume they're saying 'stop'.
             // Compare the volume with the threshold and act accordingly. Once an interesting and 'full' set of clips pops up, serve it.
             if (maxVolume >= voiceDetectionThreshold) { currentBlankClips = 0; totalNonBlankClips++; nonIdleTime++; }
-            else if (++currentBlankClips < detectionSettings.minBlanksPerSeperation) { nonIdleTime++; }
+            else if (++currentBlankClips < detectionSettings.minBlanksPerSeparation) { nonIdleTime++; }
             else
             {
                 if (totalNonBlankClips >= detectionSettings.minNonBlanksForValidMessages) { SendTranscription(); }
diff --git a/LLama.Web/Async/AsyncLock.cs b/LLama.Web/Async/AsyncLock.cs
index 09ccb0f7..df294bf8 100644
--- a/LLama.Web/Async/AsyncLock.cs
+++ b/LLama.Web/Async/AsyncLock.cs
@@ -1,7 +1,7 @@
 namespace LLama.Web.Async
 {
     ///
-    /// Create an Async locking using statment
+    /// Create an Async locking using statement
     ///
     public sealed class AsyncLock
     {
diff --git a/LLama.Web/Extensions.cs b/LLama.Web/Extensions.cs
index ee8d7f7f..130c46bd 100644
--- a/LLama.Web/Extensions.cs
+++ b/LLama.Web/Extensions.cs
@@ -34,14 +34,14 @@ namespace LLama.Web
         private static List CombineCSV(List list, string csv)
         {
             var results = list is null || list.Count == 0
-                ? CommaSeperatedToList(csv)
-                : CommaSeperatedToList(csv).Concat(list);
+                ? CommaSeparatedToList(csv)
+                : CommaSeparatedToList(csv).Concat(list);
             return results
                 .Distinct()
                 .ToList();
         }
 
-        private static List CommaSeperatedToList(string value)
+        private static List CommaSeparatedToList(string value)
         {
             if (string.IsNullOrEmpty(value))
                 return new List();
diff --git a/LLama.Web/Hubs/SessionConnectionHub.cs b/LLama.Web/Hubs/SessionConnectionHub.cs
index 966ec8a4..3ef46dbe 100644
--- a/LLama.Web/Hubs/SessionConnectionHub.cs
+++ b/LLama.Web/Hubs/SessionConnectionHub.cs
@@ -30,7 +30,7 @@ namespace LLama.Web.Hubs
         {
             _logger.Log(LogLevel.Information, "[OnDisconnectedAsync], Id: {0}", Context.ConnectionId);
 
-            // Remove connections session on dissconnect
+            // Remove connections session on disconnect
             await _modelSessionService.CloseAsync(Context.ConnectionId);
             await base.OnDisconnectedAsync(exception);
         }
diff --git a/LLama.Web/README.md b/LLama.Web/README.md
index 9b6786e6..7de2ca9d 100644
--- a/LLama.Web/README.md
+++ b/LLama.Web/README.md
@@ -1,8 +1,8 @@
 ## LLama.Web - Basic ASP.NET Core examples of LLamaSharp in action
-LLama.Web has no heavy dependencies and no extra frameworks ove bootstrap and jquery to keep the examples clean and easy to copy over to your own project
+LLama.Web has no heavy dependencies and no extra frameworks over bootstrap and jquery to keep the examples clean and easy to copy over to your own project
 
 ## Websockets
-Using signalr websockets simplifys the streaming of responses and model per connection management
+Using signalr websockets simplifies the streaming of responses and model per connection management
 
 
 
@@ -23,7 +23,7 @@ Example:
         {
             "Name": "Alpaca",
             "Path": "D:\\Repositories\\AI\\Prompts\\alpaca.txt",
-            "Prompt": "Alternativly to can set a prompt text directly and omit the Path"
+            "Prompt": "Alternatively to can set a prompt text directly and omit the Path"
             "AntiPrompt": [
                 "User:"
             ],
diff --git a/LLama.Web/Services/ModelService.cs b/LLama.Web/Services/ModelService.cs
index 3634f6ab..1ee30e32 100644
--- a/LLama.Web/Services/ModelService.cs
+++ b/LLama.Web/Services/ModelService.cs
@@ -8,7 +8,7 @@ namespace LLama.Web.Services
 {
 
     ///
-    /// Sercive for handling Models,Weights & Contexts
+    /// Service for handling Models,Weights & Contexts
     ///
     public class ModelService : IModelService
     {
diff --git a/LLama/Abstractions/IInferenceParams.cs b/LLama/Abstractions/IInferenceParams.cs
index 74ab0f81..425bc88d 100644
--- a/LLama/Abstractions/IInferenceParams.cs
+++ b/LLama/Abstractions/IInferenceParams.cs
@@ -6,7 +6,7 @@ using LLama.Sampling;
 namespace LLama.Abstractions
 {
     ///
-    /// The paramters used for inference.
+    /// The parameters used for inference.
     ///
     public interface IInferenceParams
     {
diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs
index 574a27d8..9a223328 100644
--- a/LLama/Abstractions/ILLamaExecutor.cs
+++ b/LLama/Abstractions/ILLamaExecutor.cs
@@ -20,7 +20,7 @@ namespace LLama.Abstractions
         ///
         public bool IsMultiModal { get; }
         ///
-        /// Muti-Modal Projections / Clip Model weights
+        /// Multi-Modal Projections / Clip Model weights
         ///
         public LLavaWeights? ClipModel { get; }
 
diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index 461c033b..ac81f1fd 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -232,7 +232,7 @@ namespace LLama.Abstractions
     public sealed record MetadataOverride
     {
         ///
-        /// Get the key being overriden by this override
+        /// Get the key being overridden by this override
         ///
         public string Key { get; }
 
diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 0a5accc5..9018b20e 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -545,7 +545,7 @@ public class ChatSession
         InferenceParams? inferenceParams = null,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        // Make sure the last message is an assistant message (reponse from the LLM).
+        // Make sure the last message is an assistant message (response from the LLM).
         ChatHistory.Message? lastAssistantMessage = History.Messages.LastOrDefault();
 
         if (lastAssistantMessage is null
diff --git a/LLama/Common/InferenceParams.cs b/LLama/Common/InferenceParams.cs
index c0a8357e..44818a1f 100644
--- a/LLama/Common/InferenceParams.cs
+++ b/LLama/Common/InferenceParams.cs
@@ -7,7 +7,7 @@ using LLama.Sampling;
 namespace LLama.Common
 {
     ///
-    /// The paramters used for inference.
+    /// The parameters used for inference.
     ///
     public record InferenceParams : IInferenceParams
 
diff --git a/LLama/Extensions/IContextParamsExtensions.cs b/LLama/Extensions/IContextParamsExtensions.cs
index fa1a36dd..d6a94a2a 100644
--- a/LLama/Extensions/IContextParamsExtensions.cs
+++ b/LLama/Extensions/IContextParamsExtensions.cs
@@ -6,7 +6,7 @@ using LLama.Native;
 namespace LLama.Extensions
 {
     ///
-    /// Extention methods to the IContextParams interface
+    /// Extension methods to the IContextParams interface
     ///
     public static class IContextParamsExtensions
     {
diff --git a/LLama/Extensions/IModelParamsExtensions.cs b/LLama/Extensions/IModelParamsExtensions.cs
index c7daa135..523ec737 100644
--- a/LLama/Extensions/IModelParamsExtensions.cs
+++ b/LLama/Extensions/IModelParamsExtensions.cs
@@ -7,7 +7,7 @@ using LLama.Native;
 namespace LLama.Extensions;
 
 ///
-/// Extention methods to the IModelParams interface
+/// Extension methods to the IModelParams interface
 ///
 public static class IModelParamsExtensions
 {
diff --git a/LLama/LLamaContext.cs b/LLama/LLamaContext.cs
index e398982f..7d0c0f91 100644
--- a/LLama/LLamaContext.cs
+++ b/LLama/LLamaContext.cs
@@ -628,7 +628,7 @@ namespace LLama
         }
 
         ///
-        /// Copy bytes to a desintation pointer.
+        /// Copy bytes to a destination pointer.
         ///
         /// Destination to write to
         /// Length of the destination buffer
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index c00ead4f..70081c9f 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -209,7 +209,7 @@ namespace LLama
         ///
         /// Try to reuse the matching prefix from the session file.
         ///
-        protected virtual void TryReuseMathingPrefix()
+        protected virtual void TryReuseMatchingPrefix()
         {
             if (_n_session_consumed < _session_tokens.Count)
             {
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 917dc5eb..65d2d6c7 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -189,7 +189,7 @@ namespace LLama
                     HandleRunOutOfContext(inferenceParams.TokensKeep);
                 }
 
-                TryReuseMathingPrefix();
+                TryReuseMatchingPrefix();
 
                 var (result, _) = Context.NativeHandle.Decode(_embeds, LLamaSeqId.Zero, batch, ref _pastTokensCount);
                 if (result != DecodeResult.Ok)
@@ -259,7 +259,7 @@ namespace LLama
             return Task.CompletedTask;
         }
         ///
-        /// The desciptor of the state of the instruct executor.
+        /// The descriptor of the state of the instruct executor.
         ///
         public class InstructExecutorState : ExecutorBaseState
         {
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 9aaa1ca2..fec4f9c4 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -234,7 +234,7 @@ namespace LLama
                     HandleRunOutOfContext(inferenceParams.TokensKeep);
                 }
 
-                TryReuseMathingPrefix();
+                TryReuseMatchingPrefix();
 
                 // Changes to support Multi-Modal LLMs.
                 //
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index a3c52a02..39d74f90 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -63,7 +63,7 @@ namespace LLama
         ///
         public async IAsyncEnumerable InferAsync(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
-            // Ensure the context from last time is disposed (it always hould be)
+            // Ensure the context from last time is disposed (it always should be)
             if (!Context.NativeHandle.IsClosed)
                 Context.Dispose();
 
diff --git a/LLama/Native/NativeApi.LLava.cs b/LLama/Native/NativeApi.LLava.cs
index 895fe43e..e3aeef4b 100644
--- a/LLama/Native/NativeApi.LLava.cs
+++ b/LLama/Native/NativeApi.LLava.cs
@@ -21,7 +21,7 @@ public static unsafe partial class NativeApi
     /// SafeHandle to the Clip Model
     /// Number of threads
    /// Binary image in jpeg format
-    /// Bytes lenght of the image
+    /// Bytes length of the image
    /// SafeHandle to the Embeddings
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes", CallingConvention = CallingConvention.Cdecl)]
@@ -35,7 +35,7 @@ public static unsafe partial class NativeApi
     /// SafeHandle to the Clip Model
     /// Number of threads
     /// Image filename (jpeg) to generate embeddings
-    /// SafeHandel to the embeddings
+    /// SafeHandle to the embeddings
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename", CallingConvention = CallingConvention.Cdecl)]
     public static extern SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHandle ctx_clip, int n_threads,
diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs
index 4b4beea2..5275023e 100644
--- a/LLama/Native/NativeApi.Load.cs
+++ b/LLama/Native/NativeApi.Load.cs
@@ -34,7 +34,7 @@ namespace LLama.Native
                     "3. One of the dependency of the native library is missed. Please use `ldd` on linux, `dumpbin` on windows and `otool`" +
                    "to check if all the dependency of the native library is satisfied. Generally you could find the libraries under your output folder.\n" +
                    "4. Try to compile llama.cpp yourself to generate a libllama library, then use `LLama.Native.NativeLibraryConfig.WithLibrary` " +
-                    "to specify it at the very beginning of your code. For more informations about compilation, please refer to LLamaSharp repo on github.\n");
+                    "to specify it at the very beginning of your code. For more information about compilation, please refer to LLamaSharp repo on github.\n");
             }
 
             // Now that the "loaded" flag is set configure logging in llama.cpp
diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs
index ef7cd7c1..f198b179 100644
--- a/LLama/Native/NativeLibraryConfig.cs
+++ b/LLama/Native/NativeLibraryConfig.cs
@@ -101,7 +101,7 @@ namespace LLama.Native
         }
 
         ///
-        /// Add self-defined search directories. Note that the file stucture of the added
+        /// Add self-defined search directories. Note that the file structure of the added
         /// directories must be the same as the default directory. Besides, the directory
         /// won't be used recursively.
         ///
@@ -116,7 +116,7 @@ namespace LLama.Native
         }
 
         ///
-        /// Add self-defined search directories. Note that the file stucture of the added
+        /// Add self-defined search directories. Note that the file structure of the added
         /// directories must be the same as the default directory. Besides, the directory
         /// won't be used recursively.
         ///
diff --git a/README.md b/README.md
index 08f4cd60..5eddeebc 100644
--- a/README.md
+++ b/README.md
@@ -175,7 +175,7 @@ For more examples, please refer to [LLamaSharp.Examples](./LLama.Examples).
 #### Why GPU is not used when I have installed CUDA
 
 1. If you are using backend packages, please make sure you have installed the cuda backend package which matches the cuda version of your device. Please note that before LLamaSharp v0.10.0, only one backend package should be installed.
-2. Add `NativeLibraryConfig.Instance.WithLogs(LLamaLogLevel.Info)` to the very beginning of your code. The log will show which native library file is loaded. If the CPU library is loaded, please try to compile the native library yourself and open an issue for that. If the CUDA libraty is loaded, please check if `GpuLayerCount > 0` when loading the model weight.
+2. Add `NativeLibraryConfig.Instance.WithLogs(LLamaLogLevel.Info)` to the very beginning of your code. The log will show which native library file is loaded. If the CPU library is loaded, please try to compile the native library yourself and open an issue for that. If the CUDA library is loaded, please check if `GpuLayerCount > 0` when loading the model weight.
 
 #### Why the inference is slow
diff --git a/docs/Examples/LLavaInteractiveModeExecute.md b/docs/Examples/LLavaInteractiveModeExecute.md
index 826ac447..2bfbbea1 100644
--- a/docs/Examples/LLavaInteractiveModeExecute.md
+++ b/docs/Examples/LLavaInteractiveModeExecute.md
@@ -98,7 +98,7 @@ namespace LLama.Examples.Examples
 
                 Console.WriteLine();
 
-                // Initilize Images in executor
+                // Initialize Images in executor
                 //
                 foreach (var image in imagePaths)
                 {
diff --git a/docs/FAQ.md b/docs/FAQ.md
index 6b5781fb..86a49e24 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -1,11 +1,11 @@
-# Frequently asked qustions
+# Frequently asked questions
 
 Sometimes, your application with LLM and LLamaSharp may have unexpected behaviours. Here are some frequently asked questions, which may help you to deal with your problem.
 
 ## Why GPU is not used when I have installed CUDA
 
 1. If you are using backend packages, please make sure you have installed the cuda backend package which matches the cuda version of your device. Please note that before LLamaSharp v0.10.0, only one backend package should be installed.
-2. Add `NativeLibraryConfig.Instance.WithLogs(LLamaLogLevel.Info)` to the very beginning of your code. The log will show which native library file is loaded. If the CPU library is loaded, please try to compile the native library yourself and open an issue for that. If the CUDA libraty is loaded, please check if `GpuLayerCount > 0` when loading the model weight.
+2. Add `NativeLibraryConfig.Instance.WithLogs(LLamaLogLevel.Info)` to the very beginning of your code. The log will show which native library file is loaded. If the CPU library is loaded, please try to compile the native library yourself and open an issue for that. If the CUDA library is loaded, please check if `GpuLayerCount > 0` when loading the model weight.
 
 ## Why the inference is slow
diff --git a/docs/QuickStart.md b/docs/QuickStart.md
index 96151d83..a2cab198 100644
--- a/docs/QuickStart.md
+++ b/docs/QuickStart.md
@@ -169,7 +169,7 @@ do
 
     Console.WriteLine();
 
-    // Initilize Images in executor
+    // Initialize Images in executor
     //
     ex.ImagePaths = imagePaths.ToList();
 }
diff --git a/docs/Tutorials/Executors.md b/docs/Tutorials/Executors.md
index 8e7ce23a..7a744f7e 100644
--- a/docs/Tutorials/Executors.md
+++ b/docs/Tutorials/Executors.md
@@ -23,7 +23,7 @@ public interface ILLamaExecutor
     ///
     public bool IsMultiModal { get; }
     ///
-    /// Muti-Modal Projections / Clip Model weights
+    /// Multi-Modal Projections / Clip Model weights
     ///
     public LLavaWeights? ClipModel { get; }
 
@@ -110,7 +110,7 @@ At this time, by repeating the same mode of `Q: xxx? A: xxx.`, LLM outputs the a
 
 ## BatchedExecutor
 
-Different from other executors, `BatchedExecutor` could accept multiple inputs from different sessions and geneate outputs for them at the same time. Here is an example to use it.
+Different from other executors, `BatchedExecutor` could accept multiple inputs from different sessions and generate outputs for them at the same time. Here is an example to use it.
 
 ```cs
 using LLama.Batched;
@@ -249,7 +249,7 @@ Here is the parameters for LLamaSharp executors.
 
 ```cs
 ///
-/// The paramters used for inference.
+/// The parameters used for inference.
 ///
 public record InferenceParams : IInferenceParams
 
diff --git a/docs/Tutorials/NativeLibraryConfig.md b/docs/Tutorials/NativeLibraryConfig.md
index 4bdd12dc..1c3d00a0 100644
--- a/docs/Tutorials/NativeLibraryConfig.md
+++ b/docs/Tutorials/NativeLibraryConfig.md
@@ -8,7 +8,7 @@ As indicated in [Architecture](../Architecture.md), LLamaSharp uses the native l
 
 Before introducing the way to customize native library loading, please follow the tips below to see if you need to compile the native library yourself, rather than use the published backend packages, which contain native library files for multiple targets.
 
 1. Your device/environment has not been supported by any published backend packages. For example, vulkan has not been supported yet. In this case, it will mean a lot to open an issue to tell us you are using it. Since our support for new backend will have a delay, you could compile yourself before that.
-2. You want to gain the best performance of LLamaSharp. Because LLamaSharp offloads the model to both GPU and CPU, the performance is significantly related with CPU if your GPU memory size is small. AVX ([Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)) and BLAS ([Basic Linear Algebra Subprograms](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms)) are the most important ways to accelerate the CPU computation. By default, LLamaSharp disables the support for BLAS and use AVX2 for CUDA backend yet. If you would like to enable BLAS or use AVX 512 along with CUDA, please compile the native library youself, following the [instructions here](../ContributingGuide.md).
+2. You want to gain the best performance of LLamaSharp. Because LLamaSharp offloads the model to both GPU and CPU, the performance is significantly related with CPU if your GPU memory size is small. AVX ([Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)) and BLAS ([Basic Linear Algebra Subprograms](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms)) are the most important ways to accelerate the CPU computation. By default, LLamaSharp disables the support for BLAS and use AVX2 for CUDA backend yet. If you would like to enable BLAS or use AVX 512 along with CUDA, please compile the native library yourself, following the [instructions here](../ContributingGuide.md).
 3. You want to debug the c++ code.
diff --git a/docs/index.md b/docs/index.md
index 0e5ec5af..e0008eb1 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -11,7 +11,7 @@ If you are new to LLM, here're some tips for you to help you to get start with `
 1. The main ability of LLamaSharp is to provide an efficient way to run inference of LLM on your device (and fine-tune model in the future). The model weights, however, need to be downloaded from other resources such as [huggingface](https://huggingface.co).
 2. To gain high performance, LLamaSharp interacts with a native library compiled from c++, which is called `backend`. We provide backend packages for Windows, Linux and MAC with CPU, Cuda, Metal and OpenCL. You **don't** need to handle anything about c++ but just install the backend packages. If no published backend match your device, please open an issue to let us know. If compiling c++ code is not difficult for you, you could also follow [this guide]() to compile a backend and run LLamaSharp with it.
 3. `LLaMA` originally refers to the weights released by Meta (Facebook Research). After that, many models are fine-tuned based on it, such as `Vicuna`, `GPT4All`, and `Pyglion`. There are two popular file format of these model now, which are PyTorch format (.pth) and Huggingface format (.bin). LLamaSharp uses `GGUF` format file, which could be converted from these two formats. There are two options for you to get GGUF format file. a) Search model name + 'gguf' in [Huggingface](https://huggingface.co), you will find lots of model files that have already been converted to GGUF format. Please take care of the publishing time of them because some old ones could only work with old version of LLamaSharp. b) Convert PyTorch or Huggingface format to GGUF format yourself. Please follow the instructions of [this part of llama.cpp readme](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#prepare-and-quantize) to convert them with the python scripts.
-4. LLamaSharp supports multi-modal, which means that the model could take both text and image as input. Note that there are two model files requied for using multi-modal (LLaVA), which are main model and mm-proj model. Here is a huggingface repo which shows that: [link](https://huggingface.co/ShadowBeast/llava-v1.6-mistral-7b-Q5_K_S-GGUF/tree/main).
+4. LLamaSharp supports multi-modal, which means that the model could take both text and image as input. Note that there are two model files required for using multi-modal (LLaVA), which are main model and mm-proj model. Here is a huggingface repo which shows that: [link](https://huggingface.co/ShadowBeast/llava-v1.6-mistral-7b-Q5_K_S-GGUF/tree/main).
diff --git a/docs/xmldocs/llama.abstractions.iinferenceparams.md b/docs/xmldocs/llama.abstractions.iinferenceparams.md
index 4a03092a..6b1bc27f 100644
--- a/docs/xmldocs/llama.abstractions.iinferenceparams.md
+++ b/docs/xmldocs/llama.abstractions.iinferenceparams.md
@@ -2,7 +2,7 @@
 
 Namespace: LLama.Abstractions
 
-The paramters used for inference.
+The parameters used for inference.
 
 ```csharp
 public interface IInferenceParams
diff --git a/docs/xmldocs/llama.common.inferenceparams.md b/docs/xmldocs/llama.common.inferenceparams.md
index f178331d..2b6e3f12 100644
--- a/docs/xmldocs/llama.common.inferenceparams.md
+++ b/docs/xmldocs/llama.common.inferenceparams.md
@@ -2,7 +2,7 @@
 
 Namespace: LLama.Common
 
-The paramters used for inference.
+The parameters used for inference.
 
 ```csharp
 public class InferenceParams : LLama.Abstractions.IInferenceParams, System.IEquatable`1[[LLama.Common.InferenceParams, LLamaSharp, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null]]