# MkDocs configuration for the LLamaSharp documentation site.
# Top-level sections: site name, navigation tree, theme, extra settings,
# extra stylesheets, and Markdown extensions.
site_name: LLamaSharp Documentation

# Navigation tree. Each entry maps a label shown in the site navigation to a
# Markdown file path.
nav:
  - Overview: index.md
  - Quick Start: QuickStart.md
  - Architecture: Architecture.md
  - FAQ: FAQ.md
  - Contributing Guide: ContributingGuide.md
  - Tutorials:
      - Customize the native library loading: Tutorials/NativeLibraryConfig.md
      - Use executors: Tutorials/Executors.md
      - Use ChatSession: Tutorials/ChatSession.md
      - Understand LLamaContext: Tutorials/UnderstandLLamaContext.md
      - Get embeddings: Tutorials/GetEmbeddings.md
      - Quantize the model: Tutorials/Quantization.md
  - Integrations:
      - semantic-kernel integration: Integrations/semantic-kernel.md
      - kernel-memory integration: Integrations/kernel-memory.md
      - BotSharp integration: Integrations/BotSharp.md
      - Langchain integration: Integrations/Langchain.md
  - Examples:
      - Batched executor - multi-output to one input: Examples/BatchedExecutorFork.md
      - Batched executor - basic guidance: Examples/BatchedExecutorGuidance.md
      - Batched executor - rewinding to an earlier state: Examples/BatchedExecutorRewind.md
      - Chinese LLM - with GB2312 encoding: Examples/ChatChineseGB2312.md
      - ChatSession - stripping role names: Examples/ChatSessionStripRoleName.md
      - ChatSession - with history: Examples/ChatSessionWithHistory.md
      - ChatSession - restarting: Examples/ChatSessionWithRestart.md
      - ChatSession - Basic: Examples/ChatSessionWithRoleName.md
      - Coding assistant: Examples/CodingAssistant.md
      - Get embeddings: Examples/GetEmbeddings.md
      - Grammar - json response: Examples/GrammarJsonResponse.md
      - Instruct executor - basic: Examples/InstructModeExecute.md
      - Interactive executor - basic: Examples/InteractiveModeExecute.md
      - Kernel memory integration - basic: Examples/KernelMemory.md
      - Kernel-memory - save & load: Examples/KernelMemorySaveAndLoad.md
      - LLaVA - basic: Examples/LLavaInteractiveModeExecute.md
      - ChatSession - load & save: Examples/LoadAndSaveSession.md
      - Executor - save/load state: Examples/LoadAndSaveState.md
      - Quantization: Examples/QuantizeModel.md
      - Semantic-kernel - chat: Examples/SemanticKernelChat.md
      - Semantic-kernel - with kernel-memory: Examples/SemanticKernelMemory.md
      - Semantic-kernel - basic: Examples/SemanticKernelPrompt.md
      - Stateless executor: Examples/StatelessModeExecute.md
      - Talk to yourself: Examples/TalkToYourself.md
  # One page per documented type under ./xmldocs/.
  # NOTE(review): these entries look machine-generated — confirm before hand-editing.
  - API Reference:
      - index: ./xmldocs/index.md
      - llama.abstractions.adaptercollection: ./xmldocs/llama.abstractions.adaptercollection.md
      - llama.abstractions.icontextparams: ./xmldocs/llama.abstractions.icontextparams.md
      - llama.abstractions.ihistorytransform: ./xmldocs/llama.abstractions.ihistorytransform.md
      - llama.abstractions.iinferenceparams: ./xmldocs/llama.abstractions.iinferenceparams.md
      - llama.abstractions.illamaexecutor: ./xmldocs/llama.abstractions.illamaexecutor.md
      - llama.abstractions.illamaparams: ./xmldocs/llama.abstractions.illamaparams.md
      - llama.abstractions.imodelparams: ./xmldocs/llama.abstractions.imodelparams.md
      - llama.abstractions.itextstreamtransform: ./xmldocs/llama.abstractions.itextstreamtransform.md
      - llama.abstractions.itexttransform: ./xmldocs/llama.abstractions.itexttransform.md
      - llama.abstractions.loraadapter: ./xmldocs/llama.abstractions.loraadapter.md
      - llama.abstractions.metadataoverride: ./xmldocs/llama.abstractions.metadataoverride.md
      - llama.abstractions.metadataoverrideconverter: ./xmldocs/llama.abstractions.metadataoverrideconverter.md
      - llama.abstractions.tensorsplitscollection: ./xmldocs/llama.abstractions.tensorsplitscollection.md
      - llama.abstractions.tensorsplitscollectionconverter: ./xmldocs/llama.abstractions.tensorsplitscollectionconverter.md
      - llama.antipromptprocessor: ./xmldocs/llama.antipromptprocessor.md
      - llama.batched.alreadypromptedconversationexception: ./xmldocs/llama.batched.alreadypromptedconversationexception.md
      - llama.batched.batchedexecutor: ./xmldocs/llama.batched.batchedexecutor.md
      - llama.batched.cannotforkwhilerequiresinferenceexception: ./xmldocs/llama.batched.cannotforkwhilerequiresinferenceexception.md
      - llama.batched.cannotmodifywhilerequiresinferenceexception: ./xmldocs/llama.batched.cannotmodifywhilerequiresinferenceexception.md
      - llama.batched.cannotsamplerequiresinferenceexception: ./xmldocs/llama.batched.cannotsamplerequiresinferenceexception.md
      - llama.batched.cannotsamplerequirespromptexception: ./xmldocs/llama.batched.cannotsamplerequirespromptexception.md
      - llama.batched.conversation: ./xmldocs/llama.batched.conversation.md
      - llama.batched.conversationextensions: ./xmldocs/llama.batched.conversationextensions.md
      - llama.batched.experimentalbatchedexecutorexception: ./xmldocs/llama.batched.experimentalbatchedexecutorexception.md
      - llama.chatsession-1: ./xmldocs/llama.chatsession-1.md
      - llama.chatsession: ./xmldocs/llama.chatsession.md
      - llama.common.authorrole: ./xmldocs/llama.common.authorrole.md
      - llama.common.chathistory: ./xmldocs/llama.common.chathistory.md
      - llama.common.fixedsizequeue-1: ./xmldocs/llama.common.fixedsizequeue-1.md
      - llama.common.inferenceparams: ./xmldocs/llama.common.inferenceparams.md
      - llama.common.mirostattype: ./xmldocs/llama.common.mirostattype.md
      - llama.common.modelparams: ./xmldocs/llama.common.modelparams.md
      - llama.exceptions.grammarexpectedname: ./xmldocs/llama.exceptions.grammarexpectedname.md
      - llama.exceptions.grammarexpectednext: ./xmldocs/llama.exceptions.grammarexpectednext.md
      - llama.exceptions.grammarexpectedprevious: ./xmldocs/llama.exceptions.grammarexpectedprevious.md
      - llama.exceptions.grammarformatexception: ./xmldocs/llama.exceptions.grammarformatexception.md
      - llama.exceptions.grammarunexpectedcharaltelement: ./xmldocs/llama.exceptions.grammarunexpectedcharaltelement.md
      - llama.exceptions.grammarunexpectedcharrngelement: ./xmldocs/llama.exceptions.grammarunexpectedcharrngelement.md
      - llama.exceptions.grammarunexpectedendelement: ./xmldocs/llama.exceptions.grammarunexpectedendelement.md
      - llama.exceptions.grammarunexpectedendofinput: ./xmldocs/llama.exceptions.grammarunexpectedendofinput.md
      - llama.exceptions.grammarunexpectedhexcharscount: ./xmldocs/llama.exceptions.grammarunexpectedhexcharscount.md
      - llama.exceptions.grammarunknownescapecharacter: ./xmldocs/llama.exceptions.grammarunknownescapecharacter.md
      - llama.exceptions.llamadecodeerror: ./xmldocs/llama.exceptions.llamadecodeerror.md
      - llama.exceptions.loadweightsfailedexception: ./xmldocs/llama.exceptions.loadweightsfailedexception.md
      - llama.exceptions.runtimeerror: ./xmldocs/llama.exceptions.runtimeerror.md
      - llama.extensions.icontextparamsextensions: ./xmldocs/llama.extensions.icontextparamsextensions.md
      - llama.extensions.imodelparamsextensions: ./xmldocs/llama.extensions.imodelparamsextensions.md
      - llama.grammars.grammar: ./xmldocs/llama.grammars.grammar.md
      - llama.grammars.grammarrule: ./xmldocs/llama.grammars.grammarrule.md
      - llama.ichatmodel: ./xmldocs/llama.ichatmodel.md
      - llama.llamacache: ./xmldocs/llama.llamacache.md
      - llama.llamaembedder: ./xmldocs/llama.llamaembedder.md
      - llama.llamamodel: ./xmldocs/llama.llamamodel.md
      - llama.llamamodelv1: ./xmldocs/llama.llamamodelv1.md
      - llama.llamaparams: ./xmldocs/llama.llamaparams.md
      - llama.llamaquantizer: ./xmldocs/llama.llamaquantizer.md
      - llama.llamastate: ./xmldocs/llama.llamastate.md
      - llama.llamatransforms: ./xmldocs/llama.llamatransforms.md
      - llama.llavaweights: ./xmldocs/llama.llavaweights.md
      - llama.native.decoderesult: ./xmldocs/llama.native.decoderesult.md
      - llama.native.ggmltype: ./xmldocs/llama.native.ggmltype.md
      - llama.native.gpusplitmode: ./xmldocs/llama.native.gpusplitmode.md
      - llama.native.llamabatch: ./xmldocs/llama.native.llamabatch.md
      - llama.native.llamabeamsstate: ./xmldocs/llama.native.llamabeamsstate.md
      - llama.native.llamabeamview: ./xmldocs/llama.native.llamabeamview.md
      - llama.native.llamachatmessage: ./xmldocs/llama.native.llamachatmessage.md
      - llama.native.llamacontextparams: ./xmldocs/llama.native.llamacontextparams.md
      - llama.native.llamaftype: ./xmldocs/llama.native.llamaftype.md
      - llama.native.llamagrammarelement: ./xmldocs/llama.native.llamagrammarelement.md
      - llama.native.llamagrammarelementtype: ./xmldocs/llama.native.llamagrammarelementtype.md
      - llama.native.llamakvcacheview: ./xmldocs/llama.native.llamakvcacheview.md
      - llama.native.llamakvcacheviewcell: ./xmldocs/llama.native.llamakvcacheviewcell.md
      - llama.native.llamakvcacheviewsafehandle: ./xmldocs/llama.native.llamakvcacheviewsafehandle.md
      - llama.native.llamaloglevel: ./xmldocs/llama.native.llamaloglevel.md
      - llama.native.llamamodelkvoverridetype: ./xmldocs/llama.native.llamamodelkvoverridetype.md
      - llama.native.llamamodelmetadataoverride: ./xmldocs/llama.native.llamamodelmetadataoverride.md
      - llama.native.llamamodelparams: ./xmldocs/llama.native.llamamodelparams.md
      - llama.native.llamamodelquantizeparams: ./xmldocs/llama.native.llamamodelquantizeparams.md
      - llama.native.llamanativebatch: ./xmldocs/llama.native.llamanativebatch.md
      - llama.native.llamapoolingtype: ./xmldocs/llama.native.llamapoolingtype.md
      - llama.native.llamapos: ./xmldocs/llama.native.llamapos.md
      - llama.native.llamaropetype: ./xmldocs/llama.native.llamaropetype.md
      - llama.native.llamaseqid: ./xmldocs/llama.native.llamaseqid.md
      - llama.native.llamatoken: ./xmldocs/llama.native.llamatoken.md
      - llama.native.llamatokendata: ./xmldocs/llama.native.llamatokendata.md
      - llama.native.llamatokendataarray: ./xmldocs/llama.native.llamatokendataarray.md
      - llama.native.llamatokendataarraynative: ./xmldocs/llama.native.llamatokendataarraynative.md
      - llama.native.llamatokentype: ./xmldocs/llama.native.llamatokentype.md
      - llama.native.llamavocabtype: ./xmldocs/llama.native.llamavocabtype.md
      - llama.native.llavaimageembed: ./xmldocs/llama.native.llavaimageembed.md
      - llama.native.nativeapi: ./xmldocs/llama.native.nativeapi.md
      - llama.native.nativelibraryconfig: ./xmldocs/llama.native.nativelibraryconfig.md
      - llama.native.ropescalingtype: ./xmldocs/llama.native.ropescalingtype.md
      - llama.native.safellamacontexthandle: ./xmldocs/llama.native.safellamacontexthandle.md
      - llama.native.safellamagrammarhandle: ./xmldocs/llama.native.safellamagrammarhandle.md
      - llama.native.safellamahandlebase: ./xmldocs/llama.native.safellamahandlebase.md
      - llama.native.safellamamodelhandle: ./xmldocs/llama.native.safellamamodelhandle.md
      - llama.native.safellavaimageembedhandle: ./xmldocs/llama.native.safellavaimageembedhandle.md
      - llama.native.safellavamodelhandle: ./xmldocs/llama.native.safellavamodelhandle.md
      - llama.quantizer: ./xmldocs/llama.quantizer.md
      - llama.sampling.basesamplingpipeline: ./xmldocs/llama.sampling.basesamplingpipeline.md
      - llama.sampling.defaultsamplingpipeline: ./xmldocs/llama.sampling.defaultsamplingpipeline.md
      - llama.sampling.greedysamplingpipeline: ./xmldocs/llama.sampling.greedysamplingpipeline.md
      - llama.sampling.isamplingpipeline: ./xmldocs/llama.sampling.isamplingpipeline.md
      - llama.sampling.isamplingpipelineextensions: ./xmldocs/llama.sampling.isamplingpipelineextensions.md
      - llama.sampling.mirostate2samplingpipeline: ./xmldocs/llama.sampling.mirostate2samplingpipeline.md
      - llama.sampling.mirostatesamplingpipeline: ./xmldocs/llama.sampling.mirostatesamplingpipeline.md
      - llama.sessionstate: ./xmldocs/llama.sessionstate.md
      - llama.streamingtokendecoder: ./xmldocs/llama.streamingtokendecoder.md
      - llama.types.chatcompletion: ./xmldocs/llama.types.chatcompletion.md
      - llama.types.chatcompletionchoice: ./xmldocs/llama.types.chatcompletionchoice.md
      - llama.types.chatcompletionchunk: ./xmldocs/llama.types.chatcompletionchunk.md
      - llama.types.chatcompletionchunkchoice: ./xmldocs/llama.types.chatcompletionchunkchoice.md
      - llama.types.chatcompletionchunkdelta: ./xmldocs/llama.types.chatcompletionchunkdelta.md
      - llama.types.chatcompletionmessage: ./xmldocs/llama.types.chatcompletionmessage.md
      - llama.types.chatmessagerecord: ./xmldocs/llama.types.chatmessagerecord.md
      - llama.types.chatrole: ./xmldocs/llama.types.chatrole.md
      - llama.types.completion: ./xmldocs/llama.types.completion.md
      - llama.types.completionchoice: ./xmldocs/llama.types.completionchoice.md
      - llama.types.completionchunk: ./xmldocs/llama.types.completionchunk.md
      - llama.types.completionlogprobs: ./xmldocs/llama.types.completionlogprobs.md
      - llama.types.completionusage: ./xmldocs/llama.types.completionusage.md
      - llama.types.embedding: ./xmldocs/llama.types.embedding.md
      - llama.types.embeddingdata: ./xmldocs/llama.types.embeddingdata.md
      - llama.types.embeddingusage: ./xmldocs/llama.types.embeddingusage.md
      - logger: ./xmldocs/logger.md

# Theme settings (Material theme).
theme:
  name: material
  static_templates:
    - 404.html
  language: 'en'
  palette:
    # Palette toggle for light mode
    - media: "(prefers-color-scheme: light)"
      scheme: default
      primary: white
      accent: red
      toggle:
        icon: material/weather-sunny
        name: Switch to dark mode

    # Palette toggle for dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      primary: blue
      accent: blue
      toggle:
        icon: material/weather-night
        name: Switch to light mode
  # NOTE(review): these two search options may be ignored or deprecated by
  # newer theme releases — confirm against the pinned theme version.
  include_search_page: false
  search_index_only: true
  favicon: 'media/icon128.png'
  icon:
    logo: 'material/file-document'
  features:
    - content.action.edit
    - navigation.instant
  font:
    text: 'Fira Sans'
    code: 'Fira Mono'

extra:
  version:
    provider: mike

# The "?v=14" query string is part of the stylesheet reference
# (presumably a cache-busting version tag — verify before changing).
extra_css:
  - 'css/extra.css?v=14'

markdown_extensions:
  - admonition
  - def_list
  - footnotes
  - meta
  - toc:
      # NOTE(review): the permalink value is an empty string here; the original
      # may have held a symbol character lost in extraction — confirm.
      permalink: ""
      # NOTE(review): `uslugify` may be deprecated or removed in newer
      # pymdown-extensions releases — confirm the pinned version provides it.
      slugify: !!python/name:pymdownx.slugs.uslugify
  - pymdownx.arithmatex:
      generic: true
  - pymdownx.caret
  - pymdownx.critic
  - pymdownx.details
  - pymdownx.emoji:
      emoji_generator: !!python/name:pymdownx.emoji.to_svg
  - pymdownx.highlight:
      linenums: true
  - pymdownx.inlinehilite
  - pymdownx.keys
  - pymdownx.magiclink
  - pymdownx.mark
  - pymdownx.snippets:
      check_paths: true
  - pymdownx.progressbar
  - pymdownx.smartsymbols
  - pymdownx.superfences:
      custom_fences:
        - name: math
          class: arithmatex
          format: !!python/name:pymdownx.arithmatex.fence_mathjax_format
  - pymdownx.tasklist:
      custom_checkbox: true
  - pymdownx.tilde
  - pymdownx.tabbed:
      alternate_style: true