# MkDocs-style site configuration: site title followed by the navigation tree.
site_name: LLamaSharp Documentation
# Navigation entries have the form `Title: page path`; nested lists create
# sections. Paths are presumably resolved relative to the docs directory.
nav:
    # Top-level pages shown ahead of the grouped sections.
    - Overview: index.md
    - Quick Start: QuickStart.md
    - Architecture: Architecture.md
    - FAQ: FAQ.md
    - Contributing Guide: ContributingGuide.md
    # Tutorials section: one page per topic, all stored under Tutorials/.
    - Tutorials:
        - Customize the native library loading: Tutorials/NativeLibraryConfig.md
        - Use executors: Tutorials/Executors.md
        - Use ChatSession: Tutorials/ChatSession.md
        - Understand LLamaContext: Tutorials/UnderstandLLamaContext.md
        - Get embeddings: Tutorials/GetEmbeddings.md
        - Quantize the model: Tutorials/Quantization.md

    # Integrations section: one page per third-party integration, all stored
    # under Integrations/.
    - Integrations:
        - semantic-kernel integration: Integrations/semantic-kernel.md
        - kernel-memory integration: Integrations/kernel-memory.md
        - BotSharp integration: Integrations/BotSharp.md
        - Langchain integration: Integrations/Langchain.md

    # Examples section: one page per runnable example, all stored under
    # Examples/. Entries are listed in alphabetical order of the page file name.
    - Examples:
        - Batched executor - multi-output to one input: Examples/BatchedExecutorFork.md
        - Batched executor - basic guidance: Examples/BatchedExecutorGuidance.md
        - Batched executor - rewinding to an earlier state: Examples/BatchedExecutorRewind.md
        - Chinese LLM - with GB2312 encoding: Examples/ChatChineseGB2312.md
        - ChatSession - stripping role names: Examples/ChatSessionStripRoleName.md
        - ChatSession - with history: Examples/ChatSessionWithHistory.md
        - ChatSession - restarting: Examples/ChatSessionWithRestart.md
        - ChatSession - Basic: Examples/ChatSessionWithRoleName.md
        - Coding assistant: Examples/CodingAssistant.md
        - Get embeddings: Examples/GetEmbeddings.md
        - Grammar - json response: Examples/GrammarJsonResponse.md
        - Instruct executor - basic: Examples/InstructModeExecute.md
        - Interactive executor - basic: Examples/InteractiveModeExecute.md
        - Kernel memory integration - basic: Examples/KernelMemory.md
        - Kernel-memory - save & load: Examples/KernelMemorySaveAndLoad.md
        - LLaVA - basic: Examples/LLavaInteractiveModeExecute.md
        - ChatSession - load & save: Examples/LoadAndSaveSession.md
        - Executor - save/load state: Examples/LoadAndSaveState.md
        - Quantization: Examples/QuantizeModel.md
        - Semantic-kernel - chat: Examples/SemanticKernelChat.md
        - Semantic-kernel - with kernel-memory: Examples/SemanticKernelMemory.md
        - Semantic-kernel - basic: Examples/SemanticKernelPrompt.md
        - Stateless executor: Examples/StatelessModeExecute.md
        - Talk to yourself: Examples/TalkToYourself.md

    # API reference section: one page per documented type under ./xmldocs/.
    # Each title is the page's file name without the `.md` extension.
    # NOTE(review): this list looks machine-generated — confirm the generator
    # before editing entries by hand.
    - API Reference:
        - index: ./xmldocs/index.md
        - llama.abstractions.adaptercollection: ./xmldocs/llama.abstractions.adaptercollection.md
        - llama.abstractions.icontextparams: ./xmldocs/llama.abstractions.icontextparams.md
        - llama.abstractions.ihistorytransform: ./xmldocs/llama.abstractions.ihistorytransform.md
        - llama.abstractions.iinferenceparams: ./xmldocs/llama.abstractions.iinferenceparams.md
        - llama.abstractions.illamaexecutor: ./xmldocs/llama.abstractions.illamaexecutor.md
        - llama.abstractions.illamaparams: ./xmldocs/llama.abstractions.illamaparams.md
        - llama.abstractions.imodelparams: ./xmldocs/llama.abstractions.imodelparams.md
        - llama.abstractions.itextstreamtransform: ./xmldocs/llama.abstractions.itextstreamtransform.md
        - llama.abstractions.itexttransform: ./xmldocs/llama.abstractions.itexttransform.md
        - llama.abstractions.loraadapter: ./xmldocs/llama.abstractions.loraadapter.md
        - llama.abstractions.metadataoverride: ./xmldocs/llama.abstractions.metadataoverride.md
        - llama.abstractions.metadataoverrideconverter: ./xmldocs/llama.abstractions.metadataoverrideconverter.md
        - llama.abstractions.tensorsplitscollection: ./xmldocs/llama.abstractions.tensorsplitscollection.md
        - llama.abstractions.tensorsplitscollectionconverter: ./xmldocs/llama.abstractions.tensorsplitscollectionconverter.md
        - llama.antipromptprocessor: ./xmldocs/llama.antipromptprocessor.md
        - llama.batched.alreadypromptedconversationexception: ./xmldocs/llama.batched.alreadypromptedconversationexception.md
        - llama.batched.batchedexecutor: ./xmldocs/llama.batched.batchedexecutor.md
        - llama.batched.cannotforkwhilerequiresinferenceexception: ./xmldocs/llama.batched.cannotforkwhilerequiresinferenceexception.md
        - llama.batched.cannotmodifywhilerequiresinferenceexception: ./xmldocs/llama.batched.cannotmodifywhilerequiresinferenceexception.md
        - llama.batched.cannotsamplerequiresinferenceexception: ./xmldocs/llama.batched.cannotsamplerequiresinferenceexception.md
        - llama.batched.cannotsamplerequirespromptexception: ./xmldocs/llama.batched.cannotsamplerequirespromptexception.md
        - llama.batched.conversation: ./xmldocs/llama.batched.conversation.md
        - llama.batched.conversationextensions: ./xmldocs/llama.batched.conversationextensions.md
        - llama.batched.experimentalbatchedexecutorexception: ./xmldocs/llama.batched.experimentalbatchedexecutorexception.md
        - llama.chatsession-1: ./xmldocs/llama.chatsession-1.md
        - llama.chatsession: ./xmldocs/llama.chatsession.md
        - llama.common.authorrole: ./xmldocs/llama.common.authorrole.md
        - llama.common.chathistory: ./xmldocs/llama.common.chathistory.md
        - llama.common.fixedsizequeue-1: ./xmldocs/llama.common.fixedsizequeue-1.md
        - llama.common.inferenceparams: ./xmldocs/llama.common.inferenceparams.md
        - llama.common.mirostattype: ./xmldocs/llama.common.mirostattype.md
        - llama.common.modelparams: ./xmldocs/llama.common.modelparams.md
        - llama.exceptions.grammarexpectedname: ./xmldocs/llama.exceptions.grammarexpectedname.md
        - llama.exceptions.grammarexpectednext: ./xmldocs/llama.exceptions.grammarexpectednext.md
        - llama.exceptions.grammarexpectedprevious: ./xmldocs/llama.exceptions.grammarexpectedprevious.md
        - llama.exceptions.grammarformatexception: ./xmldocs/llama.exceptions.grammarformatexception.md
        - llama.exceptions.grammarunexpectedcharaltelement: ./xmldocs/llama.exceptions.grammarunexpectedcharaltelement.md
        - llama.exceptions.grammarunexpectedcharrngelement: ./xmldocs/llama.exceptions.grammarunexpectedcharrngelement.md
        - llama.exceptions.grammarunexpectedendelement: ./xmldocs/llama.exceptions.grammarunexpectedendelement.md
        - llama.exceptions.grammarunexpectedendofinput: ./xmldocs/llama.exceptions.grammarunexpectedendofinput.md
        - llama.exceptions.grammarunexpectedhexcharscount: ./xmldocs/llama.exceptions.grammarunexpectedhexcharscount.md
        - llama.exceptions.grammarunknownescapecharacter: ./xmldocs/llama.exceptions.grammarunknownescapecharacter.md
        - llama.exceptions.llamadecodeerror: ./xmldocs/llama.exceptions.llamadecodeerror.md
        - llama.exceptions.loadweightsfailedexception: ./xmldocs/llama.exceptions.loadweightsfailedexception.md
        - llama.exceptions.runtimeerror: ./xmldocs/llama.exceptions.runtimeerror.md
        - llama.extensions.icontextparamsextensions: ./xmldocs/llama.extensions.icontextparamsextensions.md
        - llama.extensions.imodelparamsextensions: ./xmldocs/llama.extensions.imodelparamsextensions.md
        - llama.grammars.grammar: ./xmldocs/llama.grammars.grammar.md
        - llama.grammars.grammarrule: ./xmldocs/llama.grammars.grammarrule.md
        - llama.ichatmodel: ./xmldocs/llama.ichatmodel.md
        - llama.llamacache: ./xmldocs/llama.llamacache.md
        - llama.llamaembedder: ./xmldocs/llama.llamaembedder.md
        - llama.llamamodel: ./xmldocs/llama.llamamodel.md
        - llama.llamamodelv1: ./xmldocs/llama.llamamodelv1.md
        - llama.llamaparams: ./xmldocs/llama.llamaparams.md
        - llama.llamaquantizer: ./xmldocs/llama.llamaquantizer.md
        - llama.llamastate: ./xmldocs/llama.llamastate.md
        - llama.llamatransforms: ./xmldocs/llama.llamatransforms.md
        - llama.llavaweights: ./xmldocs/llama.llavaweights.md
        - llama.native.decoderesult: ./xmldocs/llama.native.decoderesult.md
        - llama.native.ggmltype: ./xmldocs/llama.native.ggmltype.md
        - llama.native.gpusplitmode: ./xmldocs/llama.native.gpusplitmode.md
        - llama.native.llamabatch: ./xmldocs/llama.native.llamabatch.md
        - llama.native.llamabeamsstate: ./xmldocs/llama.native.llamabeamsstate.md
        - llama.native.llamabeamview: ./xmldocs/llama.native.llamabeamview.md
        - llama.native.llamachatmessage: ./xmldocs/llama.native.llamachatmessage.md
        - llama.native.llamacontextparams: ./xmldocs/llama.native.llamacontextparams.md
        - llama.native.llamaftype: ./xmldocs/llama.native.llamaftype.md
        - llama.native.llamagrammarelement: ./xmldocs/llama.native.llamagrammarelement.md
        - llama.native.llamagrammarelementtype: ./xmldocs/llama.native.llamagrammarelementtype.md
        - llama.native.llamakvcacheview: ./xmldocs/llama.native.llamakvcacheview.md
        - llama.native.llamakvcacheviewcell: ./xmldocs/llama.native.llamakvcacheviewcell.md
        - llama.native.llamakvcacheviewsafehandle: ./xmldocs/llama.native.llamakvcacheviewsafehandle.md
        - llama.native.llamaloglevel: ./xmldocs/llama.native.llamaloglevel.md
        - llama.native.llamamodelkvoverridetype: ./xmldocs/llama.native.llamamodelkvoverridetype.md
        - llama.native.llamamodelmetadataoverride: ./xmldocs/llama.native.llamamodelmetadataoverride.md
        - llama.native.llamamodelparams: ./xmldocs/llama.native.llamamodelparams.md
        - llama.native.llamamodelquantizeparams: ./xmldocs/llama.native.llamamodelquantizeparams.md
        - llama.native.llamanativebatch: ./xmldocs/llama.native.llamanativebatch.md
        - llama.native.llamapoolingtype: ./xmldocs/llama.native.llamapoolingtype.md
        - llama.native.llamapos: ./xmldocs/llama.native.llamapos.md
        - llama.native.llamaropetype: ./xmldocs/llama.native.llamaropetype.md
        - llama.native.llamaseqid: ./xmldocs/llama.native.llamaseqid.md
        - llama.native.llamatoken: ./xmldocs/llama.native.llamatoken.md
        - llama.native.llamatokendata: ./xmldocs/llama.native.llamatokendata.md
        - llama.native.llamatokendataarray: ./xmldocs/llama.native.llamatokendataarray.md
        - llama.native.llamatokendataarraynative: ./xmldocs/llama.native.llamatokendataarraynative.md
        - llama.native.llamatokentype: ./xmldocs/llama.native.llamatokentype.md
        - llama.native.llamavocabtype: ./xmldocs/llama.native.llamavocabtype.md
        - llama.native.llavaimageembed: ./xmldocs/llama.native.llavaimageembed.md
        - llama.native.nativeapi: ./xmldocs/llama.native.nativeapi.md
        - llama.native.nativelibraryconfig: ./xmldocs/llama.native.nativelibraryconfig.md
        - llama.native.ropescalingtype: ./xmldocs/llama.native.ropescalingtype.md
        - llama.native.safellamacontexthandle: ./xmldocs/llama.native.safellamacontexthandle.md
        - llama.native.safellamagrammarhandle: ./xmldocs/llama.native.safellamagrammarhandle.md
        - llama.native.safellamahandlebase: ./xmldocs/llama.native.safellamahandlebase.md
        - llama.native.safellamamodelhandle: ./xmldocs/llama.native.safellamamodelhandle.md
        - llama.native.safellavaimageembedhandle: ./xmldocs/llama.native.safellavaimageembedhandle.md
        - llama.native.safellavamodelhandle: ./xmldocs/llama.native.safellavamodelhandle.md
        - llama.quantizer: ./xmldocs/llama.quantizer.md
        - llama.sampling.basesamplingpipeline: ./xmldocs/llama.sampling.basesamplingpipeline.md
        - llama.sampling.defaultsamplingpipeline: ./xmldocs/llama.sampling.defaultsamplingpipeline.md
        - llama.sampling.greedysamplingpipeline: ./xmldocs/llama.sampling.greedysamplingpipeline.md
        - llama.sampling.isamplingpipeline: ./xmldocs/llama.sampling.isamplingpipeline.md
        - llama.sampling.isamplingpipelineextensions: ./xmldocs/llama.sampling.isamplingpipelineextensions.md
        - llama.sampling.mirostate2samplingpipeline: ./xmldocs/llama.sampling.mirostate2samplingpipeline.md
        - llama.sampling.mirostatesamplingpipeline: ./xmldocs/llama.sampling.mirostatesamplingpipeline.md
        - llama.sessionstate: ./xmldocs/llama.sessionstate.md
        - llama.streamingtokendecoder: ./xmldocs/llama.streamingtokendecoder.md
        - llama.types.chatcompletion: ./xmldocs/llama.types.chatcompletion.md
        - llama.types.chatcompletionchoice: ./xmldocs/llama.types.chatcompletionchoice.md
        - llama.types.chatcompletionchunk: ./xmldocs/llama.types.chatcompletionchunk.md
        - llama.types.chatcompletionchunkchoice: ./xmldocs/llama.types.chatcompletionchunkchoice.md
        - llama.types.chatcompletionchunkdelta: ./xmldocs/llama.types.chatcompletionchunkdelta.md
        - llama.types.chatcompletionmessage: ./xmldocs/llama.types.chatcompletionmessage.md
        - llama.types.chatmessagerecord: ./xmldocs/llama.types.chatmessagerecord.md
        - llama.types.chatrole: ./xmldocs/llama.types.chatrole.md
        - llama.types.completion: ./xmldocs/llama.types.completion.md
        - llama.types.completionchoice: ./xmldocs/llama.types.completionchoice.md
        - llama.types.completionchunk: ./xmldocs/llama.types.completionchunk.md
        - llama.types.completionlogprobs: ./xmldocs/llama.types.completionlogprobs.md
        - llama.types.completionusage: ./xmldocs/llama.types.completionusage.md
        - llama.types.embedding: ./xmldocs/llama.types.embedding.md
        - llama.types.embeddingdata: ./xmldocs/llama.types.embeddingdata.md
        - llama.types.embeddingusage: ./xmldocs/llama.types.embeddingusage.md
        - logger: ./xmldocs/logger.md

# Theme settings: the `material` theme with an English UI, custom fonts and a
# light/dark palette pair selected by the browser's colour-scheme preference.
theme:
  name: material
  language: en
  favicon: media/icon128.png
  icon:
    logo: material/file-document
  font:
    text: Fira Sans
    code: Fira Mono
  features:
    # NOTE(review): no repo_url/edit_uri is set in the visible config, so the
    # edit action may have nothing to link to — confirm.
    - content.action.edit
    - navigation.instant
  static_templates:
    - 404.html
  include_search_page: false
  search_index_only: true
  # Palette entries are order-sensitive: light scheme first, dark scheme second.
  palette:
    # Light scheme; its toggle offers the switch to dark mode.
    - media: '(prefers-color-scheme: light)'
      scheme: default
      primary: white
      accent: red
      toggle:
        icon: material/weather-sunny
        name: Switch to dark mode
    # Dark scheme; its toggle offers the switch back to light mode.
    - media: '(prefers-color-scheme: dark)'
      scheme: slate
      primary: blue
      accent: blue
      toggle:
        icon: material/weather-night
        name: Switch to light mode


# Extra theme variables: the documentation version provider is set to `mike`.
extra:
  version:
    provider: mike

# Additional stylesheet loaded on every page.
# NOTE(review): the `?v=14` query string is presumably a cache-buster — confirm
# it is bumped whenever css/extra.css changes.
extra_css:
  - 'css/extra.css?v=14'

# Markdown extensions applied to every page: plain-named built-ins first,
# then the `pymdownx.*` (PyMdown Extensions) set.
# The `!!python/...` tags below are Python-specific and must be resolved by the
# consuming tool's YAML loader; a generic safe loader will reject them.
markdown_extensions:
  - admonition
  - def_list
  - footnotes
  - meta
  - toc:
      # NOTE(review): an empty permalink string looks like a lost icon glyph or
      # a deliberate "off" — confirm which was intended.
      permalink: ""
      # `pymdownx.slugs.uslugify` is a deprecated alias; `slugify` configured
      # with `case: lower` is its documented replacement and yields the same
      # lowercase Unicode slugs.
      slugify: !!python/object/apply:pymdownx.slugs.slugify
        kwds:
          case: lower
  - pymdownx.arithmatex:
      generic: true
  - pymdownx.caret
  - pymdownx.critic
  - pymdownx.details
  - pymdownx.emoji:
      emoji_generator: !!python/name:pymdownx.emoji.to_svg
  - pymdownx.highlight:
      linenums: true
  - pymdownx.inlinehilite
  - pymdownx.keys
  - pymdownx.magiclink
  - pymdownx.mark
  - pymdownx.snippets:
      check_paths: true
  - pymdownx.progressbar
  - pymdownx.smartsymbols
  - pymdownx.superfences:
      # Fenced blocks tagged `math` are rendered through arithmatex instead of
      # being treated as code.
      custom_fences:
        - name: math
          class: arithmatex
          format: !!python/name:pymdownx.arithmatex.fence_mathjax_format
  - pymdownx.tasklist:
      custom_checkbox: true
  - pymdownx.tilde
  - pymdownx.tabbed:
      alternate_style: true