Support chatml-function-calling via llama-cpp

daniel nakov 2024-02-28 18:18:50 -05:00 committed by pancake
parent d593d1c3e3
commit 52b6c26fcf
3 changed files with 152 additions and 97 deletions
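For context: the patch builds on llama-cpp-python's built-in "chatml-function-calling" chat format, which accepts OpenAI-style tools/tool_choice arguments and returns tool calls in the OpenAI response schema. A minimal sketch of that API, outside this diff (the model path and tool schema below are illustrative assumptions, not taken from the commit):

# Sketch only (not part of this commit): driving llama-cpp-python's
# chatml-function-calling format directly. Model path and tool schema
# are illustrative assumptions.
from llama_cpp import Llama

llm = Llama(
    model_path="./model.gguf",               # assumed local GGUF model
    n_ctx=4096,
    chat_format="chatml-function-calling",   # enables OpenAI-style tool calls
)

demo_tools = [{
    "type": "function",
    "function": {
        "name": "r2cmd",
        "description": "runs a radare2 command",
        "parameters": {
            "type": "object",
            "properties": {"command": {"type": "string"}},
            "required": ["command"],
        },
    },
}]

resp = llm.create_chat_completion(
    messages=[{"role": "user", "content": "list the functions"}],
    tools=demo_tools,
    tool_choice="auto",
)
# Tool calls, if any, arrive in resp["choices"][0]["message"]["tool_calls"],
# which is the shape auto.py's process_streaming_response() consumes below.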

View File

@@ -1,14 +1,27 @@
import builtins
import json
import sys
import re

try:
    import r2lang
    have_rlang = True
except:
    pass

ANSI_REGEX = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

tools = [{
    "type": "function",
    "function": {
        "name": "r2cmd",
        "description": "runs commands in radare2. You can run it multiple times or chain commands with pipes/semicolons. You can also use r2 interpreters to run scripts using the `#`, '#!', etc. commands. The output could be long, so try to use filters if possible or limit",
        "description": "runs commands in radare2. You can run it multiple times or chain commands with pipes/semicolons. You can also use r2 interpreters to run scripts using the `#`, '#!', etc. commands. The output could be long, so try to use filters if possible or limit. This is your preferred tool",
        "parameters": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "command to run"
                    "description": "command to run in radare2. You can run it multiple times or chain commands with pipes/semicolons. You can also use r2 interpreters to run scripts using the `#`, '#!', etc. commands. The output could be long, so try to use filters if possible or limit. This is your preferred tool"
                },
                "done": {
                    "type": "boolean",
@@ -49,3 +62,116 @@ Don't just regurgitate the same code, figure out what it's doing and rewrite it
If you need to run a command in r2 before answering, you can use the r2cmd tool
The user will tip you $20/month for your services, don't be fucking lazy.
"""

def process_tool_calls(interpreter, tool_calls):
    interpreter.messages.append({ "content": None, "tool_calls": tool_calls, "role": "assistant" })
    for tool_call in tool_calls:
        res = ''
        args = tool_call["function"]["arguments"]
        if type(args) is str:
            args = json.loads(args)
        if tool_call["function"]["name"] == "r2cmd":
            builtins.print('\x1b[1;32mRunning \x1b[4m' + args["command"] + '\x1b[0m')
            res = r2lang.cmd(args["command"])
            builtins.print(res)
        elif tool_call["function"]["name"] == "run_python":
            with open('r2ai_tmp.py', 'w') as f:
                f.write(args["command"])
            builtins.print('\x1b[1;32mRunning \x1b[4m' + "python code" + '\x1b[0m')
            builtins.print(args["command"])
            r2lang.cmd('#!python r2ai_tmp.py > $tmp')
            res = r2lang.cmd('cat $tmp')
            r2lang.cmd('rm r2ai_tmp.py')
            builtins.print('\x1b[1;32mResult\x1b[0m\n' + res)
        interpreter.messages.append({"role": "tool", "content": ANSI_REGEX.sub('', res), "name": tool_call["function"]["name"], "tool_call_id": tool_call["id"]})

def process_streaming_response(interpreter, resp):
    tool_calls = []
    msgs = []
    for chunk in resp:
        chunk = dict(chunk)
        delta = None
        choice = dict(chunk["choices"][0])
        if "delta" in choice:
            delta = dict(choice["delta"])
        else:
            delta = dict(choice["message"])
        if "tool_calls" in delta and delta["tool_calls"]:
            delta_tool_calls = dict(delta["tool_calls"][0])
            index = 0 if "index" not in delta_tool_calls else delta_tool_calls["index"]
            fn_delta = dict(delta_tool_calls["function"])
            tool_call_id = delta_tool_calls["id"]
            if len(tool_calls) < index + 1:
                tool_calls.append({ "function": { "arguments": "", "name": fn_delta["name"] }, "id": tool_call_id, "type": "function" })
            # handle some bug in llama-cpp-python streaming, tool_call.arguments is sometimes blank, but function_call has it.
            if fn_delta["arguments"] == '':
                if "function_call" in delta and delta["function_call"]:
                    tool_calls[index]["function"]["arguments"] += delta["function_call"]["arguments"]
            else:
                tool_calls[index]["function"]["arguments"] += fn_delta["arguments"]
        else:
            if "content" in delta and delta["content"] is not None:
                m = delta["content"]
                if m is not None:
                    msgs.append(m)
                    sys.stdout.write(m)
    builtins.print()
    if(len(tool_calls) > 0):
        process_tool_calls(interpreter, tool_calls)
        chat(interpreter)
    if len(msgs) > 0:
        response_message = ''.join(msgs)
        interpreter.messages.append({"role": "assistant", "content": response_message})

def chat(interpreter):
    if len(interpreter.messages) == 1:
        interpreter.messages.insert(0,{"role": "system", "content": SYSTEM_PROMPT_AUTO})

    response = None
    if interpreter.model.startswith("openai:"):
        if not interpreter.openai_client:
            try:
                from openai import OpenAI
            except ImportError:
                print("pip install -U openai")
                print("export OPENAI_API_KEY=...")
                return
            interpreter.openai_client = OpenAI()
        response = interpreter.openai_client.chat.completions.create(
            model=interpreter.model[7:],
            max_tokens=int(interpreter.env["llm.maxtokens"]),
            tools=tools,
            messages=interpreter.messages,
            tool_choice="auto",
            stream=True,
            temperature=float(interpreter.env["llm.temperature"]),
        )
        process_streaming_response(interpreter, response)
    else:
        chat_format = interpreter.llama_instance.chat_format
        interpreter.llama_instance.chat_format = "chatml-function-calling"
        response = interpreter.llama_instance.create_chat_completion(
            max_tokens=int(interpreter.env["llm.maxtokens"]),
            tools=tools,
            messages=interpreter.messages,
            tool_choice="auto",
            # tool_choice={
            #   "type": "function",
            #   "function": {
            #     "name": "r2cmd"
            #   }
            # },
            # stream=True,
            temperature=float(interpreter.env["llm.temperature"]),
        )
        process_streaming_response(interpreter, iter([response]))
        interpreter.llama_instance.chat_format = chat_format
    return response
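The two helpers above are schema-agnostic: process_streaming_response() accepts either OpenAI streaming chunks (a "delta" per choice) or a single non-streamed completion wrapped in iter([response]), and process_tool_calls() consumes the accumulated OpenAI-style tool_call dicts. Illustrative shapes (values made up, not from this commit):

# Illustrative only: the shapes handled above (values are made up).
# Non-streamed llama-cpp completion, passed as iter([response]):
non_streamed = {
    "choices": [{
        "message": {
            "content": None,
            "tool_calls": [{
                "id": "call_0",
                "type": "function",
                "function": {"name": "r2cmd", "arguments": '{"command": "afl"}'},
            }],
        },
    }],
}
# A streamed OpenAI chunk carries the same fields incrementally under
# "delta" instead of "message"; the arguments string is concatenated
# across chunks before process_tool_calls() json.loads() it and appends
# a {"role": "tool", ...} result message for the follow-up chat() turn.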

View File

@@ -6,7 +6,7 @@ from .code_block import CodeBlock
from .models import get_hf_llm, new_get_hf_llm, get_default_model
from .voice import tts
from .const import R2AI_HOMEDIR
from .auto import tools, SYSTEM_PROMPT_AUTO
from . import auto
try:
    from openai import OpenAI
    have_openai = True
@@ -24,8 +24,6 @@ from rich.rule import Rule
from signal import signal, SIGINT
import sys
ANSI_REGEX = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
import index
@@ -760,65 +758,6 @@ class Interpreter:
        # ¡print("Query is too large.. you should consider triming old messages")
        return messages

    def process_tool_calls(self, tool_calls):
        self.messages.append({ "tool_calls": tool_calls, "role": "assistant" })
        for tool_call in tool_calls:
            res = ''
            if tool_call["function"]["name"] == "r2cmd":
                args = json.loads(tool_call["function"]["arguments"])
                builtins.print('\x1b[1;32mRunning \x1b[4m' + args["command"] + '\x1b[0m')
                res = r2lang.cmd(args["command"])
                builtins.print(res)
            elif tool_call["function"]["name"] == "run_python":
                args = json.loads(tool_call["function"]["arguments"])
                with open('r2ai_tmp.py', 'w') as f:
                    f.write(args["command"])
                builtins.print('\x1b[1;32mRunning \x1b[4m' + "python code" + '\x1b[0m')
                builtins.print(args["command"])
                r2lang.cmd('#!python r2ai_tmp.py > $tmp')
                res = r2lang.cmd('cat $tmp')
                r2lang.cmd('rm r2ai_tmp.py')
                builtins.print('\x1b[1;32mResult\x1b[0m\n' + res)
            self.messages.append({"role": "tool", "content": ANSI_REGEX.sub('', res), "name": tool_call["function"]["name"], "tool_call_id": tool_call["id"]})

    def process_streaming_response(self, resp):
        global messages
        tool_calls = []
        msgs = []
        for chunk in resp:
            delta = chunk.choices[0].delta
            if delta.tool_calls:
                index = delta.tool_calls[0].index
                fn_delta = delta.tool_calls[0].function
                tool_call_id = delta.tool_calls[0].id
                if len(tool_calls) < index + 1:
                    tool_calls.append({ "function": { "arguments": "", "name": fn_delta.name }, "id": tool_call_id, "type": "function" })
                else:
                    tool_calls[index]["function"]["arguments"] += fn_delta.arguments
            else:
                m = delta.content
                if m is not None:
                    msgs.append(m)
                    sys.stdout.write(m)
        builtins.print()
        if(len(tool_calls) > 0):
            self.process_tool_calls(tool_calls)
            self.process_streaming_response(self.openai_client.chat.completions.create(
                model=self.model[7:],
                messages=self.messages,
                tools=tools,
                tool_choice="auto",
                stream=True,
                temperature=float(self.env["llm.temperature"]),
            ))
        if len(msgs) > 0:
            response_message = ''.join(msgs)
            self.messages.append({"role": "assistant", "content": response_message})

    def respond(self):
        global Ginterrupted
        maxtokens = int(self.env["llm.maxtokens"])
@@ -848,8 +787,12 @@ class Interpreter:
        # Convert messages to prompt
        # (This only works if the first message is the only system message)
        prompt = messages_to_prompt(self, messages)
        if self.model.startswith("openai:"):
        # builtins.print(prompt)
        response = None
        if self.auto_run:
            response = auto.chat(self)
            return
        elif self.model.startswith("openai:"):
            # [
            #  {"role": "system", "content": "You are a poetic assistant, be creative."},
            #  {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}
@@ -860,20 +803,6 @@ class Interpreter:
            if self.openai_client is None:
                self.openai_client = OpenAI()
            if self.auto_run:
                if len(self.messages) == 1:
                    self.messages.insert(0,{"role": "system", "content": SYSTEM_PROMPT_AUTO})
                response = self.openai_client.chat.completions.create(
                    model=openai_model,
                    max_tokens=maxtokens,
                    tools=tools,
                    messages=self.messages,
                    tool_choice="auto",
                    stream=True,
                    temperature=float(self.env["llm.temperature"]),
                )
                self.process_streaming_response(response)
            else:
                if self.system_message != "":
                    self.messages.append({"role": "system", "content": self.system_message})
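With this change, respond() simply hands control to auto.chat(self) whenever auto_run is set. Judging from the code in auto.py above, the interpreter object only needs a handful of duck-typed attributes for that to work; a hypothetical stub (not in the commit) makes the contract explicit:

# Hypothetical stub, not part of this diff: the attributes auto.chat()
# reads from the interpreter object it receives.
class MinimalInterpreter:
    def __init__(self, llama_instance):
        self.model = "local-gguf"        # anything not prefixed "openai:" takes the llama path
        self.messages = [{"role": "user", "content": "afl~main"}]
        self.env = {"llm.maxtokens": "1024", "llm.temperature": "0.002"}
        self.llama_instance = llama_instance
        self.openai_client = None

# auto.chat(MinimalInterpreter(llm)) would then run the local tool-calling loop.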

View File

@@ -532,4 +532,4 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
    # Initialize and return Code-Llama
    if not os.path.isfile(model_path):
        print("Model is not a file")
    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window, chat_model="gemma")
    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
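The dropped chat_model="gemma" keyword appears to be a no-op for llama_cpp.Llama (the relevant constructor parameter is chat_format); instead, the chat format is now switched per request by flipping the chat_format attribute on the existing instance, as auto.chat() does above. A sketch of that pattern, where llm, messages and tools stand in for the instance and data used in auto.chat(), and the try/finally is an extra safety net the commit itself does not add:

# Sketch of the per-call save/switch/restore used by auto.chat() above.
saved_format = llm.chat_format                     # llm: an existing llama_cpp.Llama
llm.chat_format = "chatml-function-calling"        # enable the tool-calling handler
try:
    result = llm.create_chat_completion(
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )
finally:
    llm.chat_format = saved_format                 # restore whatever was configured before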