Support chatml-function-calling via llama-cpp

daniel nakov 2024-02-28 18:18:50 -05:00 committed by pancake
parent d593d1c3e3
commit 52b6c26fcf
3 changed files with 152 additions and 97 deletions
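
The gist of the change: auto mode no longer depends on the OpenAI client for tool calling. llama-cpp-python ships a "chatml-function-calling" chat format whose create_chat_completion() accepts the same tools/tool_choice parameters as the OpenAI chat completions API, so local GGUF models can drive the r2cmd/run_python tools too. A minimal sketch of that API, not part of this commit, with a hypothetical model path and schema:

# Minimal llama-cpp-python function-calling sketch (model path and schema are illustrative)
from llama_cpp import Llama

llm = Llama(model_path="/tmp/model.gguf", n_ctx=4096,
            chat_format="chatml-function-calling")
resp = llm.create_chat_completion(
    messages=[{"role": "user", "content": "list the functions in this binary"}],
    tools=[{"type": "function", "function": {
        "name": "r2cmd",
        "description": "runs a radare2 command",
        "parameters": {"type": "object",
                       "properties": {"command": {"type": "string"}},
                       "required": ["command"]}}}],
    tool_choice="auto",
    temperature=0.2,
)
# choices[0]["message"] may contain "tool_calls"; their arguments arrive as a JSON string.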

View File

@@ -1,14 +1,27 @@
import builtins
import json
import sys
import re
try:
import r2lang
have_rlang = True
except:
pass
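# Matches ANSI escape sequences so tool output can be stripped before it is stored in the chat history.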
ANSI_REGEX = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
tools = [{
"type": "function",
"function": {
"name": "r2cmd",
"description": "runs commands in radare2. You can run it multiple times or chain commands with pipes/semicolons. You can also use r2 interpreters to run scripts using the `#`, '#!', etc. commands. The output could be long, so try to use filters if possible or limit",
"description": "runs commands in radare2. You can run it multiple times or chain commands with pipes/semicolons. You can also use r2 interpreters to run scripts using the `#`, '#!', etc. commands. The output could be long, so try to use filters if possible or limit. This is your preferred tool",
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "command to run"
"description": "command to run in radare2. You can run it multiple times or chain commands with pipes/semicolons. You can also use r2 interpreters to run scripts using the `#`, '#!', etc. commands. The output could be long, so try to use filters if possible or limit. This is your preferred tool"
},
"done": {
"type": "boolean",
@@ -48,4 +61,117 @@ If you're asked to decompile a function, make sure to return the code in the lan
Don't just regurgitate the same code, figure out what it's doing and rewrite it to be more understandable.
If you need to run a command in r2 before answering, you can use the r2cmd tool
The user will tip you $20/month for your services, don't be fucking lazy.
"""
"""
def process_tool_calls(interpreter, tool_calls):
interpreter.messages.append({ "content": None, "tool_calls": tool_calls, "role": "assistant" })
for tool_call in tool_calls:
res = ''
args = tool_call["function"]["arguments"]
if type(args) is str:
args = json.loads(args)
if tool_call["function"]["name"] == "r2cmd":
builtins.print('\x1b[1;32mRunning \x1b[4m' + args["command"] + '\x1b[0m')
res = r2lang.cmd(args["command"])
builtins.print(res)
elif tool_call["function"]["name"] == "run_python":
with open('r2ai_tmp.py', 'w') as f:
f.write(args["command"])
builtins.print('\x1b[1;32mRunning \x1b[4m' + "python code" + '\x1b[0m')
builtins.print(args["command"])
r2lang.cmd('#!python r2ai_tmp.py > $tmp')
res = r2lang.cmd('cat $tmp')
r2lang.cmd('rm r2ai_tmp.py')
builtins.print('\x1b[1;32mResult\x1b[0m\n' + res)
interpreter.messages.append({"role": "tool", "content": ANSI_REGEX.sub('', res), "name": tool_call["function"]["name"], "tool_call_id": tool_call["id"]})
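# Accumulate streamed deltas (or a single non-streamed message) into plain content and tool calls; run any tool calls and loop back into chat(), otherwise store the assistant reply.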
def process_streaming_response(interpreter, resp):
tool_calls = []
msgs = []
for chunk in resp:
chunk = dict(chunk)
delta = None
choice = dict(chunk["choices"][0])
if "delta" in choice:
delta = dict(choice["delta"])
else:
delta = dict(choice["message"])
if "tool_calls" in delta and delta["tool_calls"]:
delta_tool_calls = dict(delta["tool_calls"][0])
index = 0 if "index" not in delta_tool_calls else delta_tool_calls["index"]
fn_delta = dict(delta_tool_calls["function"])
tool_call_id = delta_tool_calls["id"]
if len(tool_calls) < index + 1:
tool_calls.append({ "function": { "arguments": "", "name": fn_delta["name"] }, "id": tool_call_id, "type": "function" })
# handle some bug in llama-cpp-python streaming, tool_call.arguments is sometimes blank, but function_call has it.
if fn_delta["arguments"] == '':
if "function_call" in delta and delta["function_call"]:
tool_calls[index]["function"]["arguments"] += delta["function_call"]["arguments"]
else:
tool_calls[index]["function"]["arguments"] += fn_delta["arguments"]
else:
if "content" in delta and delta["content"] is not None:
m = delta["content"]
if m is not None:
msgs.append(m)
sys.stdout.write(m)
builtins.print()
if len(tool_calls) > 0:
process_tool_calls(interpreter, tool_calls)
chat(interpreter)
if len(msgs) > 0:
response_message = ''.join(msgs)
interpreter.messages.append({"role": "assistant", "content": response_message})
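# Auto-mode entry point: injects SYSTEM_PROMPT_AUTO on the first turn, then completes via the OpenAI API for "openai:" models or via the local llama instance using the chatml-function-calling format.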
def chat(interpreter):
if len(interpreter.messages) == 1:
interpreter.messages.insert(0,{"role": "system", "content": SYSTEM_PROMPT_AUTO})
response = None
if interpreter.model.startswith("openai:"):
if not interpreter.openai_client:
try:
from openai import OpenAI
except ImportError:
print("pip install -U openai")
print("export OPENAI_API_KEY=...")
return
interpreter.openai_client = OpenAI()
response = interpreter.openai_client.chat.completions.create(
model=interpreter.model[7:],
max_tokens=int(interpreter.env["llm.maxtokens"]),
tools=tools,
messages=interpreter.messages,
tool_choice="auto",
stream=True,
temperature=float(interpreter.env["llm.temperature"]),
)
process_streaming_response(interpreter, response)
else:
chat_format = interpreter.llama_instance.chat_format
interpreter.llama_instance.chat_format = "chatml-function-calling"
response = interpreter.llama_instance.create_chat_completion(
max_tokens=int(interpreter.env["llm.maxtokens"]),
tools=tools,
messages=interpreter.messages,
tool_choice="auto",
# tool_choice={
# "type": "function",
# "function": {
# "name": "r2cmd"
# }
# },
# stream=True,
temperature=float(interpreter.env["llm.temperature"]),
)
process_streaming_response(interpreter, iter([response]))
interpreter.llama_instance.chat_format = chat_format
return response
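
Both backends funnel into process_streaming_response(): the OpenAI call streams chunks whose choices[0] carries a "delta", while the local create_chat_completion() call is not streamed, so its single response is wrapped in iter([response]) and choices[0] carries a complete "message" instead (plus the quirk noted in the code comment above, where streamed arguments can show up under "function_call"). Roughly the two shapes the loop handles, with illustrative values:

# Streamed OpenAI-style chunk: tool-call arguments arrive as string fragments across deltas
streamed_chunk = {"choices": [{"delta": {"tool_calls": [
    {"index": 0, "id": "call_0",
     "function": {"name": "r2cmd", "arguments": "{\"comm"}}]}}]}

# Non-streamed llama-cpp-python response: one complete message, arguments as a JSON string
local_response = {"choices": [{"message": {"content": None, "tool_calls": [
    {"id": "call_0", "type": "function",
     "function": {"name": "r2cmd", "arguments": "{\"command\": \"aaa\"}"}}]}}]}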

View File

@@ -6,7 +6,7 @@ from .code_block import CodeBlock
from .models import get_hf_llm, new_get_hf_llm, get_default_model
from .voice import tts
from .const import R2AI_HOMEDIR
from .auto import tools, SYSTEM_PROMPT_AUTO
from . import auto
try:
from openai import OpenAI
have_openai = True
@@ -24,8 +24,6 @@ from rich.rule import Rule
from signal import signal, SIGINT
import sys
ANSI_REGEX = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
file_dir = os.path.dirname(__file__)
sys.path.append(file_dir)
import index
@@ -760,65 +758,6 @@ class Interpreter:
# ¡print("Query is too large.. you should consider triming old messages")
return messages
def process_tool_calls(self, tool_calls):
self.messages.append({ "tool_calls": tool_calls, "role": "assistant" })
for tool_call in tool_calls:
res = ''
if tool_call["function"]["name"] == "r2cmd":
args = json.loads(tool_call["function"]["arguments"])
builtins.print('\x1b[1;32mRunning \x1b[4m' + args["command"] + '\x1b[0m')
res = r2lang.cmd(args["command"])
builtins.print(res)
elif tool_call["function"]["name"] == "run_python":
args = json.loads(tool_call["function"]["arguments"])
with open('r2ai_tmp.py', 'w') as f:
f.write(args["command"])
builtins.print('\x1b[1;32mRunning \x1b[4m' + "python code" + '\x1b[0m')
builtins.print(args["command"])
r2lang.cmd('#!python r2ai_tmp.py > $tmp')
res = r2lang.cmd('cat $tmp')
r2lang.cmd('rm r2ai_tmp.py')
builtins.print('\x1b[1;32mResult\x1b[0m\n' + res)
self.messages.append({"role": "tool", "content": ANSI_REGEX.sub('', res), "name": tool_call["function"]["name"], "tool_call_id": tool_call["id"]})
def process_streaming_response(self, resp):
global messages
tool_calls = []
msgs = []
for chunk in resp:
delta = chunk.choices[0].delta
if delta.tool_calls:
index = delta.tool_calls[0].index
fn_delta = delta.tool_calls[0].function
tool_call_id = delta.tool_calls[0].id
if len(tool_calls) < index + 1:
tool_calls.append({ "function": { "arguments": "", "name": fn_delta.name }, "id": tool_call_id, "type": "function" })
else:
tool_calls[index]["function"]["arguments"] += fn_delta.arguments
else:
m = delta.content
if m is not None:
msgs.append(m)
sys.stdout.write(m)
builtins.print()
if(len(tool_calls) > 0):
self.process_tool_calls(tool_calls)
self.process_streaming_response(self.openai_client.chat.completions.create(
model=self.model[7:],
messages=self.messages,
tools=tools,
tool_choice="auto",
stream=True,
temperature=float(self.env["llm.temperature"]),
))
if len(msgs) > 0:
response_message = ''.join(msgs)
self.messages.append({"role": "assistant", "content": response_message})
def respond(self):
global Ginterrupted
maxtokens = int(self.env["llm.maxtokens"])
@@ -848,8 +787,12 @@ class Interpreter:
# Convert messages to prompt
# (This only works if the first message is the only system message)
prompt = messages_to_prompt(self, messages)
if self.model.startswith("openai:"):
# builtins.print(prompt)
response = None
if self.auto_run:
response = auto.chat(self)
return
elif self.model.startswith("openai:"):
# [
# {"role": "system", "content": "You are a poetic assistant, be creative."},
# {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}
@@ -860,36 +803,22 @@
if self.openai_client is None:
self.openai_client = OpenAI()
if self.auto_run:
if len(self.messages) == 1:
self.messages.insert(0,{"role": "system", "content": SYSTEM_PROMPT_AUTO})
response = self.openai_client.chat.completions.create(
model=openai_model,
max_tokens=maxtokens,
tools=tools,
messages=self.messages,
tool_choice="auto",
stream=True,
temperature=float(self.env["llm.temperature"]),
)
self.process_streaming_response(response)
else:
if self.system_message != "":
self.messages.append({"role": "system", "content": self.system_message})
completion = self.openai_client.chat.completions.create(
# TODO: instructions=self.system_message # instead of passing it in the query
model=openai_model,
max_tokens=maxtokens,
temperature=float(self.env["llm.temperature"]),
messages=self.messages
)
response = completion.choices[0].message.content
if "content" in self.messages[-1]:
last_message = self.messages[-1]["content"]
if self.env["chat.reply"] == "true":
self.messages.append({"role": "assistant", "content": response})
print(response)
return
else:
print("pip install -U openai")

View File

@@ -532,4 +532,4 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
# Initialize and return Code-Llama
if not os.path.isfile(model_path):
print("Model is not a file")
return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window, chat_model="gemma")
return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
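
A note on this last change: llama_cpp.Llama selects its template through the chat_format argument, and chat_model does not appear to be a recognized keyword, so dropping it should be harmless. The format needed for tool calling is now applied per call by auto.chat(), roughly as below (the function name is illustrative; a try/finally is added here for safety):

def complete_with_function_calling(llm, messages, tools):
    # Temporarily switch a llama_cpp.Llama instance to the chatml-function-calling template
    previous = llm.chat_format
    llm.chat_format = "chatml-function-calling"
    try:
        return llm.create_chat_completion(messages=messages, tools=tools, tool_choice="auto")
    finally:
        llm.chat_format = previous  # restore whatever format the model was created with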