Better queries with indexed data

This commit is contained in:
pancake 2023-11-20 16:49:11 +01:00
parent dce96b3c6e
commit 7e3611a4ab
3 changed files with 16 additions and 9 deletions

View File

@@ -10,6 +10,7 @@ try:
except:
from utils import slurp
MAXCHARS = 128
MAXMATCHES = 5
MASTODON_KEY = ""
try:
@@ -96,12 +97,16 @@ def md2txt(text):
def filter_line(line):
line = unidecode(line) # remove accents
line = re.sub(r'https?://\S+', '', line)
line = re.sub(r'http?://\S+', '', line)
line = line.replace(":", " ").replace("/", " ").replace("`", " ").replace("?", " ")
line = line.replace("\"", " ").replace("'", " ")
line = line.replace("<", " ").replace(">", " ").replace("@", " ").replace("#", " ")
nline = line.replace("-", " ").replace(".", " ").replace(",", " ").replace("(", " ").replace(")", " ").strip(" ")
line = line.replace("-", " ").replace(".", " ").replace(",", " ").replace("(", " ").replace(")", " ").strip(" ")
if len(line) > MAXCHARS:
line = line[:MAXCHARS]
words = []
for a in nline.split(" "):
for a in line.split(" "):
b = a.strip().lower()
try:
int(b)

View File

@@ -91,7 +91,7 @@ def messages_to_prompt(self, messages):
else:
formatted_messages = template_llama(self, messages)
if "DEBUG" in self.env:
if self.env["debug"] == "true":
builtins.print(formatted_messages)
return formatted_messages
@@ -534,8 +534,7 @@ class Interpreter:
max_tokens=(self.context_window-self.max_tokens-25),
system_message=system_message)
# DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG
if "DEBUG" in self.env:
if self.env["debug"] == "true":
print(messages)
# Code-Llama

View File

@@ -1,9 +1,10 @@
import subprocess
import os
import re
# Module-level speech settings and state.
have_whisper = False  # presumably flipped once the whisper import succeeds — confirm in the try block below
model = None  # no speech model object at import time
voice_model = "large"  # name of the voice model to load (previous value was "base")
LANGUAGE = "ca"  # language code passed to model.transcribe()
DEVICE = None  # no audio device chosen at import time — TODO confirm where this is assigned
try:
@@ -42,7 +43,7 @@ def stt(seconds):
if device is None:
tts("(r2ai)", "cannot find a microphone")
return
tts("(r2ai) listening for 5s... ", "si?")
tts("(r2ai) listening for 5s... ", "digues?")
print(f"DEVICE IS {device}")
os.system("rm -f .audiomsg.wav")
os.system(f"ffmpeg -f avfoundation -t 5 -i '{device}' .audiomsg.wav > /dev/null 2>&1")
@@ -52,7 +53,7 @@ def stt(seconds):
else:
result = model.transcribe(".audiomsg.wav", language=LANGUAGE)
os.system("rm -f .audiomsg.wav")
tts("(r2ai)", "ah")
tts("(r2ai)", "ok")
text = result["text"].strip()
if text == "you":
return ""
@@ -60,5 +61,7 @@ def stt(seconds):
return text
def tts(author, text):
    """Print a message and speak it aloud through the `say` command.

    URLs are stripped from the spoken text only; the printed line keeps the
    message exactly as it was passed in.

    Args:
        author: Label printed in front of the message.
        text: Message to print and to speak.
    """
    # One pattern is enough: `https?` matches both http:// and https:// links.
    clean_text = re.sub(r'https?://\S+', '', text)
    print(f"{author}: {text}")
    # Speak exactly once, using the URL-free text.
    subprocess.run(["say", clean_text])