diff --git a/examples/funcall.py b/examples/funcall.py
index 6f99e78..c82b5b8 100755
--- a/examples/funcall.py
+++ b/examples/funcall.py
@@ -94,6 +94,7 @@ leprompt += r'''
 #model_name = "llama-2-7b-chat-codeCherryPop.Q5_K_M.gguf"
 model_name = "mistral-7b-instruct-v0.1.Q2_K.gguf"
+# model_name = "dolphin-2_6-phi-2.Q5_K_M.gguf"
 # model_name = "codellama-7b-instruct.Q4_K_M.gguf"
 # model_name = "codellama-34b-instruct.Q4_K_M.gguf"
 # model_name = "Wizard-Vicuna-7B-Uncensored.Q2_K.gguf"
diff --git a/main.py b/main.py
index f141ba6..719805c 100755
--- a/main.py
+++ b/main.py
@@ -383,16 +383,16 @@ if have_r2pipe:
 if have_rlang:
     def r2ai_rlang_plugin(unused_but_required_argument):
         def _call(s):
-            if s == "r2ai":
-                print(help_message)
-                return True
-            elif s.startswith("r2ai"):
-                usertext = s[4:].strip()
-                try:
-                    runline(usertext)
-                except Exception as e:
-                    print(e)
-                    traceback.print_exc()
+            if s.startswith("r2ai"):
+                if len(s) == 4:
+                    builtins.print(help_message)
+                else:
+                    usertext = s[4:].strip()
+                    try:
+                        runline(usertext)
+                    except Exception as e:
+                        builtins.print(e)
+                        traceback.print_exc()
                 return True
             return False
@@ -423,4 +423,4 @@ elif have_r2pipe:
     r2ai_repl()
 else:
     print("r2ai plugin not initialized, you need to install rlang-python")
-sys.stderr.close()
+# sys.stderr.close()
diff --git a/r2ai/interpreter.py b/r2ai/interpreter.py
index 8941993..d220992 100644
--- a/r2ai/interpreter.py
+++ b/r2ai/interpreter.py
@@ -562,7 +562,7 @@ class Interpreter:
             debug_mode = False # maybe true when debuglevel=2 ?
             self.llama_instance = new_get_hf_llm(self.model, debug_mode, ctxwindow)
             if self.llama_instance == None:
-                print("Cannot find the model")
+                builtins.print("Cannot find the model")
                 return
         except:
             traceback.print_exc()
diff --git a/r2ai/models.py b/r2ai/models.py
index 81d2fd9..327e9a9 100644
--- a/r2ai/models.py
+++ b/r2ai/models.py
@@ -95,7 +95,7 @@ def get_hf_llm(repo_id, debug_mode, context_window):
             repo_id = get_default_model()
         if usermodels is not None and repo_id in usermodels:
             model_path = usermodels[repo_id]
-#            print("[r2ai] Using " + r2ai_model_json+": " + model_path)
+#            print(f"[r2ai] Using {r2ai_model_json} {model_path}")
             return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
     except:
         traceback.print_exc()
@@ -278,7 +278,6 @@ def get_hf_llm(repo_id, debug_mode, context_window):
             else:
                 install_llama("OpenBLAS")
 
-        from llama_cpp import Llama
         print('', Markdown("Finished downloading `Code-Llama` interface."), '')
 
         # Check if on macOS
@@ -305,9 +304,7 @@ def get_hf_llm(repo_id, debug_mode, context_window):
                 json.dump(usermodels, fd)
                 fd.close()
                 print("Saved")
-    llama_2 = Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
-#    print("[r2ai] Using model: " + model_path)
-    return llama_2
+    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
 
 def set_default_model(repo_id):
     usermodels = {"default": repo_id}
@@ -438,24 +435,14 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
         return repo_id
     if not os.path.exists(repo_id):
         return get_hf_llm(repo_id, debug_mode, context_window)
-    # print("LOADING FILE: " + repo_id)
+    # print(f"LOADING FILE: {repo_id}")
     n_gpu_layers = -1 # = 0 to use cpu
-#   n_gpu_layers = 0
-    # Third stage: GPU confirm
-#if confirm_action("Use GPU? (Large models might crash on GPU, but will run more quickly)"):
-##    n_gpu_layers = -1
-#   else:
-#       n_gpu_layers = 0
-
-    # Get user data directory
     user_data_dir = appdirs.user_data_dir("Open Interpreter")
     default_path = os.path.join(user_data_dir, "models")
 
     # Ensure the directory exists
     os.makedirs(default_path, exist_ok=True)
     model_path = repo_id
-    # This is helpful for folks looking to delete corrupted ones and such
-#print(Markdown(f"Model found at `{model_path}`"))
 
     try:
         from llama_cpp import Llama
@@ -517,7 +504,6 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
             else:
                 install_llama("OpenBLAS")
 
-        from llama_cpp import Llama
         print('', Markdown("Finished downloading `Code-Llama` interface."), '')
 
         # Tell them if their architecture won't work well
@@ -542,4 +528,4 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
     # Initialize and return Code-Llama
     if not os.path.isfile(model_path):
         print("Model is not a file")
-    return Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
+    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)