# Patch summary (free-form preamble; patch tools skip text before the first "diff --git" line).
#
# sources/agent/imageDescription.ts
#   - Adds `import { gptRequest } from "../modules/openai"`.
#   - Adds a function body that returns gptRequest(systemPrompt, question), where the system
#     prompt embeds the `images` description text via `${images}`.
#     NOTE(review): the signature line of the added function is not visible in this copy of the
#     patch -- confirm its name and declared return type against the real file.
#
# sources/modules/openai.ts
#   - gptRequest changes from gptRequest(prompt) to gptRequest(systemPrompt, userPrompt).
#   - The request model changes from "gpt-4" to "gpt-4o".
#   - The hard-coded system message "You are a helpful assistant." is replaced by systemPrompt;
#     the user message now carries userPrompt.
#   - gptRequest still returns `response.data` as-is.
#   - Re-adds the top-level textToSpeech("Hello I am an agent") call (now newline-terminated)
#     and appends a top-level console.info(gptRequest(...)) call with a sample description and
#     the question 'where is the person?'.
#
# NOTE(review): gptRequest is declared `async`, so console.info(gptRequest(...)) is handed a
#   Promise rather than the completion result -- confirm whether an await/then was intended.
# NOTE(review): the textToSpeech(...) and console.info(gptRequest(...)) calls are top-level
#   statements in openai.ts; since imageDescription.ts now imports from that module, they
#   presumably execute on import (audio + network request) -- confirm this is only debug code.
# NOTE(review): gptRequest returns the whole `response.data` object, and the new caller in
#   imageDescription.ts returns that value unchanged -- verify the caller does not expect plain
#   answer text.
# NOTE(review): the gptRequest signature change is not backward-compatible; verify there are no
#   remaining one-argument call sites.
# NOTE(review): both files still end without a trailing newline ("\ No newline at end of file").
diff --git a/sources/agent/imageDescription.ts b/sources/agent/imageDescription.ts index d13387e..1dd8837 100644 --- a/sources/agent/imageDescription.ts +++ b/sources/agent/imageDescription.ts @@ -1,5 +1,6 @@ import { KnownModel, ollamaInference } from "../modules/ollama"; import { groqRequest } from "../modules/groq-llama3"; +import { gptRequest } from "../modules/openai"; export async function imageDescription(src: Uint8Array, model?: KnownModel): Promise { @@ -32,4 +33,22 @@ export async function llamaFind(question: string, images: string): Promise { + return gptRequest( + ` + You are a smart AI that need to read through description of a images and answer user's questions. + + This are the provided images: + ${images} + + DO NOT mention the images, scenes or descriptions in your answer, just answer the question. + DO NOT try to generalize or provide possible scenarios. + ONLY use the information in the description of the images to answer the question. + BE concise and specific. + ` + , + question + ); } \ No newline at end of file diff --git a/sources/modules/openai.ts b/sources/modules/openai.ts index d41482b..42debf4 100644 --- a/sources/modules/openai.ts +++ b/sources/modules/openai.ts @@ -71,13 +71,13 @@ export async function describeImage(imagePath: string) { } } -export async function gptRequest(prompt: string) { +export async function gptRequest(systemPrompt: string, userPrompt: string) { try { const response = await axios.post("https://api.openai.com/v1/chat/completions", { - model: "gpt-4", + model: "gpt-4o", messages: [ - { role: "system", content: "You are a helpful assistant." 
}, - { role: "user", content: prompt }, + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, ], }); return response.data; @@ -88,4 +88,22 @@ export async function gptRequest(prompt: string) { } -textToSpeech("Hello I am an agent") \ No newline at end of file +textToSpeech("Hello I am an agent") +console.info(gptRequest( + ` + You are a smart AI that need to read through description of a images and answer user's questions. + + This are the provided images: + The image features a woman standing in an open space with a metal roof, possibly at a train station or another large building. + She is wearing a hat and appears to be looking up towards the sky. + The scene captures her attention as she gazes upwards, perhaps admiring something above her or simply enjoying the view from this elevated position. + + DO NOT mention the images, scenes or descriptions in your answer, just answer the question. + DO NOT try to generalize or provide possible scenarios. + ONLY use the information in the description of the images to answer the question. + BE concise and specific. + ` + , + 'where is the person?' + +)) \ No newline at end of file