Small tweaks

Franci Penov 2024-06-05 19:31:26 -07:00
parent 31fc374a10
commit cad29a1772
3 changed files with 29 additions and 14 deletions

View File

@@ -3,9 +3,9 @@ import { groqRequest } from "../modules/groq-llama3";
 import { gptRequest } from "../modules/openai";
-export async function imageDescription(src: Uint8Array, model?: KnownModel): Promise<string> {
+export async function imageDescription(src: Uint8Array, model: KnownModel = 'moondream:1.8b-v2-fp16'): Promise<string> {
     return ollamaInference({
-        model: model || 'moondream:1.8b-v2-moondream2-text-model-f16',
+        model: model,
         messages: [{
             role: 'system',
             content: 'You are a very advanced model and your task is to describe the image as precisely as possible. Transcribe any text you see.'
@@ -14,14 +14,14 @@ export async function imageDescription(src: Uint8Array, model?: KnownModel): Pro
             content: 'Describe the scene',
             images: [src],
         }]
-    });
+    });
 }
 export async function llamaFind(question: string, images: string): Promise<string> {
     return groqRequest(
         `
-        You are a smart AI that need to read through description of a images and answer user's questions.
+        You are a smart AI that need to read through description of a images and answer user's questions.
         This are the provided images:
         ${images}
@@ -30,7 +30,7 @@ export async function llamaFind(question: string, images: string): Promise<strin
         ONLY use the information in the description of the images to answer the question.
         BE concise and specific.
         `
-        ,
+        ,
         question
     );
 }
@@ -38,8 +38,8 @@ export async function llamaFind(question: string, images: string): Promise<strin
 export async function openAIFind(question: string, images: string): Promise<string> {
     return gptRequest(
         `
-        You are a smart AI that need to read through description of a images and answer user's questions.
+        You are a smart AI that need to read through description of a images and answer user's questions.
         This are the provided images:
         ${images}
@@ -48,7 +48,7 @@ export async function openAIFind(question: string, images: string): Promise<stri
         ONLY use the information in the description of the images to answer the question.
         BE concise and specific.
         `
-        ,
+        ,
         question
     );
 }
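
Note on the change above: replacing `model: model || '<fallback>'` with a default parameter value is safe here only because `model` is an optional `KnownModel`. A default applies only when the argument is `undefined`, while `||` substitutes the fallback for every falsy value. A minimal TypeScript sketch of the difference (this `KnownModel` union is a stand-in for illustration, not the project's real definition):

// Minimal sketch; this KnownModel union is assumed, not the real type.
type KnownModel = 'moondream:1.8b-v2-fp16' | 'llama3';

// Old style: '||' substitutes the fallback for every falsy argument.
function pickWithOr(model?: KnownModel): string {
    return model || 'moondream:1.8b-v2-fp16';
}

// New style: the default is used only when the argument is undefined.
function pickWithDefault(model: KnownModel = 'moondream:1.8b-v2-fp16'): string {
    return model;
}

console.log(pickWithOr());              // 'moondream:1.8b-v2-fp16'
console.log(pickWithDefault());         // 'moondream:1.8b-v2-fp16'
console.log(pickWithDefault('llama3')); // 'llama3'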

View File

@@ -37,5 +37,5 @@ export async function ollamaInference(args: {
         });
         return resp.data;
     });
-    return trimIdent((response.message.content as string));
+    return trimIdent(((response.message?.content ?? '') as string));
 }
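
Note on the change above: `response.message.content` throws a TypeError whenever the response arrives without a `message` object; `response.message?.content ?? ''` short-circuits to `undefined` and then falls back to the empty string, so `trimIdent` always receives a string. A self-contained sketch of the pattern (the response shape here is assumed, not taken from the Ollama client types):

// Hypothetical response shape, for illustration only.
interface ChatResponse {
    message?: { content?: string };
}

function extractContent(response: ChatResponse): string {
    // '?.' stops the property chain at undefined instead of throwing;
    // '??' replaces only null/undefined, never a legitimate '' value.
    return response.message?.content ?? '';
}

console.log(extractContent({}));                              // ''
console.log(extractContent({ message: { content: 'hi' } }));  // 'hi'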

View File

@@ -7,6 +7,11 @@ export async function transcribeAudio(audioPath: string) {
     try {
         const response = await axios.post("https://api.openai.com/v1/audio/transcriptions", {
             audio: audioBase64,
+        }, {
+            headers: {
+                'Authorization': `Bearer ${keys.openai}`, // Replace YOUR_API_KEY with your actual OpenAI API key
+                'Content-Type': 'application/json'
+            },
         });
         return response.data;
     } catch (error) {
@@ -25,7 +30,7 @@ export async function textToSpeech(text: string) {
     try {
         const response = await axios.post("https://api.openai.com/v1/audio/speech", {
             input: text, // Use 'input' instead of 'text'
-            voice: "nova",
+            voice: "nova",
             model: "tts-1",
         }, {
             headers: {
@@ -63,6 +68,11 @@ export async function describeImage(imagePath: string) {
     try {
         const response = await axios.post("https://api.openai.com/v1/images/descriptions", {
             image: imageBase64,
+        }, {
+            headers: {
+                'Authorization': `Bearer ${keys.openai}`, // Replace YOUR_API_KEY with your actual OpenAI API key
+                'Content-Type': 'application/json'
+            },
         });
         return response.data;
     } catch (error) {
@@ -79,6 +89,11 @@ export async function gptRequest(systemPrompt: string, userPrompt: string) {
             { role: "system", content: systemPrompt },
             { role: "user", content: userPrompt },
         ],
+        }, {
+            headers: {
+                'Authorization': `Bearer ${keys.openai}`, // Replace YOUR_API_KEY with your actual OpenAI API key
+                'Content-Type': 'application/json'
+            },
         });
         return response.data;
     } catch (error) {
@@ -91,8 +106,8 @@ export async function gptRequest(systemPrompt: string, userPrompt: string) {
 textToSpeech("Hello I am an agent")
 console.info(gptRequest(
     `
-    You are a smart AI that need to read through description of a images and answer user's questions.
+    You are a smart AI that need to read through description of a images and answer user's questions.
     This are the provided images:
     The image features a woman standing in an open space with a metal roof, possibly at a train station or another large building.
     She is wearing a hat and appears to be looking up towards the sky.
@@ -103,7 +118,7 @@ console.info(gptRequest(
     ONLY use the information in the description of the images to answer the question.
     BE concise and specific.
     `
-    ,
+    ,
     'where is the person?'
 ))
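
Note on the changes above: the same Authorization/Content-Type header block is now pasted into each axios.post call. One way to keep the auth in a single place, sketched here with an assumed ./keys module and an assumed chat model name, is a preconfigured axios instance whose defaults every request inherits:

import axios from "axios";
import { keys } from "./keys"; // assumed module exporting { openai: string }

// Shared client: baseURL plus default headers applied to every request.
const openaiClient = axios.create({
    baseURL: "https://api.openai.com/v1",
    headers: {
        'Authorization': `Bearer ${keys.openai}`,
        'Content-Type': 'application/json'
    },
});

// gptRequest rewritten against the shared client; "gpt-4o" is an assumed model name.
export async function gptRequest(systemPrompt: string, userPrompt: string) {
    const response = await openaiClient.post("/chat/completions", {
        model: "gpt-4o",
        messages: [
            { role: "system", content: systemPrompt },
            { role: "user", content: userPrompt },
        ],
    });
    return response.data;
}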