Small tweaks
This commit is contained in:
parent
31fc374a10
commit
cad29a1772
|
@ -3,9 +3,9 @@ import { groqRequest } from "../modules/groq-llama3";
|
|||
import { gptRequest } from "../modules/openai";
|
||||
|
||||
|
||||
export async function imageDescription(src: Uint8Array, model?: KnownModel): Promise<string> {
|
||||
export async function imageDescription(src: Uint8Array, model: KnownModel = 'moondream:1.8b-v2-fp16'): Promise<string> {
|
||||
return ollamaInference({
|
||||
model: model || 'moondream:1.8b-v2-moondream2-text-model-f16',
|
||||
model: model,
|
||||
messages: [{
|
||||
role: 'system',
|
||||
content: 'You are a very advanced model and your task is to describe the image as precisely as possible. Transcribe any text you see.'
|
||||
|
@ -14,14 +14,14 @@ export async function imageDescription(src: Uint8Array, model?: KnownModel): Pro
|
|||
content: 'Describe the scene',
|
||||
images: [src],
|
||||
}]
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export async function llamaFind(question: string, images: string): Promise<string> {
|
||||
return groqRequest(
|
||||
`
|
||||
You are a smart AI that need to read through description of a images and answer user's questions.
|
||||
|
||||
You are a smart AI that need to read through description of a images and answer user's questions.
|
||||
|
||||
This are the provided images:
|
||||
${images}
|
||||
|
||||
|
@ -30,7 +30,7 @@ export async function llamaFind(question: string, images: string): Promise<strin
|
|||
ONLY use the information in the description of the images to answer the question.
|
||||
BE concise and specific.
|
||||
`
|
||||
,
|
||||
,
|
||||
question
|
||||
);
|
||||
}
|
||||
|
@ -38,8 +38,8 @@ export async function llamaFind(question: string, images: string): Promise<strin
|
|||
export async function openAIFind(question: string, images: string): Promise<string> {
|
||||
return gptRequest(
|
||||
`
|
||||
You are a smart AI that need to read through description of a images and answer user's questions.
|
||||
|
||||
You are a smart AI that need to read through description of a images and answer user's questions.
|
||||
|
||||
This are the provided images:
|
||||
${images}
|
||||
|
||||
|
@ -48,7 +48,7 @@ export async function openAIFind(question: string, images: string): Promise<stri
|
|||
ONLY use the information in the description of the images to answer the question.
|
||||
BE concise and specific.
|
||||
`
|
||||
,
|
||||
,
|
||||
question
|
||||
);
|
||||
}
|
|
@ -37,5 +37,5 @@ export async function ollamaInference(args: {
|
|||
});
|
||||
return resp.data;
|
||||
});
|
||||
return trimIdent((response.message.content as string));
|
||||
return trimIdent(((response.message?.content ?? '') as string));
|
||||
}
|
|
@ -7,6 +7,11 @@ export async function transcribeAudio(audioPath: string) {
|
|||
try {
|
||||
const response = await axios.post("https://api.openai.com/v1/audio/transcriptions", {
|
||||
audio: audioBase64,
|
||||
}, {
|
||||
headers: {
|
||||
'Authorization': `Bearer ${keys.openai}`, // Bearer token built from the OpenAI key in keys.openai
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
});
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
|
@ -25,7 +30,7 @@ export async function textToSpeech(text: string) {
|
|||
try {
|
||||
const response = await axios.post("https://api.openai.com/v1/audio/speech", {
|
||||
input: text, // Use 'input' instead of 'text'
|
||||
voice: "nova",
|
||||
voice: "nova",
|
||||
model: "tts-1",
|
||||
}, {
|
||||
headers: {
|
||||
|
@ -63,6 +68,11 @@ export async function describeImage(imagePath: string) {
|
|||
try {
|
||||
const response = await axios.post("https://api.openai.com/v1/images/descriptions", {
|
||||
image: imageBase64,
|
||||
}, {
|
||||
headers: {
|
||||
'Authorization': `Bearer ${keys.openai}`, // Bearer token built from the OpenAI key in keys.openai
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
});
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
|
@ -79,6 +89,11 @@ export async function gptRequest(systemPrompt: string, userPrompt: string) {
|
|||
{ role: "system", content: systemPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
],
|
||||
}, {
|
||||
headers: {
|
||||
'Authorization': `Bearer ${keys.openai}`, // Bearer token built from the OpenAI key in keys.openai
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
});
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
|
@ -91,8 +106,8 @@ export async function gptRequest(systemPrompt: string, userPrompt: string) {
|
|||
textToSpeech("Hello I am an agent")
|
||||
console.info(gptRequest(
|
||||
`
|
||||
You are a smart AI that need to read through description of a images and answer user's questions.
|
||||
|
||||
You are a smart AI that need to read through description of a images and answer user's questions.
|
||||
|
||||
This are the provided images:
|
||||
The image features a woman standing in an open space with a metal roof, possibly at a train station or another large building.
|
||||
She is wearing a hat and appears to be looking up towards the sky.
|
||||
|
@ -103,7 +118,7 @@ console.info(gptRequest(
|
|||
ONLY use the information in the description of the images to answer the question.
|
||||
BE concise and specific.
|
||||
`
|
||||
,
|
||||
,
|
||||
'where is the person?'
|
||||
|
||||
))
|
Loading…
Reference in New Issue