Compare commits
1 Commit
main
...
siglip-sem
Author | SHA1 | Date |
---|---|---|
Joshua Lochner | 604cc4ad18 |
|
@@ -8,7 +8,7 @@
|
||||||
"name": "semantic-image-search-client",
|
"name": "semantic-image-search-client",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@xenova/transformers": "^2.6.1",
|
"@xenova/transformers": "^2.14.0",
|
||||||
"autoprefixer": "10.4.14",
|
"autoprefixer": "10.4.14",
|
||||||
"blurhash": "^2.0.5",
|
"blurhash": "^2.0.5",
|
||||||
"eslint": "8.45.0",
|
"eslint": "8.45.0",
|
||||||
|
@@ -102,6 +102,14 @@
|
||||||
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
|
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@huggingface/jinja": {
|
||||||
|
"version": "0.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.1.2.tgz",
|
||||||
|
"integrity": "sha512-x5mpbfJt1nKmVep5WNP5VjNsjWApWNj8pPYI+uYMkBWH9bWUJmQmHt2lbf0VCoQd54Oq3XuFEh/UyoVh7rPxmg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@humanwhocodes/config-array": {
|
"node_modules/@humanwhocodes/config-array": {
|
||||||
"version": "0.11.10",
|
"version": "0.11.10",
|
||||||
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.10.tgz",
|
"resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.10.tgz",
|
||||||
|
@@ -553,10 +561,11 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@xenova/transformers": {
|
"node_modules/@xenova/transformers": {
|
||||||
"version": "2.6.1",
|
"version": "2.14.0",
|
||||||
"resolved": "https://registry.npmjs.org/@xenova/transformers/-/transformers-2.6.1.tgz",
|
"resolved": "https://registry.npmjs.org/@xenova/transformers/-/transformers-2.14.0.tgz",
|
||||||
"integrity": "sha512-fK1SkZUCvTdH1gEWmBUU5rvugZBqqu0ibkaBmUIr5t9Kf+Z8W4n0IszSRS2+M5ZHxRKS3SE7pFpsMDXByIzmQw==",
|
"integrity": "sha512-rQ3O7SW5EM64b6XFZGx3XQ2cfiroefxUwU9ShfSpEZyhd082GvwNJJKndxgaukse1hZP1JUDoT0DfjDiq4IZiw==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@huggingface/jinja": "^0.1.0",
|
||||||
"onnxruntime-web": "1.14.0",
|
"onnxruntime-web": "1.14.0",
|
||||||
"sharp": "^0.32.0"
|
"sharp": "^0.32.0"
|
||||||
},
|
},
|
||||||
|
|
|
@@ -9,7 +9,7 @@
|
||||||
"lint": "next lint"
|
"lint": "next lint"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@xenova/transformers": "^2.6.1",
|
"@xenova/transformers": "^2.14.0",
|
||||||
"autoprefixer": "10.4.14",
|
"autoprefixer": "10.4.14",
|
||||||
"blurhash": "^2.0.5",
|
"blurhash": "^2.0.5",
|
||||||
"eslint": "8.45.0",
|
"eslint": "8.45.0",
|
||||||
|
|
|
@@ -2,14 +2,14 @@
|
||||||
import { env, AutoTokenizer, CLIPTextModelWithProjection } from '@xenova/transformers';
|
import { env, AutoTokenizer, CLIPTextModelWithProjection } from '@xenova/transformers';
|
||||||
import { getCachedFile, getCachedJSON } from './utils.js';
|
import { getCachedFile, getCachedJSON } from './utils.js';
|
||||||
|
|
||||||
const EMBED_DIM = 512;
|
const EMBED_DIM = 768;
|
||||||
|
const DB_SIZE = 10; // or 25
|
||||||
// Skip local model check
|
// Skip local model check
|
||||||
env.allowLocalModels = false;
|
env.allowLocalModels = false;
|
||||||
|
|
||||||
class ApplicationSingleton {
|
class ApplicationSingleton {
|
||||||
static model_id = 'Xenova/clip-vit-base-patch16';
|
static model_id = 'Xenova/siglip-base-patch16-224';
|
||||||
static BASE_URL = 'https://huggingface.co/datasets/Xenova/semantic-image-search-assets/resolve/main/';
|
static BASE_URL = 'https://huggingface.co/datasets/Xenova/siglip-semantic-image-search-assets/resolve/main/';
|
||||||
|
|
||||||
static tokenizer = null;
|
static tokenizer = null;
|
||||||
static text_model = null;
|
static text_model = null;
|
||||||
|
@@ -25,12 +25,12 @@ class ApplicationSingleton {
|
||||||
this.text_model = CLIPTextModelWithProjection.from_pretrained(this.model_id, { progress_callback });
|
this.text_model = CLIPTextModelWithProjection.from_pretrained(this.model_id, { progress_callback });
|
||||||
}
|
}
|
||||||
if (this.metadata === null) {
|
if (this.metadata === null) {
|
||||||
this.metadata = getCachedJSON(this.BASE_URL + 'image-embeddings.json');
|
this.metadata = getCachedJSON(`${this.BASE_URL}metadata_${DB_SIZE}k.json`);
|
||||||
}
|
}
|
||||||
if (this.embeddings === null) {
|
if (this.embeddings === null) {
|
||||||
this.embeddings = new Promise(
|
this.embeddings = new Promise(
|
||||||
(resolve, reject) => {
|
(resolve, reject) => {
|
||||||
getCachedFile(this.BASE_URL + 'image-embeddings_25k-512-32bit.bin')
|
getCachedFile(`${this.BASE_URL}image-embeddings_${DB_SIZE}k-768-32bit.bin`)
|
||||||
.then((buffer) => {
|
.then((buffer) => {
|
||||||
resolve(new Float32Array(buffer));
|
resolve(new Float32Array(buffer));
|
||||||
})
|
})
|
||||||
|
@@ -80,10 +80,10 @@ self.addEventListener('message', async (event) => {
|
||||||
self.postMessage({ status: 'ready' });
|
self.postMessage({ status: 'ready' });
|
||||||
|
|
||||||
// Run tokenization
|
// Run tokenization
|
||||||
const text_inputs = tokenizer(event.data.text, { padding: true, truncation: true });
|
const text_inputs = tokenizer(event.data.text, { padding: 'max_length', truncation: true });
|
||||||
|
|
||||||
// Compute embeddings
|
// Compute embeddings
|
||||||
const { text_embeds } = await text_model(text_inputs);
|
const { pooler_output: text_embeds } = await text_model(text_inputs);
|
||||||
|
|
||||||
// Compute similarity scores
|
// Compute similarity scores
|
||||||
const scores = cosineSimilarity(text_embeds.data, embeddings);
|
const scores = cosineSimilarity(text_embeds.data, embeddings);
|
||||||
|
|
Loading…
Reference in New Issue