Add SAM demo (#513)
This commit is contained in:
parent
5b5aa4cf6a
commit
935274fd3f
|
@ -0,0 +1,116 @@
|
|||
* {
|
||||
box-sizing: border-box;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
html,
|
||||
body {
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
padding: 16px 32px;
|
||||
}
|
||||
|
||||
body,
|
||||
#container,
|
||||
#upload-button {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
h1 {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
#container {
|
||||
position: relative;
|
||||
width: 640px;
|
||||
height: 420px;
|
||||
max-width: 100%;
|
||||
max-height: 100%;
|
||||
border: 2px dashed #D1D5DB;
|
||||
border-radius: 0.75rem;
|
||||
overflow: hidden;
|
||||
cursor: pointer;
|
||||
margin-top: 1rem;
|
||||
background-size: 100% 100%;
|
||||
background-position: center;
|
||||
background-repeat: no-repeat;
|
||||
}
|
||||
|
||||
#mask-output {
|
||||
position: absolute;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
#upload-button {
|
||||
gap: 0.4rem;
|
||||
font-size: 18px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#upload {
|
||||
display: none;
|
||||
}
|
||||
|
||||
svg {
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
#example {
|
||||
font-size: 14px;
|
||||
text-decoration: underline;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#example:hover {
|
||||
color: #2563EB;
|
||||
}
|
||||
|
||||
canvas {
|
||||
position: absolute;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
opacity: 0.6;
|
||||
}
|
||||
|
||||
#status {
|
||||
min-height: 16px;
|
||||
margin: 8px 0;
|
||||
}
|
||||
|
||||
.icon {
|
||||
height: 16px;
|
||||
width: 16px;
|
||||
position: absolute;
|
||||
transform: translate(-50%, -50%);
|
||||
}
|
||||
|
||||
#controls>button {
|
||||
padding: 6px 12px;
|
||||
background-color: #3498db;
|
||||
color: white;
|
||||
border: 1px solid #2980b9;
|
||||
border-radius: 5px;
|
||||
cursor: pointer;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
#controls>button:disabled {
|
||||
background-color: #d1d5db;
|
||||
color: #6b7280;
|
||||
border: 1px solid #9ca3af;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
#information {
|
||||
margin-top: 0.25rem;
|
||||
font-size: 15px;
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="stylesheet" href="index.css" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Transformers.js - Segment Anything</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Segment Anything w/ 🤗 Transformers.js</h1>
|
||||
<div id="container">
|
||||
<label id="upload-button" for="upload">
|
||||
<svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill="#000"
|
||||
d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z">
|
||||
</path>
|
||||
</svg>
|
||||
Click to upload image
|
||||
<!-- A <label> may not contain another <label>; a <span> keeps the same id for the CSS rule and the click handler -->
<span id="example">(or try example)</span>
|
||||
</label>
|
||||
<canvas id="mask-output"></canvas>
|
||||
</div>
|
||||
<label id="status"></label>
|
||||
<div id="controls">
|
||||
<button id="reset-image">Reset image</button>
|
||||
<button id="clear-points">Clear points</button>
|
||||
<button id="cut-mask" disabled>Cut mask</button>
|
||||
</div>
|
||||
<p id="information">
|
||||
Left click = positive points, right click = negative points.
|
||||
</p>
|
||||
<input id="upload" type="file" accept="image/*" />
|
||||
|
||||
<script src="index.js" type="module"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
|
@ -0,0 +1,295 @@
|
|||
|
||||
// Look up the DOM nodes this script reads from or writes to
const byId = (id) => document.getElementById(id);
const statusLabel = byId('status');
const fileUpload = byId('upload');
const imageContainer = byId('container');
const example = byId('example');
const maskCanvas = byId('mask-output');
const uploadButton = byId('upload-button');
const resetButton = byId('reset-image');
const clearButton = byId('clear-points');
const cutButton = byId('cut-mask');

// Mutable UI state
let lastPoints = null; // prompt point(s) that the next decode request will send
let isEncoded = false; // true once the worker reports the image embedding is done
let isDecoding = false; // true while a decode request is outstanding
let isMultiMaskMode = false; // true after the first click; hover decoding is then ignored
let modelReady = false; // true once the worker posts 'ready'
let imageDataURI = null; // source (data URI or URL) of the image currently shown

// Remote assets
const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/';
const EXAMPLE_URL = `${BASE_URL}corgi.jpg`;

// Inference runs in a module web worker so that the main (UI) thread is not blocked.
const worker = new Worker('worker.js', { type: 'module' });

// Build a marker image up front so the first click does not wait on a download.
function preloadIcon(fileName) {
    const icon = new Image();
    icon.src = `${BASE_URL}${fileName}`;
    icon.className = 'icon';
    return icon;
}

const star = preloadIcon('star-icon.png');
const cross = preloadIcon('cross-icon.png');
|
||||
|
||||
// Handle messages posted back by the worker:
//   'ready'          - the model and processor have finished loading
//   'segment_result' - embedding progress: 'start', then any other value once finished
//   'decode_result'  - a mask plus one score per candidate mask
worker.addEventListener('message', (e) => {
    const { type, data } = e.data;
    if (type === 'ready') {
        modelReady = true;
        statusLabel.textContent = 'Ready';

    } else if (type === 'decode_result') {
        isDecoding = false;

        if (!isEncoded) {
            return; // No embedding is active right now, so this result is ignored
        }

        if (!isMultiMaskMode && lastPoints) {
            // A newer hover position was recorded while this request was in flight:
            // decode it now, then mark it as consumed.
            decode();
            lastPoints = null;
        }

        const { mask, scores } = data;

        // Update canvas dimensions (only if they differ, since assigning them clears the canvas)
        if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
            maskCanvas.width = mask.width;
            maskCanvas.height = mask.height;
        }

        // Create context and allocate a buffer for the pixel data
        const context = maskCanvas.getContext('2d');
        const imageData = context.createImageData(maskCanvas.width, maskCanvas.height);

        // Select the mask with the highest score
        const numMasks = scores.length;
        let bestIndex = 0;
        for (let i = 1; i < numMasks; ++i) {
            if (scores[i] > scores[bestIndex]) {
                bestIndex = i;
            }
        }
        statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;

        // Fill the selected mask with colour.
        // The canvas buffer holds 4 bytes (RGBA) per pixel, so iterate over pixels rather than
        // bytes; mask.data is read as `numMasks` interleaved values per pixel.
        const pixelData = imageData.data;
        const numPixels = pixelData.length / 4;
        for (let i = 0; i < numPixels; ++i) {
            if (mask.data[numMasks * i + bestIndex] === 1) {
                const offset = 4 * i;
                pixelData[offset] = 0; // red
                pixelData[offset + 1] = 114; // green
                pixelData[offset + 2] = 189; // blue
                pixelData[offset + 3] = 255; // alpha
            }
        }

        // Draw image data to context
        context.putImageData(imageData, 0, 0);

    } else if (type === 'segment_result') {
        if (data === 'start') {
            statusLabel.textContent = 'Extracting image embedding...';
        } else {
            statusLabel.textContent = 'Embedding extracted!';
            isEncoded = true;
        }
    }
});
|
||||
|
||||
/**
 * Ask the worker to decode a mask for the current `lastPoints`
 * and flag that a decode request is outstanding.
 */
function decode() {
    isDecoding = true;
    const request = { type: 'decode', data: lastPoints };
    worker.postMessage(request);
}
|
||||
|
||||
/**
 * Drop all prompt points, remove their markers and wipe the mask overlay.
 */
function clearPointsAndMask() {
    // Leave multi-point mode with nothing pending
    lastPoints = null;
    isMultiMaskMode = false;

    // Remove every point marker currently in the document
    for (const marker of document.querySelectorAll('.icon')) {
        marker.remove();
    }

    // With no points left there is nothing to cut out
    cutButton.disabled = true;

    // Erase whatever mask is currently drawn
    const { width, height } = maskCanvas;
    maskCanvas.getContext('2d').clearRect(0, 0, width, height);
}
clearButton.addEventListener('click', clearPointsAndMask);
|
||||
|
||||
// "Reset image": forget the current image and return to the upload prompt
resetButton.addEventListener('click', () => {
    // Update state
    isEncoded = false;
    imageDataURI = null;
    // A decode request may still be outstanding. Its result is ignored while `isEncoded`
    // is false (and may never arrive), so it must not keep blocking hover decoding
    // for the next image.
    isDecoding = false;

    // Indicate to worker that we have reset the state
    worker.postMessage({ type: 'reset' });

    // Clear points and mask (this also disables the cut button)
    clearPointsAndMask();

    // Update UI
    imageContainer.style.backgroundImage = 'none';
    uploadButton.style.display = 'flex';
    statusLabel.textContent = 'Ready';
});
|
||||
|
||||
/**
 * Show an image in the container and ask the worker to compute its embedding.
 * @param {string} data Image source; callers pass either a data URI or a URL.
 */
function segment(data) {
    // A new image invalidates any previous embedding
    isEncoded = false;
    imageDataURI = data;
    if (!modelReady) {
        statusLabel.textContent = 'Loading model...';
    }

    // Replace the upload prompt with the image itself
    uploadButton.style.display = 'none';
    imageContainer.style.backgroundImage = `url(${data})`;
    cutButton.disabled = true;

    // Hand the image over to the worker
    const request = { type: 'segment', data };
    worker.postMessage(request);
}
|
||||
|
||||
// Handle file selection: read the chosen file as a data URI and segment it
fileUpload.addEventListener('change', (e) => {
    const input = e.target;
    const file = input.files[0];
    if (!file) {
        return; // Nothing was selected
    }

    const reader = new FileReader();

    // Start segmentation once the file has been read
    reader.onload = (e2) => segment(e2.target.result);

    // Surface read failures instead of leaving the UI silently unchanged
    reader.onerror = () => {
        statusLabel.textContent = 'Could not read the selected file';
    };

    reader.readAsDataURL(file);

    // Clear the input so that choosing the same file again (e.g. after "Reset image")
    // still fires a `change` event. The reader keeps its own reference to the file.
    input.value = '';
});
|
||||
|
||||
// "(or try example)": segment the hosted sample image instead of an uploaded file
example.addEventListener('click', (event) => {
    // Cancel the click's default action (presumably so the enclosing upload label
    // does not also open the file picker)
    event.preventDefault();
    segment(EXAMPLE_URL);
});
|
||||
|
||||
/**
 * Place a marker on the image container for one prompt point.
 * @param {{point: number[], label: number}} prompt `point` is [x, y] expressed as
 *   fractions of the container size; label 1 gets the star icon, any other label the cross.
 */
function addIcon({ point, label }) {
    const [x, y] = point;
    const template = label === 1 ? star : cross;

    // Clone the preloaded image so each marker is its own element
    const marker = template.cloneNode();
    marker.style.left = `${x * 100}%`;
    marker.style.top = `${y * 100}%`;
    imageContainer.appendChild(marker);
}
|
||||
|
||||
// Add a prompt point when the image is clicked (left = positive, right = negative)
imageContainer.addEventListener('mousedown', (e) => {
    const isLeftOrRightButton = e.button === 0 || e.button === 2;
    if (!isLeftOrRightButton || !isEncoded) {
        return; // Ignore other buttons, and clicks before the image is encoded
    }

    if (!isMultiMaskMode) {
        // First click: start collecting points from scratch and allow cutting
        lastPoints = [];
        isMultiMaskMode = true;
        cutButton.disabled = false;
    }

    const point = getPoint(e);
    lastPoints.push(point);

    // Show a marker for the new point, then decode with every point collected so far
    addIcon(point);
    decode();
});
|
||||
|
||||
|
||||
// Restrict `x` to the range [min, max] (defaults to [0, 1]).
// The upper bound is applied first, then the lower bound.
function clamp(x, min = 0, max = 1) {
    const capped = Math.min(x, max);
    return Math.max(capped, min);
}
|
||||
|
||||
/**
 * Convert a mouse event into a prompt point for the worker.
 * @param {MouseEvent} e Event whose `clientX`, `clientY` and `button` are read.
 * @returns {{point: number[], label: number}} `point` is [x, y] as fractions of the
 *   container size, clamped to [0, 1]; `label` is 0 for the right button, 1 otherwise.
 */
function getPoint(e) {
    const { left, top, width, height } = imageContainer.getBoundingClientRect();

    // Pointer position relative to the container, as a fraction of its size
    const x = clamp((e.clientX - left) / width);
    const y = clamp((e.clientY - top) / height);

    // Right click -> negative prompt (0); anything else -> positive prompt (1)
    const label = e.button === 2 ? 0 : 1;

    return { point: [x, y], label };
}
|
||||
|
||||
// Suppress the context menu over the image, since right clicks add negative points
imageContainer.addEventListener('contextmenu', (event) => event.preventDefault());
|
||||
|
||||
// While hovering (before any click), preview the mask under the pointer
imageContainer.addEventListener('mousemove', (e) => {
    const hoverPreviewActive = isEncoded && !isMultiMaskMode;
    if (!hoverPreviewActive) {
        // Nothing to preview until the image is encoded, and clicks take over afterwards
        return;
    }

    // Always remember the newest position, even if it cannot be decoded right away
    lastPoints = [getPoint(e)];

    if (isDecoding) {
        return; // A request is already in flight
    }
    decode();
});
|
||||
|
||||
// Handle cut button click: download the masked part of the image as a PNG
cutButton.addEventListener('click', () => {
    const { width: w, height: h } = maskCanvas;

    // Get the mask pixel data
    const maskPixelData = maskCanvas.getContext('2d').getImageData(0, 0, w, h);

    // Load the image
    const image = new Image();
    image.crossOrigin = 'anonymous'; // needed to read pixels back when the image comes from another origin
    image.onload = async () => {
        // Draw the image at the mask canvas size so both buffers line up pixel for pixel
        const imageCanvas = new OffscreenCanvas(w, h);
        const imageContext = imageCanvas.getContext('2d');
        imageContext.drawImage(image, 0, 0, w, h);
        const imagePixelData = imageContext.getImageData(0, 0, w, h);

        // Create a new canvas to hold the cut-out, starting from a blank (transparent) buffer
        const cutCanvas = new OffscreenCanvas(w, h);
        const cutContext = cutCanvas.getContext('2d');
        const cutPixelData = cutContext.createImageData(w, h);

        // Copy the RGBA bytes of every pixel whose mask alpha is non-zero
        for (let i = 0; i < maskPixelData.data.length; i += 4) {
            if (maskPixelData.data[i + 3] > 0) {
                for (let j = 0; j < 4; ++j) {
                    cutPixelData.data[i + j] = imagePixelData.data[i + j];
                }
            }
        }
        cutContext.putImageData(cutPixelData, 0, 0);

        // Download the image through a temporary (never attached) link
        const url = URL.createObjectURL(await cutCanvas.convertToBlob());
        const link = document.createElement('a');
        link.download = 'image.png';
        link.href = url;
        link.click();

        // Release the blob once the download has had a chance to start; without this
        // every cut keeps its blob alive for the lifetime of the page.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    };
    image.src = imageDataURI;
});
|
|
@ -0,0 +1,109 @@
|
|||
import { env, SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
|
||||
|
||||
// Since we will download the model from the Hugging Face Hub, we can skip the local model check
|
||||
env.allowLocalModels = false;
|
||||
|
||||
// Singleton holder: the model and processor are requested on first use and then reused.
export class SegmentAnythingSingleton {
    static model_id = 'Xenova/slimsam-77-uniform';
    static model;
    static processor;
    static quantized = true;

    /**
     * Start loading the model and processor if that has not happened yet,
     * otherwise reuse the values already stored on the class.
     * @returns {Promise<Array>} Resolves to `[model, processor]`.
     */
    static getInstance() {
        this.model ||= SamModel.from_pretrained(this.model_id, {
            quantized: this.quantized,
        });
        this.processor ||= AutoProcessor.from_pretrained(this.model_id);

        return Promise.all([this.model, this.processor]);
    }
}
|
||||
|
||||
|
||||
// State variables
|
||||
let image_embeddings = null;
|
||||
let image_inputs = null;
|
||||
let ready = false;
|
||||
|
||||
// Handle requests from the main thread. Supported message types:
//   'reset'   - forget the current image
//   'segment' - read an image and compute its embeddings
//   'decode'  - predict a mask for the given prompt points
// Any other type raises an Error.
self.onmessage = async (e) => {
    const [model, processor] = await SegmentAnythingSingleton.getInstance();
    if (!ready) {
        // Indicate that we are ready to accept requests
        ready = true;
        self.postMessage({
            type: 'ready',
        });
    }

    const { type, data } = e.data;
    if (type === 'reset') {
        image_inputs = null;
        image_embeddings = null;

    } else if (type === 'segment') {
        // Indicate that we are starting to segment the image
        self.postMessage({
            type: 'segment_result',
            data: 'start',
        });

        // Read the image and recompute image embeddings. The shared state is only
        // updated once both values exist, so the two are never out of sync.
        const image = await RawImage.read(data);
        const inputs = await processor(image);
        const embeddings = await model.get_image_embeddings(inputs);
        image_inputs = inputs;
        image_embeddings = embeddings;

        // Indicate that we have computed the image embeddings, and we are ready to accept decoding requests
        self.postMessage({
            type: 'segment_result',
            data: 'done',
        });

    } else if (type === 'decode') {
        // Snapshot the shared state: other messages (e.g. 'reset') can be handled while
        // this handler is awaiting below, and would otherwise null it out mid-request.
        const inputs = image_inputs;
        const embeddings = image_embeddings;
        if (!inputs || !embeddings) {
            throw new Error('Cannot decode: no image embeddings are available');
        }

        // Prepare inputs for decoding: scale the fractional points to the resized input.
        // x is scaled by index 1 and y by index 0 (presumably [height, width] - confirm).
        const reshaped = inputs.reshaped_input_sizes[0];
        const points = data.map((x) => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]]);
        const labels = data.map((x) => BigInt(x.label));

        const input_points = new Tensor(
            'float32',
            points.flat(Infinity),
            [1, 1, points.length, 2],
        );
        const input_labels = new Tensor(
            'int64',
            labels.flat(Infinity),
            [1, 1, labels.length],
        );

        // Generate the mask
        const outputs = await model({
            ...embeddings,
            input_points,
            input_labels,
        });

        // Post-process the mask
        const masks = await processor.post_process_masks(
            outputs.pred_masks,
            inputs.original_sizes,
            inputs.reshaped_input_sizes,
        );

        // Send the result back to the main thread
        self.postMessage({
            type: 'decode_result',
            data: {
                mask: RawImage.fromTensor(masks[0][0]),
                scores: outputs.iou_scores.data,
            },
        });

    } else {
        throw new Error(`Unknown message type: ${type}`);
    }
};
|
Loading…
Reference in New Issue