<!doctype html>
<html lang="en-us">
<head>
    <!-- Declare the encoding first so the parser never has to guess it. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Talk - GPT-2 meets Whisper in WebAssembly</title>
    <style>
        /* Debug console: full-width, terminal-like textarea filled by printTextarea(). */
        #output {
            width: 100%;
            height: 100%;
            margin: 0 auto;
            margin-top: 10px;
            border-left: 0px;
            border-right: 0px;
            padding-left: 0px;
            padding-right: 0px;
            display: block;
            background-color: black;
            color: white;
            font-size: 10px;
            font-family: 'Lucida Console', Monaco, monospace;
            outline: none;
            white-space: pre;
            overflow-wrap: normal;
            overflow-x: scroll;
        }

        /* The default outline is removed above; keep a visible indicator for keyboard focus. */
        #output:focus-visible {
            outline: 2px solid #6cf;
            outline-offset: -2px;
        }
    </style>
</head>
<body>
<div id="main-container">
<b>Talk - GPT-2 meets Whisper in WebAssembly</b>
<br><br>
On this page you can talk with an AI entity. It uses:
<ul>
<li><a href="https://github.com/ggerganov/whisper.cpp">OpenAI's Whisper</a> model to listen to you as you speak into the microphone</li>
<li><a href="https://github.com/ggerganov/ggml/tree/master/examples/gpt-2">OpenAI's GPT-2</a> model to generate a text response</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API">Web Speech API</a> to speak the response to you through the speakers</li>
</ul>
All of this runs <b>locally in your browser</b> using WebAssembly.<br>
You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/talk.wasm">GitHub</a>.
<br><br>
<hr>
Select the models you would like to use and click the "Start" button to begin the conversation
<br><br>
<!-- Whisper model picker: each button calls loadWhisper() with the model name. -->
<div id="model-whisper">
    <span id="model-whisper-status">Whisper model:</span>
    <button id="fetch-whisper-tiny-en" type="button" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
    <button id="fetch-whisper-base-en" type="button" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
    <span id="fetch-whisper-progress"></span>
    <!--
    <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
    -->
</div>
<br>
<!-- GPT-2 model picker: the button calls loadGPT2() with the model name. -->
<div id="model-gpt-2">
    <span id="model-gpt-2-status">GPT-2 model:</span>
    <button id="fetch-gpt-2-small" type="button" onclick="loadGPT2('small')">small 117M (240 MB)</button>
    <!-- <button id="fetch-gpt-2-medium" onclick="loadGPT2('medium')">medium 345M (720 MB)</button> -->
    <span id="fetch-gpt-2-progress"></span>
    <!--
    <input type="file" id="file" name="file" onchange="loadFile(event, 'gpt-2.bin')" />
    -->
</div>
<br>
<!-- Conversation controls. Every id is unique: the three action buttons used to share id="speak". -->
<div id="input">
    <button id="start" type="button" onclick="onStart()">Start</button>
    <button id="stop" type="button" onclick="onStop()" disabled>Stop</button>
    <select id="voice" onchange="onVoiceChange()" aria-label="Voice">
        <option value="0">Default</option>
    </select>
    <button id="speak" type="button" onclick="onSpeak('Hello')">Say hello</button>
    <button id="speak-random" type="button" onclick="onSpeakRandom()">Say something</button>
    <button id="clear-cache" type="button" onclick="clearCache()">Clear Cache</button>
</div>
<br>
<!-- Live state: both elements are refreshed by the polling timer started in onStart(). -->
<div id="state">
    Status: <b><span id="state-status">idle</span></b>
    <pre id="state-context">[The text context will be displayed here]</pre>
</div>
<hr>
<!-- Debug console written by printTextarea(); the label gives the textarea an accessible name. -->
<label for="output">Debug output:</label>
<textarea id="output" rows="20"></textarea>
<br>
<b>Troubleshooting</b>
<br><br>
The page does some heavy computations, so make sure:
<ul>
<li>To use a modern web browser (e.g. Chrome, Firefox)</li>
<li>To use a fast desktop or laptop computer (e.g. not a mobile phone)</li>
<li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
</ul>
<br><br>
<div class="cell-version">
<span>
|
Build time: <span class="nav-link">@GIT_DATE@</span> |
Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
<a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/talk.wasm">Source Code</a> |
</span>
</div>
</div>
<script type='text/javascript'>
// Logger used by the page and installed as Module.print / Module.printErr.
// Every message goes to the browser console and, when the #output textarea
// exists, is appended to it as a new line.
// When called with several arguments they are joined with single spaces.
var printTextarea = (function() {
    var element = document.getElementById('output');
    // Start from an empty textarea (the browser may restore cached contents).
    if (element) element.value = '';

    return function(text) {
        if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
        console.log(text);
        if (element) {
            element.value += text + "\n";
            element.scrollTop = element.scrollHeight; // focus on bottom
        }
    };
})();
// recording is restarted once the captured audio is longer than this many seconds (see startRecording)
const kRestartRecording_s = 15;
// samples per second; multiplied with kRestartRecording_s to get the restart threshold in samples
const kSampleRate = 16000;
// fall back to the webkit-prefixed constructors where the standard names are missing
window.AudioContext = window.AudioContext || window.webkitAudioContext;
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
// web audio context
var context = null;
// audio data
// audio  - channel 0 of the most recently decoded recording
// audio0 - audio kept from before the last recording restart; it is prepended to `audio`
var audio = null;
var audio0 = null;
// the talk instance
// (value returned by Module.init() in onStart)
var instance = null;
// model names
// (set by loadWhisper / loadGPT2, shown by storeFS once the model is stored)
var model_whisper = null;
var model_gpt_2 = null;
// speech synthesis
const synth = window.speechSynthesis;
// voice used by onSpeak; chosen in Module.postRun or onVoiceChange
var voice = null;
// Configuration object for the Emscripten-generated talk.js.
// All runtime output is routed to printTextarea; postRun additionally fills
// the #voice <select> with the English speech-synthesis voices and picks one.
var Module = {
    print: printTextarea,
    printErr: printTextarea,
    setStatus: function(text) {
        printTextarea('js: ' + text);
    },
    monitorRunDependencies: function(left) {
    },
    preRun: function() {
        printTextarea('js: Preparing ...');
    },
    postRun: function() {
        printTextarea('js: Initialized successfully!');

        // populate the voice list
        // NOTE(review): some browsers return an empty list from getVoices() until
        // the 'voiceschanged' event has fired - confirm the list is filled here.
        var voices = synth.getVoices();
        var el = document.getElementById('voice');
        var n = 0;
        voices.forEach(function(v, i) {
            if (!v.lang.startsWith('en')) return;
            var option = document.createElement('option');
            // the option value is the index into getVoices(), see onVoiceChange
            option.value = i;
            // voice names are plain text, not markup
            option.textContent = v.name + ' (' + v.lang + ')';
            el.appendChild(option);
            n++;
        });

        // select random voice
        if (n > 0) {
            // The n voice options were appended after the pre-existing ones
            // (the "Default" entry), so they start at this index.
            var first = el.options.length - n;
            for (var k = 0; k < 10; k++) {
                var idx = first + Math.floor(Math.random() * n);
                el.selectedIndex = idx;
                voice = voices[el.options[idx].value];
                // give preference to Google voices
                if (voice.name.startsWith('Google')) break;
            }
        }
    }
};
// helper function
// Copies the bytes of the typed array `src` into a fresh ArrayBuffer and
// returns a view of constructor `type` over that copy.
// The result never shares memory with `src`.
function convertTypedArray(src, type) {
    var buffer = new ArrayBuffer(src.byteLength);
    // write through a view of the source's own element type so the bytes are copied verbatim
    new src.constructor(buffer).set(src);
    return new type(buffer);
}
//
// fetch models
//
// Writes `buf` into the WASM file system as "/<fname>", replacing any previous
// file of that name, then reports the loaded model on the page.
function storeFS(fname, buf) {
    // remove a previous copy; any error from the unlink is deliberately ignored
    try {
        Module.FS_unlink(fname);
    } catch (e) {
        // ignore
    }

    Module.FS_createDataFile("/", fname, buf, true, true);

    printTextarea('js: stored model: ' + fname + ' size: ' + buf.length);

    // choose the container to overwrite and the message to show in it
    var containerId = null;
    var message = null;
    if (fname == 'whisper.bin') {
        containerId = 'model-whisper';
        message = 'Whisper model: loaded "' + model_whisper + '"!';
    } else if (fname == 'gpt-2.bin') {
        containerId = 'model-gpt-2';
        message = 'GPT-2 model: loaded "' + model_gpt_2 + '"!';
    }

    if (containerId !== null) {
        document.getElementById(containerId).innerHTML = message;
    }
}
// IndexedDB settings for the model cache (used by loadRemote and clearCache)
// version passed to indexedDB.open()
let dbVersion = 1
// database name
let dbName = 'talk.ggerganov.com';
// use the first available (possibly vendor-prefixed) IndexedDB factory
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
// fetch a remote file from remote URL using the Fetch API
//
// url        - address to download
// elProgress - element whose innerHTML receives the percentage while downloading
//
// Resolves to a Uint8Array holding the complete body, or to undefined when the
// download fails (bad HTTP status, network error, interrupted stream).
// Failures are logged through printTextarea and never reject the promise.
async function fetchRemote(url, elProgress) {
    printTextarea('js: downloading with fetch()...');

    try {
        const response = await fetch(
            url,
            {
                method: 'GET',
                headers: {
                    'Content-Type': 'application/octet-stream',
                },
            }
        );

        if (!response.ok) {
            printTextarea('js: failed to fetch ' + url);
            return;
        }

        const contentLength = response.headers.get('content-length');
        const total = parseInt(contentLength, 10);
        // progress can only be computed for a known, non-zero length
        // (a missing header gives NaN, which fails this test as well)
        const hasTotal = total > 0;
        const reader = response.body.getReader();

        var chunks = [];
        var receivedLength = 0;
        var progressLast = -1;

        while (true) {
            const { done, value } = await reader.read();

            if (done) {
                break;
            }

            chunks.push(value);
            receivedLength += value.length;

            if (hasTotal) {
                // update progress bar element with the new percentage
                elProgress.innerHTML = Math.round((receivedLength / total) * 100) + '%';

                // log only when another 10% step has been reached
                var progressCur = Math.round((receivedLength / total) * 10);
                if (progressCur != progressLast) {
                    printTextarea('js: fetching ' + 10*progressCur + '% ...');
                    progressLast = progressCur;
                }
            }
        }

        // concatenate the received chunks into one contiguous array
        var chunksAll = new Uint8Array(receivedLength);
        var position = 0;
        for (var chunk of chunks) {
            chunksAll.set(chunk, position);
            position += chunk.length;
        }

        return chunksAll;
    } catch (err) {
        // fetch() and reader.read() reject on network errors; report them the
        // same way as a bad HTTP status so callers only have to check for undefined
        printTextarea('js: failed to fetch ' + url + ': ' + err);
        return;
    }
}
// load remote data
// - check if the data is already in the IndexedDB
// - if not, fetch it from the remote URL and store it in the IndexedDB
// - store it in WASM memory
//
// url        - remote location of the data; also used as the IndexedDB key
// dst        - file name handed to storeFS()
// elProgress - element handed to fetchRemote() for the progress display
// size_mb    - download size shown in the confirmation dialog
function loadRemote(url, dst, elProgress, size_mb) {
    // Make the download buttons visible again after a cancelled or failed download.
    // A button can be missing: storeFS() overwrites the markup of a model's
    // container once that model is loaded, so every lookup is checked.
    function showFetchButtons() {
        ['fetch-whisper-tiny-en', 'fetch-whisper-base-en', 'fetch-gpt-2-small'].forEach(function (id) {
            var button = document.getElementById(id);
            if (button) {
                button.style.display = 'inline-block';
            }
        });
    }

    // Cache `data` under `url`, then store it in WASM memory. A caching failure
    // is only logged: the data has been downloaded and is still usable.
    function cacheAndStore(data) {
        var request = indexedDB.open(dbName, dbVersion);

        request.onsuccess = function (event) {
            var db = event.target.result;
            var transaction = db.transaction(['models'], 'readwrite');
            var objectStore = transaction.objectStore('models');
            var putRequest = objectStore.put(data, url);

            putRequest.onsuccess = function () {
                printTextarea('js: "' + url + '" stored in the IndexedDB');
                storeFS(dst, data);
            };

            putRequest.onerror = function () {
                printTextarea('js: failed to store "' + url + '" in the IndexedDB');
                storeFS(dst, data);
            };

            // the connection closes once the transaction above has finished;
            // an open connection would block a later deleteDatabase()
            db.close();
        };

        request.onerror = function () {
            printTextarea('js: failed to open IndexedDB');
            storeFS(dst, data);
        };
    }

    // query the storage quota and print it
    if (navigator.storage && navigator.storage.estimate) {
        navigator.storage.estimate().then(function (estimate) {
            printTextarea('js: storage quota: ' + estimate.quota + ' bytes');
            printTextarea('js: storage usage: ' + estimate.usage + ' bytes');
        });
    }

    // check if the data is already in the IndexedDB
    var request = indexedDB.open(dbName, dbVersion);

    request.onupgradeneeded = function (event) {
        var db = event.target.result;
        if (db.version == 1) {
            var objectStore = db.createObjectStore('models', { autoIncrement: false });
            printTextarea('js: created IndexedDB ' + db.name + ' version ' + db.version);
        } else {
            // clear the database
            var objectStore = event.currentTarget.transaction.objectStore('models');
            objectStore.clear();
            printTextarea('js: cleared IndexedDB ' + db.name + ' version ' + db.version);
        }
    };

    request.onsuccess = function (event) {
        var db = event.target.result;
        var transaction = db.transaction(['models'], 'readonly');
        var objectStore = transaction.objectStore('models');
        var getRequest = objectStore.get(url);

        getRequest.onsuccess = function () {
            if (getRequest.result) {
                printTextarea('js: "' + url + '" is already in the IndexedDB');
                storeFS(dst, getRequest.result);
                return;
            }

            // data is not in the IndexedDB
            printTextarea('js: "' + url + '" is not in the IndexedDB');

            // alert and ask the user to confirm
            if (!confirm('You are about to download ' + size_mb + ' MB of data.\nThe model data will be cached in the browser for future use.\n\nPress OK to continue.')) {
                showFetchButtons();
                return;
            }

            fetchRemote(url, elProgress).then(function (data) {
                if (data) {
                    cacheAndStore(data);
                } else {
                    // the download failed: let the user try again
                    showFetchButtons();
                }
            }, function (err) {
                printTextarea('js: failed to fetch "' + url + '": ' + err);
                showFetchButtons();
            });
        };

        getRequest.onerror = function () {
            printTextarea('js: failed to get data from the IndexedDB');
        };

        // the connection closes once the read transaction has finished
        db.close();
    };

    request.onerror = function (event) {
        printTextarea('js: failed to open IndexedDB');
    };

    request.onblocked = function (event) {
        printTextarea('js: failed to open IndexedDB: blocked');
    };

    request.onabort = function (event) {
        printTextarea('js: failed to open IndexedDB: abort');
    };
}
// Starts loading the Whisper model called `model` ('tiny.en' or 'base.en'):
// records the name, hides the download buttons, updates the status text and
// hands the download over to loadRemote(), which stores it as 'whisper.bin'.
function loadWhisper(model) {
    const urls = {
        'tiny.en': 'https://talk.ggerganov.com/ggml-model-whisper-tiny.en.bin',
        'base.en': 'https://talk.ggerganov.com/ggml-model-whisper-base.en.bin',
    };

    // download sizes in MB, shown in the confirmation dialog
    const sizes = {
        'tiny.en': 75,
        'base.en': 142,
    };

    const progressEl = document.getElementById('fetch-whisper-progress');

    model_whisper = model;

    // hide both download buttons while the model is loading
    ['fetch-whisper-tiny-en', 'fetch-whisper-base-en'].forEach(function (id) {
        document.getElementById(id).style.display = 'none';
    });
    document.getElementById('model-whisper-status').innerHTML = 'Whisper model: loading "' + model + '" ... ';

    loadRemote(urls[model], 'whisper.bin', progressEl, sizes[model]);
}
// Starts loading the GPT-2 model called `model` ('small' or 'medium'):
// records the name, hides the download button, updates the status text and
// hands the download over to loadRemote(), which stores it as 'gpt-2.bin'.
function loadGPT2(model) {
    const urls = {
        'small': 'https://talk.ggerganov.com/ggml-model-gpt-2-117M.bin',
        'medium': 'https://talk.ggerganov.com/ggml-model-gpt-2-345M.bin',
    };

    // download sizes in MB, shown in the confirmation dialog
    const sizes = {
        'small': 240,
        'medium': 712,
    };

    const progressEl = document.getElementById('fetch-gpt-2-progress');

    model_gpt_2 = model;

    // hide the download button while the model is loading
    document.getElementById('fetch-gpt-2-small').style.display = 'none';
    document.getElementById('model-gpt-2-status').innerHTML = 'GPT-2 model: loading "' + model + '" ... ';

    loadRemote(urls[model], 'gpt-2.bin', progressEl, sizes[model]);
}
//
// microphone
//
// recorder created by startRecording once the microphone stream is available
var mediaRecorder = null;
// true while recording should continue; cleared by stopRecording
var doRecording = false;
// Date.now() timestamp of the most recent startRecording call
var startTime = 0;
// Requests the end of the current recording: sets the status to "paused",
// clears the doRecording flag (the timer in startRecording reacts to it) and
// drops the buffered audio.
function stopRecording() {
    Module.set_status("paused");

    doRecording = false;

    // forget both the current audio and the part kept from before a restart
    audio = null;
    audio0 = null;
}
// Starts capturing microphone audio and feeding it to the talk instance.
//
// - creates the AudioContext on first use
// - records with a MediaRecorder that delivers a chunk every 250 ms; after each
//   chunk the whole recording so far is decoded and passed, together with the
//   audio kept from before the last restart (audio0), to Module.set_audio
// - a 250 ms timer stops the capture when doRecording is cleared, and restarts
//   it once more than kRestartRecording_s seconds have been recorded
function startRecording() {
    if (!context) {
        context = new AudioContext({sampleRate: kSampleRate});
    }

    Module.set_status("");

    document.getElementById('start').disabled = true;
    document.getElementById('stop').disabled = false;

    doRecording = true;
    startTime = Date.now();

    var chunks = [];
    var stream = null;

    // Stop the recorder and release the microphone. Either can still be missing
    // when getUserMedia() has not resolved yet or has failed.
    function releaseCapture() {
        if (mediaRecorder && mediaRecorder.state !== 'inactive') {
            mediaRecorder.stop();
        }
        if (stream) {
            stream.getTracks().forEach(function(track) {
                track.stop();
            });
        }
    }

    // Put the Start/Stop buttons back into the "not recording" state.
    function resetButtons() {
        document.getElementById('start').disabled = false;
        document.getElementById('stop').disabled = true;
    }

    navigator.mediaDevices.getUserMedia({audio: true, video: false})
        .then(function(s) {
            stream = s;

            // recording was stopped while waiting for the microphone:
            // the timer below is already gone, so release the stream here
            if (!doRecording) {
                releaseCapture();
                return;
            }

            mediaRecorder = new MediaRecorder(stream);
            mediaRecorder.ondataavailable = function(e) {
                chunks.push(e.data);

                // decode everything recorded so far, not just the new chunk
                var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
                var reader = new FileReader();

                reader.onload = function(event) {
                    var buf = new Uint8Array(reader.result);

                    context.decodeAudioData(buf.buffer, function(audioBuffer) {
                        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                        var source = offlineContext.createBufferSource();
                        source.buffer = audioBuffer;
                        source.connect(offlineContext.destination);
                        source.start(0);

                        offlineContext.startRendering().then(function(renderedBuffer) {
                            audio = renderedBuffer.getChannelData(0);

                            //printTextarea('js: audio recorded, size: ' + audio.length + ', old size: ' + (audio0 == null ? 0 : audio0.length));

                            // prepend the audio kept from before the last restart
                            var audioAll = new Float32Array(audio0 == null ? audio.length : audio0.length + audio.length);
                            if (audio0 != null) {
                                audioAll.set(audio0, 0);
                            }
                            audioAll.set(audio, audio0 == null ? 0 : audio0.length);

                            if (instance) {
                                Module.set_audio(instance, audioAll);
                            }
                        });
                    }, function(e) {
                        audio = null;
                    });
                }

                reader.readAsArrayBuffer(blob);
            };

            mediaRecorder.onstop = function(e) {
                // a stop while doRecording is still set is a restart request
                if (doRecording) {
                    setTimeout(function() {
                        startRecording();
                    });
                }
            };

            mediaRecorder.start(250);
        })
        .catch(function(err) {
            printTextarea('js: error getting audio stream: ' + err);

            // nothing is being recorded: undo the state set up above so that
            // the Start button works again
            clearInterval(interval);
            releaseCapture();
            mediaRecorder = null;
            doRecording = false;
            resetButtons();
        });

    var interval = setInterval(function() {
        if (!doRecording) {
            clearInterval(interval);
            releaseCapture();

            resetButtons();

            mediaRecorder = null;
        }

        // if audio length is more than kRestartRecording_s seconds, restart recording
        if (audio != null && audio.length > kSampleRate*kRestartRecording_s) {
            if (doRecording) {
                //printTextarea('js: restarting recording');

                clearInterval(interval);
                audio0 = audio;
                audio = null;
                // the recorder's onstop handler starts the next recording
                releaseCapture();
            }
        }
    }, 250);
}
//
// speak
//
// Speaks `text` with the selected voice (the first available voice when none
// has been selected yet). While a recording is active it is paused for the
// duration of the speech and started again afterwards.
function onSpeak(text) {
    var available = synth.getVoices();
    var utterance = new SpeechSynthesisUtterance(text);

    if (voice == null) {
        voice = available[0];
    }

    utterance.voice = voice;
    synth.speak(utterance);

    // not recording: nothing to pause or resume
    if (!doRecording) {
        return;
    }

    Module.set_status("speaking ...");
    printTextarea('js: speaking');
    stopRecording();

    // poll every 100 ms until the synthesizer has finished
    var pollTimer = setInterval(function() {
        if (synth.speaking) {
            Module.set_status("");
            return;
        }

        printTextarea('js: done speaking');
        clearInterval(pollTimer);
        startRecording();
    }, 100);
}
// Click handler of the "Say something" button: forwards the current talk
// instance to Module.force_speak.
// NOTE(review): `instance` is still null until onStart has run - confirm that
// force_speak accepts that.
function onSpeakRandom() {
Module.force_speak(instance);
}
// Click handler of the "Clear Cache" button: after confirmation, deletes the
// IndexedDB database that caches the downloaded models and logs the outcome.
async function clearCache() {
    if (!confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.')) {
        return;
    }

    var request = indexedDB.deleteDatabase(dbName);

    request.onsuccess = function () {
        printTextarea('js: cleared the IndexedDB cache "' + dbName + '"');
    };

    request.onerror = function () {
        printTextarea('js: failed to clear the IndexedDB cache "' + dbName + '"');
    };

    // fired while another connection to the database is still open
    request.onblocked = function () {
        printTextarea('js: clearing the IndexedDB cache is blocked by an open connection');
    };
}
//
// main
//
// handle of the 100 ms timer started by onStart
var intervalUpdate = null;

// Click handler of the "Start" button: initializes the talk instance on first
// use, starts recording and starts the timer that speaks pending text and
// refreshes the status and context shown on the page.
function onStart() {
    if (!instance) {
        instance = Module.init('whisper.bin');

        if (instance) {
            printTextarea("js: whisper initialized, instance: " + instance);
        }
    }

    if (!instance) {
        printTextarea("js: failed to initialize whisper");
        return;
    }

    startRecording();

    // Stop does not end the timer, so a second Start would otherwise add
    // another one on top of it
    if (intervalUpdate !== null) {
        clearInterval(intervalUpdate);
    }

    intervalUpdate = setInterval(function() {
        var textToSpeak = Module.get_text_to_speak();

        if (textToSpeak != null && textToSpeak.length > 1) {
            onSpeak(textToSpeak);
        }

        // shown as plain text: the strings come from the models and must not be parsed as markup
        document.getElementById('state-status').textContent = Module.get_status();
        document.getElementById('state-context').textContent = Module.get_text_context();
    }, 100);
}
// Click handler of the "Stop" button: ends the current recording.
function onStop() {
stopRecording();
}
// Change handler of the #voice <select>: logs the selected value and makes
// the voice at that index of synth.getVoices() the current one.
function onVoiceChange() {
    var selected = document.getElementById('voice').value;

    printTextarea('js: voice changed to: ' + selected);

    voice = synth.getVoices()[selected];
}
</script>
<script type="text/javascript" src="talk.js"></script>
</body>
</html>