Add voice activity detection
This commit is contained in:
parent
c49dabbfc4
commit
80f766379d
|
@ -11,7 +11,7 @@ Instead Websockets are used for all communications.
|
|||
libopus, libcelt (0.7.1) and libsamplerate, compiled to JS via emscripten, are used for audio decoding.
|
||||
Therefore, at the moment only the Opus and CELT Alpha codecs are supported.
|
||||
|
||||
Quite a few features, most noticeably voice activity detection and all
|
||||
Quite a few features, most noticeably all
|
||||
administrative functionality, are still missing.
|
||||
|
||||
### Installing
|
||||
|
|
|
@ -67,11 +67,23 @@
|
|||
<td>
|
||||
<select data-bind='value: voiceMode'>
|
||||
<option value="cont">Continuous</option>
|
||||
<option value="vad" disabled>Voice Activity</option>
|
||||
<option value="vad">Voice Activity</option>
|
||||
<option value="ptt">Push To Talk</option>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-bind="style: {visibility: voiceMode() == 'ptt' ? 'visible' : 'hidden'}">
|
||||
<tr data-bind="visible: voiceMode() == 'vad'">
|
||||
<td colspan="2">
|
||||
<div class="mic-volume-container">
|
||||
<div class="mic-volume" data-bind="style: {
|
||||
width: testVadLevel()*100 + '%',
|
||||
background: testVadActive() ? 'green' : 'red'
|
||||
}"></div>
|
||||
</div>
|
||||
<input type="range" min="0" max="1" step="0.01"
|
||||
data-bind="value: vadLevel">
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-bind="visible: voiceMode() == 'ptt'">
|
||||
<td>PTT Key</td>
|
||||
<td>
|
||||
<input type="button" data-bind="value: pttKeyDisplay, click: recordPttKey">
|
||||
|
|
42
app/index.js
42
app/index.js
|
@ -9,7 +9,7 @@ import ko from 'knockout'
|
|||
import _dompurify from 'dompurify'
|
||||
import keyboardjs from 'keyboardjs'
|
||||
|
||||
import { ContinuousVoiceHandler, PushToTalkVoiceHandler, initVoice } from './voice'
|
||||
import { ContinuousVoiceHandler, PushToTalkVoiceHandler, VADVoiceHandler, initVoice } from './voice'
|
||||
|
||||
const dompurify = _dompurify(window)
|
||||
|
||||
|
@ -58,11 +58,34 @@ class SettingsDialog {
|
|||
this.voiceMode = ko.observable(settings.voiceMode)
|
||||
this.pttKey = ko.observable(settings.pttKey)
|
||||
this.pttKeyDisplay = ko.observable(settings.pttKey)
|
||||
this.vadLevel = ko.observable(settings.vadLevel)
|
||||
this.testVadLevel = ko.observable(0)
|
||||
this.testVadActive = ko.observable(false)
|
||||
|
||||
this._setupTestVad()
|
||||
this.vadLevel.subscribe(() => this._setupTestVad())
|
||||
}
|
||||
|
||||
_setupTestVad () {
|
||||
if (this._testVad) {
|
||||
this._testVad.end()
|
||||
}
|
||||
this._testVad = new VADVoiceHandler(null, this.vadLevel())
|
||||
this._testVad.on('started_talking', () => this.testVadActive(true))
|
||||
.on('stopped_talking', () => this.testVadActive(false))
|
||||
.on('level', level => this.testVadLevel(level))
|
||||
testVoiceHandler = this._testVad
|
||||
}
|
||||
|
||||
applyTo (settings) {
|
||||
settings.voiceMode = this.voiceMode()
|
||||
settings.pttKey = this.pttKey()
|
||||
settings.vadLevel = this.vadLevel()
|
||||
}
|
||||
|
||||
end () {
|
||||
this._testVad.end()
|
||||
testVoiceHandler = null
|
||||
}
|
||||
|
||||
recordPttKey () {
|
||||
|
@ -89,14 +112,16 @@ class SettingsDialog {
|
|||
/**
 * User settings persisted in `window.localStorage` under keys prefixed with
 * `mumble.`.
 */
class Settings {
  /**
   * Loads every setting from localStorage, falling back to its default when
   * nothing usable has been stored.
   */
  constructor () {
    const load = key => window.localStorage.getItem('mumble.' + key)
    this.voiceMode = load('voiceMode') || 'vad'
    this.pttKey = load('pttKey') || 'ctrl + shift'

    // localStorage only hands back strings, so convert the stored level to a
    // number; otherwise the type would differ between the default (0.3) and
    // a previously saved value ("0.3").
    const storedVadLevel = load('vadLevel')
    const parsedVadLevel = storedVadLevel == null || storedVadLevel === ''
      ? NaN
      : Number(storedVadLevel)
    this.vadLevel = Number.isFinite(parsedVadLevel) ? parsedVadLevel : 0.3
  }

  /**
   * Writes every setting to localStorage.
   */
  save () {
    const save = (key, val) => window.localStorage.setItem('mumble.' + key, val)
    save('voiceMode', this.voiceMode)
    save('pttKey', this.pttKey)
    save('vadLevel', this.vadLevel)
  }
}
|
||||
|
||||
|
@ -130,10 +155,13 @@ class GlobalBindings {
|
|||
this._updateVoiceHandler()
|
||||
|
||||
this.settings.save()
|
||||
this.settingsDialog(null)
|
||||
this.closeSettings()
|
||||
}
|
||||
|
||||
// Closes the settings dialog: lets the open dialog (if any) clean up via
// its end() method, then clears the settingsDialog observable.
this.closeSettings = () => {
  const dialog = this.settingsDialog()
  if (dialog) {
    dialog.end()
  }
  this.settingsDialog(null)
}
|
||||
|
||||
|
@ -360,7 +388,7 @@ class GlobalBindings {
|
|||
} else if (mode === 'ptt') {
|
||||
voiceHandler = new PushToTalkVoiceHandler(this.client, this.settings.pttKey)
|
||||
} else if (mode === 'vad') {
|
||||
|
||||
voiceHandler = new VADVoiceHandler(this.client, this.settings.vadLevel)
|
||||
} else {
|
||||
log('Unknown voice mode:', mode)
|
||||
return
|
||||
|
@ -586,15 +614,19 @@ function userToState () {
|
|||
}
|
||||
|
||||
var voiceHandler
|
||||
var testVoiceHandler
|
||||
|
||||
initVoice(data => {
|
||||
if (testVoiceHandler) {
|
||||
testVoiceHandler.write(data)
|
||||
}
|
||||
if (!ui.client) {
|
||||
if (voiceHandler) {
|
||||
voiceHandler.end()
|
||||
}
|
||||
voiceHandler = null
|
||||
} else if (voiceHandler) {
|
||||
voiceHandler.write(new Float32Array(data.buffer, data.byteOffset, data.byteLength / 4))
|
||||
voiceHandler.write(data)
|
||||
}
|
||||
}, err => {
|
||||
log('Cannot initialize user media. Microphone will not work:', err)
|
||||
|
|
104
app/voice.js
104
app/voice.js
|
@ -1,10 +1,12 @@
|
|||
import { Writable } from 'stream'
|
||||
import { Writable, Transform } from 'stream'
|
||||
import MicrophoneStream from 'microphone-stream'
|
||||
import audioContext from 'audio-context'
|
||||
import chunker from 'stream-chunker'
|
||||
import Resampler from 'libsamplerate.js'
|
||||
import getUserMedia from 'getusermedia'
|
||||
import keyboardjs from 'keyboardjs'
|
||||
import vad from 'voice-activity-detection'
|
||||
import DropStream from 'drop-stream'
|
||||
|
||||
class VoiceHandler extends Writable {
|
||||
constructor (client) {
|
||||
|
@ -15,7 +17,29 @@ class VoiceHandler extends Writable {
|
|||
|
||||
_getOrCreateOutbound () {
|
||||
if (!this._outbound) {
|
||||
this._outbound = this._client.createVoiceStream()
|
||||
if (!this._client) {
|
||||
this._outbound = DropStream.obj()
|
||||
this.emit('started_talking')
|
||||
return this._outbound
|
||||
}
|
||||
this._outbound = new Resampler({
|
||||
unsafe: true,
|
||||
type: Resampler.Type.SINC_FASTEST,
|
||||
ratio: 48000 / audioContext.sampleRate
|
||||
})
|
||||
|
||||
const buffer2Float32Array = new Transform({
|
||||
transform (data, _, callback) {
|
||||
callback(null, new Float32Array(data.buffer, data.byteOffset, data.byteLength / 4))
|
||||
},
|
||||
readableObjectMode: true
|
||||
})
|
||||
|
||||
this._outbound
|
||||
.pipe(chunker(4 * 480))
|
||||
.pipe(buffer2Float32Array)
|
||||
.pipe(this._client.createVoiceStream())
|
||||
|
||||
this.emit('started_talking')
|
||||
}
|
||||
return this._outbound
|
||||
|
@ -74,24 +98,76 @@ export class PushToTalkVoiceHandler extends VoiceHandler {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Voice handler that forwards audio only while the voice activity detector
 * reports speech.
 *
 * While inactive, incoming chunks are kept in a bounded backlog so the audio
 * preceding the (delayed) activation can be flushed once speech is detected.
 * Emits `level` with every detector update.
 */
export class VADVoiceHandler extends VoiceHandler {
  /**
   * @param client - Passed through to the VoiceHandler constructor.
   * @param level - Used as both `minNoiseLevel` and `maxNoiseLevel` of the
   *   detector.
   */
  constructor (client, level) {
    super(client)

    const detectorOptions = {
      onVoiceStart: () => {
        console.log('vad: start')
        this._active = true
      },
      onVoiceStop: () => {
        console.log('vad: stop')
        this._stopOutbound()
        this._active = false
      },
      onUpdate: (val) => {
        this._level = val
        this.emit('level', val)
      },
      noiseCaptureDuration: 0,
      minNoiseLevel: level,
      maxNoiseLevel: level
    }
    this._vad = vad(audioContext, theUserMedia, detectorOptions)

    // Need to keep a backlog of the last ~150ms (dependent on sample rate)
    // because VAD will activate with ~125ms delay
    this._backlog = []
    this._backlogLength = 0
    this._backlogLengthMin = 1024 * 6 * 4 // vadBufferLen * (vadDelay + 1) * bytesPerSample
  }

  /**
   * Writable implementation: buffers the chunk while inactive, otherwise
   * flushes any backlog and forwards the chunk to the outbound stream.
   */
  _write (data, _, callback) {
    if (!this._active) {
      // Not (yet) talking: keep the backlog filled
      this._backlog.push(data)
      this._backlogLength += data.length

      // Drop the oldest chunk if the backlog stays long enough without it
      const oldest = this._backlog[0]
      if (this._backlogLength - oldest.length > this._backlogLengthMin) {
        this._backlog.shift()
        this._backlogLength -= oldest.length
      }

      callback()
      return
    }

    if (this._backlog.length > 0) {
      // Send the audio captured before the detector activated
      this._backlog.forEach(chunk => {
        this._getOrCreateOutbound().write(chunk)
      })
      this._backlog = []
      this._backlogLength = 0
    }
    this._getOrCreateOutbound().write(data, callback)
  }

  /**
   * Runs the parent's finalization, then destroys the detector before
   * reporting completion.
   */
  _final (callback) {
    const finish = (err) => {
      this._vad.destroy()
      callback(err)
    }
    super._final(finish)
  }
}
|
||||
|
||||
var theUserMedia = null
|
||||
|
||||
export function initVoice (onData, onUserMediaError) {
|
||||
var resampler = new Resampler({
|
||||
unsafe: true,
|
||||
type: Resampler.Type.SINC_FASTEST,
|
||||
ratio: 48000 / audioContext.sampleRate
|
||||
})
|
||||
|
||||
resampler.pipe(chunker(4 * 480)).on('data', data => {
|
||||
onData(data)
|
||||
})
|
||||
|
||||
getUserMedia({ audio: true }, (err, userMedia) => {
|
||||
if (err) {
|
||||
onUserMediaError(err)
|
||||
} else {
|
||||
var micStream = new MicrophoneStream(userMedia, { objectMode: true })
|
||||
theUserMedia = userMedia
|
||||
var micStream = new MicrophoneStream(userMedia, { objectMode: true, bufferSize: 1024 })
|
||||
micStream.on('data', data => {
|
||||
resampler.write(Buffer.from(data.getChannelData(0).buffer))
|
||||
onData(Buffer.from(data.getChannelData(0).buffer))
|
||||
})
|
||||
}
|
||||
})
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
"brfs": "^1.4.3",
|
||||
"css-loader": "^0.26.0",
|
||||
"dompurify": "^0.8.9",
|
||||
"drop-stream": "^1.0.0",
|
||||
"duplex-maker": "^1.0.0",
|
||||
"extract-loader": "^0.1.0",
|
||||
"file-loader": "^0.9.0",
|
||||
|
@ -39,10 +40,11 @@
|
|||
"regexp-replace-loader": "0.0.1",
|
||||
"stream-chunker": "^1.2.8",
|
||||
"transform-loader": "^0.2.3",
|
||||
"voice-activity-detection": "johni0702/voice-activity-detection#9f8bd90",
|
||||
"webpack": "^1.13.3",
|
||||
"webworkify-webpack-dropin": "^1.1.9",
|
||||
"libsamplerate.js": "^1.0.0",
|
||||
"mumble-client-codecs-browser": "^1.1.0",
|
||||
"mumble-client-codecs-browser": "^1.1.1",
|
||||
"mumble-client-websocket": "^1.0.0",
|
||||
"mumble-client": "^1.1.0",
|
||||
"web-audio-buffer-queue": "^1.0.0"
|
||||
|
|
|
@ -208,8 +208,10 @@ form {
|
|||
border-bottom: 1px solid darkgray;
|
||||
}
|
||||
.dialog-footer {
|
||||
position: absolute;
|
||||
bottom: 0px;
|
||||
width: calc(100% - 20px);
|
||||
margin: 10px;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
.dialog-close {
|
||||
float: left;
|
||||
|
@ -255,6 +257,14 @@ form {
|
|||
}
|
||||
.settings-dialog table input {
|
||||
width: 100%;
|
||||
margin: 0px;
|
||||
}
|
||||
.settings-dialog .mic-volume-container {
|
||||
height: 10px;
|
||||
border: 3px solid black;
|
||||
}
|
||||
.settings-dialog .mic-volume {
|
||||
height: 100%;
|
||||
}
|
||||
.connect-dialog {
|
||||
width: 300px;
|
||||
|
|
Loading…
Reference in a new issue