import { observe } from "mobx";
import { getEnv, getRoot, getType, isAlive, types } from "mobx-state-tree";
import { createRef } from "react";
import { customTypes } from "../../../core/CustomTypes";
import { AnnotationMixin } from "../../../mixins/AnnotationMixin";
import IsReadyMixin from "../../../mixins/IsReadyMixin";
import ProcessAttrsMixin from "../../../mixins/ProcessAttrs";
import { SyncableMixin } from "../../../mixins/Syncable";
import { AudioRegionModel } from "../../../regions/AudioRegion";
import { FF_LSDV_E_278, isFF } from "../../../utils/feature-flags";
import { isDefined } from "../../../utils/utilities";
import ObjectBase from "../Base";
import { WS_SPEED, WS_VOLUME, WS_ZOOM_X } from "./constants";
import { ff } from "@humansignal/core";
// Evaluated once at module load: whether the FF_SYNCED_BUFFERING feature flag is active.
// Gates the buffering-aware branches of the sync logic in this file.
const isSyncedBuffering = ff.isActive(ff.FF_SYNCED_BUFFERING);
/**
* The Audio tag plays audio and shows its waveform. Use for audio annotation tasks where you want to label regions of audio, see the waveform, and manipulate audio during annotation.
*
* Use with the following data types: audio
* @example
*
*
*
*
* @example
*
*
*
*
* @example
*
*
*
*
*
*
*
*
* @example
*
*
*
*
*
* @example
*
*
*
*
*
*
*
*
*
* @example
*
*
*
*
*
*
*
*
*
* @example
*
*
*
*
*
*
*
*
*
* @regions AudioRegion
* @meta_title Audio Tag for Audio Labeling
* @meta_description Customize Label Studio with the Audio tag for advanced audio annotation tasks for machine learning and data science projects.
* @name Audio
* @param {string} name - Name of the element
* @param {string} value - Data field containing path or a URL to the audio.
* @param {string} [defaultspeed=1] - Default speed level (from 0.5 to 2).
* @param {string} [defaultscale=1] - Audio pane default y-scale for waveform.
* @param {string} [defaultzoom=1] - Default zoom level for waveform (from 1 to 1500).
* @param {string} [defaultvolume=1] - Default volume level (from 0 to 1).
* @param {string} [hotkey] - Hotkey used to play or pause audio.
* @param {string} [sync] - Object name to sync with.
* @param {string} [height=96] - Total height of the audio player.
* @param {string} [waveheight=32] - Minimum height of a waveform when in `splitchannels` mode with multiple channels to display.
* @param {boolean} [spectrogram=false] - Determines whether an audio spectrogram is automatically displayed upon loading.
* @param {boolean} [splitchannels=false] - Display multiple audio channels separately, if the audio file has more than one channel. (**NOTE: Requires more memory to operate.**)
* @param {string} [decoder=webaudio] - Decoder type to use to decode audio data. (`"webaudio"`, `"ffmpeg"`, or `"none"` for no decoding - provides fast loading for large files but disables waveform visualization)
* @param {string} [player=html5] - Player type to use to play audio data. (`"html5"` or `"webaudio"`)
*/
// Attributes accepted by the Audio tag. Numeric-looking settings (zoom, volume, speed,
// heights, scale, cursor width) are declared as strings; their defaults are string values.
const TagAttrs = types.model({
name: types.identifier,
// Data field containing path or a URL to the audio
value: types.maybeNull(types.string),
muted: types.optional(types.boolean, false),
zoom: types.optional(types.boolean, true),
// default comes from WS_ZOOM_X in ./constants
defaultzoom: types.optional(types.string, WS_ZOOM_X.default.toString()),
volume: types.optional(types.boolean, true),
// default comes from WS_VOLUME in ./constants
defaultvolume: types.optional(types.string, WS_VOLUME.default.toString()),
speed: types.optional(types.boolean, true),
// default comes from WS_SPEED in ./constants
defaultspeed: types.optional(types.string, WS_SPEED.default.toString()),
// Hotkey used to play or pause audio
hotkey: types.maybeNull(types.string),
showlabels: types.optional(types.boolean, false),
showscores: types.optional(types.boolean, false),
// Total height of the audio player
height: types.optional(types.string, "96"),
// Minimum height of a waveform when in `splitchannels` mode
waveheight: types.optional(types.string, "32"),
cursorwidth: types.optional(types.string, "2"),
cursorcolor: types.optional(customTypes.color, "#333"),
// Audio pane default y-scale for waveform
defaultscale: types.optional(types.string, "1"),
autocenter: types.optional(types.boolean, true),
scrollparent: types.optional(types.boolean, true),
// Display multiple audio channels separately
splitchannels: types.optional(types.boolean, false),
// Decoder used to decode audio data; "none" skips decoding
decoder: types.optional(types.enumeration(["ffmpeg", "webaudio", "none"]), "webaudio"),
// Player used to play audio data
player: types.optional(types.enumeration(["html5", "webaudio"]), "html5"),
// Whether a spectrogram is displayed automatically upon loading
spectrogram: types.optional(types.boolean, false),
});
// AudioModel composes the tag attributes, the shared mixins and the audio-specific
// model defined below into a single mobx-state-tree type named "AudioModel".
export const AudioModel = types.compose(
"AudioModel",
TagAttrs,
SyncableMixin,
ProcessAttrsMixin,
ObjectBase,
AnnotationMixin,
IsReadyMixin,
types
.model("AudioModel", {
type: "audio",
// reported as `url` in loading error messages (see onError)
_value: types.optional(types.string, ""),
regions: types.array(AudioRegionModel),
})
// Volatile state: runtime-only values that are not part of the model's snapshot.
.volatile(() => ({
// error messages produced by onError
errors: [],
stageRef: createRef(),
// player instance; assigned in onLoad and reset to null in beforeDestroy
_ws: null,
// assigned via setWFFrame
_wfFrame: null,
// while true (and synced buffering is on) onSeek does not broadcast seek events
_skip_seek_event: false,
}))
.views((self) => ({
get hasStates() {
const states = self.states();
return states && states.length > 0;
},
get store() {
return getRoot(self);
},
states() {
return self.annotation?.toNames.get(self.name) || [];
},
activeStates() {
const states = self.states();
return states?.filter((s) => getType(s).name === "LabelsModel" && s.isSelected);
},
get activeState() {
const states = self.states();
return states?.filter((s) => getType(s).name === "LabelsModel" && s.isSelected)[0];
},
get activeLabel() {
const state = self.activeState;
return state?.selectedValues()?.[0];
},
get activeLabelKey() {
const labels = self.activeState?.selectedValues();
// use label to generate a unique key to ensure that adding/deleting can trigger changes
return labels ? labels.join(",") : "";
},
get readonly() {
return self.annotation.isReadOnly();
},
}))
////// Sync actions
.actions((self) => ({
////// Outgoing
triggerSync(event, data) {
if (!self._ws) return;
self.syncSend(
{
playing: self._ws.playing,
time: self._ws.currentTime,
speed: self._ws.rate,
...data,
},
event,
);
},
triggerSyncSpeed(speed) {
self.triggerSync("speed", { speed });
},
triggerSyncPlay(isManual = false) {
if (isSyncedBuffering && self.isBuffering && !isManual) return;
self.wasPlayingBeforeBuffering = true;
// @todo should not be handled like this
self.handleSyncPlay();
// trigger play only after it actually started to play
self.triggerSync("play", { playing: true });
},
triggerSyncPause(isManual = false) {
if (isSyncedBuffering && self.isBuffering && !isManual) return;
self.wasPlayingBeforeBuffering = false;
// @todo should not be handled like this
self.handleSyncPause();
self.triggerSync("pause", { playing: false });
},
triggerSyncSeek(time) {
self.triggerSync("seek", { time, ...(isSyncedBuffering ? { playing: self.wasPlayingBeforeBuffering } : {}) });
},
triggerSyncBuffering(isBuffering) {
if (!self._ws) return;
const playing = self.wasPlayingBeforeBuffering;
self.triggerSync("buffering", {
buffering: isBuffering,
playing,
});
},
////// Incoming
registerSyncHandlers() {
for (const event of ["play", "pause", "seek"]) {
self.syncHandlers.set(event, self.handleSync);
}
self.syncHandlers.set("speed", self.handleSyncSpeed);
if (isSyncedBuffering) {
self.syncHandlers.set("buffering", self.handleSyncBuffering);
}
},
/**
 * Incoming "buffering" sync event handler (registered only when synced buffering is enabled).
 * Pauses the player when the event reports buffering, resumes it when buffering is over
 * and `playing` is truthy, then passes the remaining fields to `handleSyncSeek`.
 * @param {object} payload - event data; `playing` is split off, the rest (`buffering`, `time`, ...) stays in `data`.
 */
handleSyncBuffering({ playing, ...data }) {
// mirror the buffering state reported by the sync manager
self.isBuffering = self.syncManager?.isBuffering;
if (data.buffering) {
// remember the playback state so it can be restored once buffering ends
self.wasPlayingBeforeBuffering = playing;
// the flag makes onSeek ignore seek events raised while pausing
self._skip_seek_event = true;
self.isPlaying = false;
self._ws?.pause();
self._skip_seek_event = false;
}
// resume only when neither the sync manager nor this event reports buffering
if (!self.isBuffering && !data.buffering) {
if (playing) {
self._skip_seek_event = true;
self.isPlaying = true;
self._ws?.play();
self._skip_seek_event = false;
}
}
// process other data
self.handleSyncSeek(data);
},
/**
 * Incoming handler shared by the "play", "pause" and "seek" sync events.
 * Applies the received `playing` state and `time` to the local player.
 * Ignored until the player exists and reports `loaded`.
 * @param {object} data - event payload (`playing`, `time`, ...).
 * @param {string} event - name of the sync event being handled.
 */
handleSync(data, event) {
if (!self._ws?.loaded) return;
// without synced buffering the seek is applied before the play/pause state
if (!isSyncedBuffering) {
self.handleSyncSeek(data);
}
const isBuffering = self.syncManager?.isBuffering;
// Normal logic when no buffering
if (!isSyncedBuffering || (!isBuffering && isDefined(data.playing))) {
if (data.playing) {
// only touch the player when its state actually differs
if (!self._ws.playing) {
self.isPlaying = true;
self._ws?.play();
}
} else {
if (self._ws.playing) {
self.isPlaying = false;
self._ws?.pause();
}
}
}
// during the buffering only these events have real `playing` values (in other cases it's paused all the time)
if (["play", "pause"].indexOf(event) > -1) {
self.wasPlayingBeforeBuffering = data.playing;
}
// with synced buffering the seek is applied after the play/pause state
if (isSyncedBuffering) {
self.handleSyncSeek(data);
}
},
// @todo remove both of these methods
handleSyncPlay() {
if (self._ws?.playing) return;
self.isPlaying = true;
self._ws?.play();
},
// Clears `isPlaying` and pauses the player.
// NOTE(review): the guard returns early when `isPlaying` is true, whereas handleSyncPlay
// guards on `_ws.playing`. As written, the pause only runs when `isPlaying` is already
// falsy — this asymmetry looks unintended; confirm against callers before changing.
handleSyncPause() {
if (self.isPlaying) return;
self.isPlaying = false;
self._ws?.pause();
},
handleSyncSeek({ time }) {
if (!self._ws?.loaded || !isDefined(time)) return;
try {
// setCurrentTime some times can take up to 76ms and it is syncronous
self._ws.setCurrentTime(time, true);
// syncCursor provides sync drawing which can cost up to 10ms which is too much for syncing playback
setTimeout(() => {
if (isAlive(self)) self._ws?.syncCursor();
});
} catch (err) {
console.log(err);
}
},
handleSyncSpeed({ speed }) {
if (!self._ws) return;
self._ws.rate = speed;
},
syncMuted(muted) {
if (!self._ws) return;
self._ws.muted = muted;
},
}))
.actions((self) => {
// Disposer returned by observe() in afterCreate; invoked in beforeDestroy.
let dispose;
// Handle of the pending redraw timer scheduled by requestWSUpdate (null when none was scheduled).
let updateTimeout = null;
return {
afterCreate() {
dispose = observe(
self,
"activeLabelKey",
() => {
const selectedRegions = self._ws?.regions?.selected;
if (!selectedRegions || selectedRegions.length === 0) return;
const activeState = self.activeState;
const selectedColor = activeState?.selectedColor;
const labels = activeState?.selectedValues();
for (const r of selectedRegions) {
r.update({ color: selectedColor, labels: labels ?? [] });
const region = r.isRegion ? self.updateRegion(r) : self.addRegion(r);
self.annotation.selectArea(region);
}
if (selectedRegions.length) {
self.requestWSUpdate();
}
},
false,
);
},
needsUpdate() {
self.handleNewRegions();
self.requestWSUpdate();
},
requestWSUpdate() {
if (!self._ws) return;
if (updateTimeout) {
clearTimeout(updateTimeout);
}
updateTimeout = setTimeout(() => {
self._ws.regions.redraw();
}, 33);
},
onReady() {
self.setReady(true);
},
onRateChange(rate) {
self.triggerSyncSpeed(rate);
},
/**
* Load any synced paragraph text segments which contain start and end values
* as Audio segments for visualization of the excerpts within the audio track
**/
loadSyncedParagraphs() {
if (!self.syncManager) return;
// find synced paragraphs if any
// and add their regions to the audio
const syncedParagraphs = Array.from(self.syncManager.syncTargets, ([, value]) => value).filter(
(target) => target.type === "paragraphs" && target.contextscroll,
);
for (const paragraph of syncedParagraphs) {
const segments = Object.values(paragraph.regionsStartEnd).map(({ start, end }) => ({
start,
end,
showInTimeline: true,
external: true,
locked: true,
}));
self._ws.addRegions(segments);
}
},
handleNewRegions() {
if (!self._ws) return;
self.regs.map((reg) => {
if (reg._ws_region) {
self.updateWsRegion(reg);
} else {
self.createWsRegion(reg);
}
});
},
findRegionByWsRegion(wsRegion) {
return self.regs.find((r) => r._ws_region?.id === wsRegion?.id);
},
getRegionColor() {
const control = self.activeState;
if (control) {
return control.selectedColor;
}
return null;
},
onHotKey(e) {
e?.preventDefault();
self._ws.togglePlay();
return false;
},
setRangeValue(val) {
self.rangeValue = val;
},
setPlaybackRate(val) {
self.playBackRate = val;
},
/**
 * Bind a player region to a model region.
 * If an area with the same id already exists in the annotation, it is re-linked to
 * `wsRegion` and returned. Otherwise a new result is created from the active states.
 * @param {object} wsRegion - region object coming from the player.
 * @returns the linked model region, or `undefined` when no states are available.
 */
addRegion(wsRegion) {
// area id is assigned to WS region during deserialization
const find_r = self.annotation.areas.get(wsRegion.id);
if (find_r) {
find_r.setWSRegion(wsRegion);
find_r.updateColor();
return find_r;
}
const states = self.getAvailableStates();
if (states.length === 0) {
// wsRegion.on("update-end", ev=> self.selectRange(ev, wsRegion));
// nothing to label with: demote an existing region to a plain segment
if (wsRegion.isRegion) {
wsRegion.convertToSegment().handleSelected();
}
return;
}
const activeStates = self.activeStates();
// NOTE(review): `control` is undefined when activeStates() is empty, which would throw below —
// confirm getAvailableStates() being non-empty guarantees a selected Labels control.
const [control, ...rest] = activeStates;
const labels = { [control.valueType]: control.selectedValues() };
// the remaining active states are passed along only when multiple-labels regions are enabled
const r = ff.isActive(ff.FF_MULTIPLE_LABELS_REGIONS)
? self.annotation.createResult(wsRegion, labels, control, self, false, rest)
: self.annotation.createResult(wsRegion, labels, control, self, false);
// NOTE(review): reads `labels.labels`, which is set only if `control.valueType` is "labels" — confirm
const updatedRegion = wsRegion.convertToRegion(labels.labels);
r.setWSRegion(updatedRegion);
r.updateColor();
return r;
},
updateRegion(wsRegion) {
const r = self.findRegionByWsRegion(wsRegion);
if (!r) return;
r.onUpdateEnd();
return r;
},
createWsRegion(region) {
if (!self._ws) return;
const options = region.wsRegionOptions();
options.labels = region.labels?.length ? region.labels : undefined;
const r = self._ws.addRegion(options, false);
region.setWSRegion(r);
},
updateWsRegion(region) {
if (!self._ws) return;
const options = region.wsRegionOptions();
options.labels = region.labels?.length ? region.labels : undefined;
self._ws.updateRegion(options, false);
},
clearRegionMappings() {
for (const r of self.regs) {
r.setWSRegion(null);
}
},
onLoad(ws) {
self.clearRegionMappings();
self._ws = ws;
self.checkReady();
self.needsUpdate();
if (isFF(FF_LSDV_E_278)) {
self.loadSyncedParagraphs();
}
},
checkReady() {
if (!self._ws || self._ws.destroyed) return;
if (self._ws.isDrawing) {
requestAnimationFrame(() => self.checkReady());
return;
}
self.onReady();
},
onSeek(time) {
if (isSyncedBuffering && self._skip_seek_event) return;
self.triggerSyncSeek(time);
},
onPlaying(playing) {
if (isSyncedBuffering && self.isPlaying === playing) return;
if (playing) {
// @todo self.play();
self.triggerSyncPlay();
} else {
// @todo self.pause();
self.triggerSyncPause();
}
self.isPlaying = playing;
},
/**
 * Local buffering state change handler (active only when synced buffering is enabled).
 * Sends the "buffering" sync event; resumes playback before sending when this tag is the
 * last buffering origin and stops buffering, and pauses after sending when buffering starts.
 * @param {boolean} isBuffering - whether this tag's media is buffering now.
 */
handleBuffering(isBuffering) {
if (!isSyncedBuffering) return;
// nothing changed for this tag as a buffering origin
if (self.syncManager?.isBufferingOrigin(self.name) === isBuffering) return;
const isAlreadyBuffering = self.syncManager?.isBuffering;
// true when this tag is the only registered buffering origin
const isLastCauseOfBuffering =
self.syncManager?.bufferingOrigins.size === 1 && self.syncManager?.isBufferingOrigin(self.name);
const willStartBuffering = !isAlreadyBuffering && isBuffering;
const willStopBuffering = isLastCauseOfBuffering && !isBuffering;
if (willStopBuffering) {
// restore playback if it was playing before buffering began
if (self.wasPlayingBeforeBuffering) {
self.isPlaying = true;
self._ws?.play();
}
}
self.triggerSyncBuffering(isBuffering);
// The real value, relevant for all medias synced together we have only after triggering the buffering event
self.isBuffering = self.syncManager?.isBuffering;
if (willStartBuffering) {
if (self._ws?.playing) {
self.isPlaying = false;
self._ws?.pause();
}
}
},
/**
 * Turn a loading error into a user-facing message and store it as the only entry of `errors`.
 * Errors named "HTTPError" use the ERR_LOADING_HTTP template, all others ERR_LOADING_AUDIO.
 * @param {Error} error - the error; its `name` and `message` are read.
 */
onError(error) {
  const messageHandler = error.name === "HTTPError" ? "ERR_LOADING_HTTP" : "ERR_LOADING_AUDIO";
  const { messages } = getEnv(self.store);

  const message = messages[messageHandler]({
    attr: self.value,
    url: self._value,
    error: error.message,
  });

  self.errors = [message];
},
beforeDestroy() {
try {
if (updateTimeout) clearTimeout(updateTimeout);
if (dispose) dispose();
if (isDefined(self._ws)) {
self._ws.destroy();
self._ws = null;
}
} catch (_err) {
self._ws = null;
console.warn("Already destroyed");
}
},
setWFFrame(frame) {
self._wfFrame = frame;
},
};
}),
);