import { observe } from "mobx";
import { getEnv, getRoot, getType, isAlive, types } from "mobx-state-tree";
import { createRef } from "react";
import { customTypes } from "../../../core/CustomTypes";
import { AnnotationMixin } from "../../../mixins/AnnotationMixin";
import IsReadyMixin from "../../../mixins/IsReadyMixin";
import ProcessAttrsMixin from "../../../mixins/ProcessAttrs";
import { SyncableMixin } from "../../../mixins/Syncable";
import { AudioRegionModel } from "../../../regions/AudioRegion";
import { FF_LSDV_E_278, isFF } from "../../../utils/feature-flags";
import { isDefined } from "../../../utils/utilities";
import ObjectBase from "../Base";
import { WS_SPEED, WS_VOLUME, WS_ZOOM_X } from "./constants";
import { ff } from "@humansignal/core";

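// Evaluated once at module load; gates all synced-buffering behavior below.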
const isSyncedBuffering = ff.isActive(ff.FF_SYNCED_BUFFERING);

/**
 * The Audio tag plays audio and shows its waveform. Use it for audio annotation tasks where you want to label regions of audio, see the waveform, and manipulate audio during annotation.
 *
 * Use with the following data types: audio
 * @example
 * <!-- Play audio on the labeling interface -->
 * <View>
 *   <Audio name="audio" value="$audio" />
 * </View>
 * @example
 * <!-- Play audio with multichannel support -->
 * <View>
 *   <Audio name="audio" value="$audio" splitchannels="true" />
 * </View>
 * @example
 * <!-- Audio classification -->
 * <View>
 *   <Audio name="audio" value="$audio" />
 *   <Choices name="ch" toName="audio">
 *     <Choice value="Positive" />
 *     <Choice value="Negative" />
 *   </Choices>
 * </View>
 * @example
 * <!-- Audio transcription -->
 * <View>
 *   <Audio name="audio" value="$audio" />
 *   <TextArea name="ta" toName="audio" />
 * </View>
 * @example
 * <!-- Labeling configuration to label regions of audio and rate the audio sample -->
 * <View>
 *   <Labels name="lbl-1" toName="audio-1">
 *     <Label value="Guitar" />
 *     <Label value="Drums" />
 *   </Labels>
 *   <Rating name="rate-1" toName="audio-1" />
 *   <Audio name="audio-1" value="$audio" />
 * </View>
 * @example
 * <!-- Sync with video -->
 * <View>
 *   <Video name="video-1" value="$video" sync="audio-1" />
 *   <Labels name="lbl-1" toName="audio-1">
 *     <Label value="Guitar" />
 *     <Label value="Drums" />
 *   </Labels>
 *   <Audio name="audio-1" value="$video" sync="video-1" />
 * </View>
 * @example
 * <!-- Sync with paragraphs -->
 * <View>
 *   <Labels name="lbl-1" toName="audio-1">
 *     <Label value="Guitar" />
 *     <Label value="Drums" />
 *   </Labels>
 *   <Audio name="audio-1" value="$audio" sync="txt-1" />
 *   <Paragraphs audioUrl="$audio" sync="audio-1" name="txt-1" value="$text" layout="dialogue" showplayer="true" />
 * </View>
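 * @example
 * <!-- Fast loading of large audio files: skip decoding (no waveform is shown) -->
 * <View>
 *   <Audio name="audio" value="$audio" decoder="none" />
 * </View>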
 * @regions AudioRegion
 * @meta_title Audio Tag for Audio Labeling
 * @meta_description Customize Label Studio with the Audio tag for advanced audio annotation tasks for machine learning and data science projects.
 * @name Audio
 * @param {string} name - Name of the element
 * @param {string} value - Data field containing a path or URL to the audio.
 * @param {string} [defaultspeed=1] - Default speed level (from 0.5 to 2).
 * @param {string} [defaultscale=1] - Default y-scale of the waveform in the audio pane.
 * @param {string} [defaultzoom=1] - Default zoom level for the waveform (from 1 to 1500).
 * @param {string} [defaultvolume=1] - Default volume level (from 0 to 1).
 * @param {string} [hotkey] - Hotkey used to play or pause audio.
 * @param {string} [sync] - Object name to sync with.
 * @param {string} [height=96] - Total height of the audio player.
 * @param {string} [waveheight=32] - Minimum height of a waveform when in `splitchannels` mode with multiple channels to display.
 * @param {boolean} [spectrogram=false] - Whether an audio spectrogram is displayed automatically upon loading.
 * @param {boolean} [splitchannels=false] - Display multiple audio channels separately if the audio file has more than one channel. (**NOTE: Requires more memory to operate.**)
 * @param {string} [decoder=webaudio] - Decoder type used to decode audio data. (`"webaudio"`, `"ffmpeg"`, or `"none"` for no decoding, which loads large files quickly but disables the waveform visualization)
 * @param {string} [player=html5] - Player type used to play audio data. (`"html5"` or `"webaudio"`)
 */
const TagAttrs = types.model({
  name: types.identifier,
  value: types.maybeNull(types.string),
  muted: types.optional(types.boolean, false),
  zoom: types.optional(types.boolean, true),
  defaultzoom: types.optional(types.string, WS_ZOOM_X.default.toString()),
  volume: types.optional(types.boolean, true),
  defaultvolume: types.optional(types.string, WS_VOLUME.default.toString()),
  speed: types.optional(types.boolean, true),
  defaultspeed: types.optional(types.string, WS_SPEED.default.toString()),
  hotkey: types.maybeNull(types.string),
  showlabels: types.optional(types.boolean, false),
  showscores: types.optional(types.boolean, false),
  height: types.optional(types.string, "96"),
  waveheight: types.optional(types.string, "32"),
  cursorwidth: types.optional(types.string, "2"),
  cursorcolor: types.optional(customTypes.color, "#333"),
  defaultscale: types.optional(types.string, "1"),
  autocenter: types.optional(types.boolean, true),
  scrollparent: types.optional(types.boolean, true),
  splitchannels: types.optional(types.boolean, false),
  decoder: types.optional(types.enumeration(["ffmpeg", "webaudio", "none"]), "webaudio"),
  player: types.optional(types.enumeration(["html5", "webaudio"]), "html5"),
  spectrogram: types.optional(types.boolean, false),
});

export const AudioModel = types.compose(
  "AudioModel",
  TagAttrs,
  SyncableMixin,
  ProcessAttrsMixin,
  ObjectBase,
  AnnotationMixin,
  IsReadyMixin,
  types
    .model("AudioModel", {
      type: "audio",
      _value: types.optional(types.string, ""),
      regions: types.array(AudioRegionModel),
    })
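    // Volatile (non-serialized) state: load errors, the stage ref, the wavesurfer
    // instance (_ws), the waveform frame, and a guard flag used to suppress seek
    // events emitted by programmatic play/pause during synced buffering.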
    .volatile(() => ({
      errors: [],
      stageRef: createRef(),
      _ws: null,
      _wfFrame: null,
      _skip_seek_event: false,
    }))
    .views((self) => ({
      get hasStates() {
        const states = self.states();

        return states && states.length > 0;
      },

      get store() {
        return getRoot(self);
      },

      states() {
        return self.annotation?.toNames.get(self.name) || [];
      },

      activeStates() {
        const states = self.states();

        return states?.filter((s) => getType(s).name === "LabelsModel" && s.isSelected);
      },

      get activeState() {
        const states = self.states();

        return states?.filter((s) => getType(s).name === "LabelsModel" && s.isSelected)[0];
      },

      get activeLabel() {
        const state = self.activeState;

        return state?.selectedValues()?.[0];
      },

      get activeLabelKey() {
        const labels = self.activeState?.selectedValues();

        // join the selected labels into a unique key so that adding or deleting a label triggers observers
        return labels ? labels.join(",") : "";
      },

      get readonly() {
        return self.annotation.isReadOnly();
      },
    }))
    ////// Sync actions
    .actions((self) => ({
      ////// Outgoing

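      // Broadcast the current playback state (plus any overrides in `data`) to synced peers.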
      triggerSync(event, data) {
        if (!self._ws) return;

        self.syncSend(
          {
            playing: self._ws.playing,
            time: self._ws.currentTime,
            speed: self._ws.rate,
            ...data,
          },
          event,
        );
      },

      triggerSyncSpeed(speed) {
        self.triggerSync("speed", { speed });
      },

      triggerSyncPlay(isManual = false) {
        if (isSyncedBuffering && self.isBuffering && !isManual) return;
        self.wasPlayingBeforeBuffering = true;
        // @todo should not be handled like this
        self.handleSyncPlay();
        // trigger play only after it has actually started to play
        self.triggerSync("play", { playing: true });
      },

      triggerSyncPause(isManual = false) {
        if (isSyncedBuffering && self.isBuffering && !isManual) return;
        self.wasPlayingBeforeBuffering = false;
        // @todo should not be handled like this
        self.handleSyncPause();
        self.triggerSync("pause", { playing: false });
      },

      triggerSyncSeek(time) {
        self.triggerSync("seek", { time, ...(isSyncedBuffering ? { playing: self.wasPlayingBeforeBuffering } : {}) });
      },

      triggerSyncBuffering(isBuffering) {
        if (!self._ws) return;

        const playing = self.wasPlayingBeforeBuffering;

        self.triggerSync("buffering", {
          buffering: isBuffering,
          playing,
        });
      },

      ////// Incoming

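      // Register incoming sync handlers; play, pause, and seek share a single handler.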
      registerSyncHandlers() {
        for (const event of ["play", "pause", "seek"]) {
          self.syncHandlers.set(event, self.handleSync);
        }
        self.syncHandlers.set("speed", self.handleSyncSpeed);
        if (isSyncedBuffering) {
          self.syncHandlers.set("buffering", self.handleSyncBuffering);
        }
      },

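      // A peer toggled buffering: pause while any peer is buffering, resume once all are done.
      // _skip_seek_event suppresses the seek events that the programmatic pause()/play() would re-broadcast.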
      handleSyncBuffering({ playing, ...data }) {
        self.isBuffering = self.syncManager?.isBuffering;
        if (data.buffering) {
          self.wasPlayingBeforeBuffering = playing;
          self._skip_seek_event = true;
          self.isPlaying = false;
          self._ws?.pause();
          self._skip_seek_event = false;
        }
        if (!self.isBuffering && !data.buffering) {
          if (playing) {
            self._skip_seek_event = true;
            self.isPlaying = true;
            self._ws?.play();
            self._skip_seek_event = false;
          }
        }
        // apply the rest of the payload (seek to the synced time)
        self.handleSyncSeek(data);
      },

      handleSync(data, event) {
        if (!self._ws?.loaded) return;

        if (!isSyncedBuffering) {
          self.handleSyncSeek(data);
        }

        const isBuffering = self.syncManager?.isBuffering;

        // normal play/pause logic when not buffering
        if (!isSyncedBuffering || (!isBuffering && isDefined(data.playing))) {
          if (data.playing) {
            if (!self._ws.playing) {
              self.isPlaying = true;
              self._ws?.play();
            }
          } else {
            if (self._ws.playing) {
              self.isPlaying = false;
              self._ws?.pause();
            }
          }
        }
        // during buffering only play/pause events carry real `playing` values (otherwise playback stays paused)
        if (["play", "pause"].indexOf(event) > -1) {
          self.wasPlayingBeforeBuffering = data.playing;
        }

        if (isSyncedBuffering) {
          self.handleSyncSeek(data);
        }
      },

      // @todo remove both of these methods
      handleSyncPlay() {
        if (self._ws?.playing) return;

        self.isPlaying = true;
        self._ws?.play();
      },

      handleSyncPause() {
        if (!self._ws?.playing) return;

        self.isPlaying = false;
        self._ws?.pause();
      },

      handleSyncSeek({ time }) {
        if (!self._ws?.loaded || !isDefined(time)) return;

        try {
          // setCurrentTime can sometimes take up to 76ms, and it is synchronous
          self._ws.setCurrentTime(time, true);
          // syncCursor draws synchronously, which can cost up to 10ms; that is too much while syncing playback
          setTimeout(() => {
            if (isAlive(self)) self._ws?.syncCursor();
          });
        } catch (err) {
          console.log(err);
        }
      },

      handleSyncSpeed({ speed }) {
        if (!self._ws) return;
        self._ws.rate = speed;
      },

      syncMuted(muted) {
        if (!self._ws) return;
        self._ws.muted = muted;
      },
    }))
    .actions((self) => {
      let dispose;
      let updateTimeout = null;

      return {
        afterCreate() {
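          // Observe the active label selection and repaint any selected waveform regions to match it.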
          dispose = observe(
            self,
            "activeLabelKey",
            () => {
              const selectedRegions = self._ws?.regions?.selected;

              if (!selectedRegions || selectedRegions.length === 0) return;

              const activeState = self.activeState;
              const selectedColor = activeState?.selectedColor;
              const labels = activeState?.selectedValues();

              for (const r of selectedRegions) {
                r.update({ color: selectedColor, labels: labels ?? [] });

                const region = r.isRegion ? self.updateRegion(r) : self.addRegion(r);

                self.annotation.selectArea(region);
              }

              if (selectedRegions.length) {
                self.requestWSUpdate();
              }
            },
            false,
          );
        },

        needsUpdate() {
          self.handleNewRegions();
          self.requestWSUpdate();
        },

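        // Debounce waveform redraws to roughly one frame (33ms) so rapid region updates are batched.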
        requestWSUpdate() {
          if (!self._ws) return;
          if (updateTimeout) {
            clearTimeout(updateTimeout);
          }

          updateTimeout = setTimeout(() => {
            self._ws.regions.redraw();
          }, 33);
        },

        onReady() {
          self.setReady(true);
        },

        onRateChange(rate) {
          self.triggerSyncSpeed(rate);
        },

        /**
         * Load synced paragraph text segments that contain start and end values
         * as audio segments, to visualize the excerpts within the audio track.
         **/
        loadSyncedParagraphs() {
          if (!self.syncManager) return;

          // find synced paragraphs, if any, and add their regions to the audio
          const syncedParagraphs = Array.from(self.syncManager.syncTargets, ([, value]) => value).filter(
            (target) => target.type === "paragraphs" && target.contextscroll,
          );

          for (const paragraph of syncedParagraphs) {
            const segments = Object.values(paragraph.regionsStartEnd).map(({ start, end }) => ({
              start,
              end,
              showInTimeline: true,
              external: true,
              locked: true,
            }));

            self._ws.addRegions(segments);
          }
        },

        handleNewRegions() {
          if (!self._ws) return;

          self.regs.forEach((reg) => {
            if (reg._ws_region) {
              self.updateWsRegion(reg);
            } else {
              self.createWsRegion(reg);
            }
          });
        },

        findRegionByWsRegion(wsRegion) {
          return self.regs.find((r) => r._ws_region?.id === wsRegion?.id);
        },

        getRegionColor() {
          const control = self.activeState;

          if (control) {
            return control.selectedColor;
          }

          return null;
        },

        onHotKey(e) {
          e?.preventDefault();
          self._ws?.togglePlay();
          return false;
        },

        setRangeValue(val) {
          self.rangeValue = val;
        },

        setPlaybackRate(val) {
          self.playBackRate = val;
        },

        addRegion(wsRegion) {
          // the area id is assigned to the WS region during deserialization
          const find_r = self.annotation.areas.get(wsRegion.id);

          if (find_r) {
            find_r.setWSRegion(wsRegion);
            find_r.updateColor();
            return find_r;
          }

          const states = self.getAvailableStates();

          if (states.length === 0) {
            // wsRegion.on("update-end", ev => self.selectRange(ev, wsRegion));
            if (wsRegion.isRegion) {
              wsRegion.convertToSegment().handleSelected();
            }

            return;
          }

          const activeStates = self.activeStates();
          const [control, ...rest] = activeStates;
          const labels = { [control.valueType]: control.selectedValues() };
          const r = ff.isActive(ff.FF_MULTIPLE_LABELS_REGIONS)
            ? self.annotation.createResult(wsRegion, labels, control, self, false, rest)
            : self.annotation.createResult(wsRegion, labels, control, self, false);
          const updatedRegion = wsRegion.convertToRegion(labels.labels);

          r.setWSRegion(updatedRegion);
          r.updateColor();
          return r;
        },

        updateRegion(wsRegion) {
          const r = self.findRegionByWsRegion(wsRegion);

          if (!r) return;

          r.onUpdateEnd();
          return r;
        },

        createWsRegion(region) {
          if (!self._ws) return;

          const options = region.wsRegionOptions();

          options.labels = region.labels?.length ? region.labels : undefined;

          const r = self._ws.addRegion(options, false);

          region.setWSRegion(r);
        },

        updateWsRegion(region) {
          if (!self._ws) return;

          const options = region.wsRegionOptions();

          options.labels = region.labels?.length ? region.labels : undefined;

          self._ws.updateRegion(options, false);
        },

        clearRegionMappings() {
          for (const r of self.regs) {
            r.setWSRegion(null);
          }
        },

        onLoad(ws) {
          self.clearRegionMappings();
          self._ws = ws;

          self.checkReady();
          self.needsUpdate();
          if (isFF(FF_LSDV_E_278)) {
            self.loadSyncedParagraphs();
          }
        },

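        // Poll once per animation frame until the waveform has finished drawing, then mark the tag ready.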
        checkReady() {
          if (!self._ws || self._ws.destroyed) return;
          if (self._ws.isDrawing) {
            requestAnimationFrame(() => self.checkReady());
            return;
          }
          self.onReady();
        },

        onSeek(time) {
          if (isSyncedBuffering && self._skip_seek_event) return;
          self.triggerSyncSeek(time);
        },

        onPlaying(playing) {
          if (isSyncedBuffering && self.isPlaying === playing) return;
          if (playing) {
            // @todo self.play();
            self.triggerSyncPlay();
          } else {
            // @todo self.pause();
            self.triggerSyncPause();
          }
          self.isPlaying = playing;
        },

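        // The local player started or stopped buffering: propagate it through the sync manager
        // and pause/resume playback so all synced media stay aligned.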
        handleBuffering(isBuffering) {
          if (!isSyncedBuffering) return;
          if (self.syncManager?.isBufferingOrigin(self.name) === isBuffering) return;
          const isAlreadyBuffering = self.syncManager?.isBuffering;
          const isLastCauseOfBuffering =
            self.syncManager?.bufferingOrigins.size === 1 && self.syncManager?.isBufferingOrigin(self.name);
          const willStartBuffering = !isAlreadyBuffering && isBuffering;
          const willStopBuffering = isLastCauseOfBuffering && !isBuffering;

          if (willStopBuffering) {
            if (self.wasPlayingBeforeBuffering) {
              self.isPlaying = true;
              self._ws?.play();
            }
          }

          self.triggerSyncBuffering(isBuffering);

          // the real value, relevant for all media synced together, is available only after triggering the buffering event
          self.isBuffering = self.syncManager?.isBuffering;

          if (willStartBuffering) {
            if (self._ws?.playing) {
              self.isPlaying = false;
              self._ws?.pause();
            }
          }
        },

        onError(error) {
          let messageHandler;

          if (error.name === "HTTPError") {
            messageHandler = "ERR_LOADING_HTTP";
          } else {
            messageHandler = "ERR_LOADING_AUDIO";
          }

          const message = getEnv(self.store).messages[messageHandler]({
            attr: self.value,
            url: self._value,
            error: error.message,
          });

          self.errors = [message];
        },

        beforeDestroy() {
          try {
            if (updateTimeout) clearTimeout(updateTimeout);
            if (dispose) dispose();
            if (isDefined(self._ws)) {
              self._ws.destroy();
              self._ws = null;
            }
          } catch (_err) {
            self._ws = null;
            console.warn("Already destroyed");
          }
        },

        setWFFrame(frame) {
          self._wfFrame = frame;
        },
      };
    }),
);