import { createRef } from "react";
import { getRoot, types } from "mobx-state-tree";
import ColorScheme from "pleasejs";
import { errorBuilder } from "../../../core/DataValidator/ConfigValidator";
import { AnnotationMixin } from "../../../mixins/AnnotationMixin";
import RegionsMixin from "../../../mixins/Regions";
import { SyncableMixin } from "../../../mixins/Syncable";
import { ParagraphsRegionModel } from "../../../regions/ParagraphsRegion";
import Utils from "../../../utils";
import { parseValue } from "../../../utils/data";
import { FF_DEV_2669, FF_DEV_2918, FF_LSDV_E_278, isFF } from "../../../utils/feature-flags";
import messages from "../../../utils/messages";
import { clamp, isDefined, isValidObjectURL } from "../../../utils/utilities";
import ObjectBase from "../Base";
import styles from "./Paragraphs.module.scss";
import { ff } from "@humansignal/core";
const isSyncedBuffering = ff.isActive(ff.FF_SYNCED_BUFFERING);
/**
* The `Paragraphs` tag displays paragraphs of text on the labeling interface. Use to label dialogue transcripts for NLP and NER projects.
* The `Paragraphs` tag expects task data formatted as an array of objects like the following:
* [{ $nameKey: "Author name", $textKey: "Text" }, ... ]
*
* Use with the following data types: text.
* @example
* <!--Labeling configuration to label paragraph regions of text containing dialogue-->
* <View>
*   <Paragraphs name="dialogue-1" value="$dialogue" layout="dialogue" />
*   <ParagraphLabels name="importance" toName="dialogue-1">
*     <Label value="Important content"></Label>
*     <Label value="Random talk"></Label>
*   </ParagraphLabels>
* </View>
* @example
* <!-- Paragraphs with audio -->
* <View>
*   <Paragraphs audioUrl="$audio" value="$para" name="paragraphs"
*               layout="dialogue" textKey="text" nameKey="author"
*               showPlayer="true"
*               />
*   <Choices name="choices" toName="paragraphs" choice="multiple">
*     <Choice value="Good quality"/>
*     <Choice value="Fast speech"/>
*   </Choices>
* </View>
* @name Paragraphs
* @regions ParagraphsRegion
* @meta_title Paragraph Tags for Paragraphs
* @meta_description Customize Label Studio with the Paragraphs tag to annotate paragraphs for NLP and NER machine learning and data science projects.
* @param {string} name - Name of the element
* @param {string} value - Data field containing the paragraph content
* @param {json|url} [valueType=json] - Whether the data is stored directly in uploaded JSON data or needs to be loaded from a URL
* @param {string} [audioUrl] - Audio to sync phrases with
* @param {string} [sync] - Object name to sync with
* @param {boolean} [showPlayer=false] - Whether to show audio player above the paragraphs. Ignored if sync object is audio
* @param {no|yes} [saveTextResult=yes] - Whether to store labeled text along with the results. By default, doesn't store text for `valueType=url`
* @param {none|dialogue} [layout=none] - Whether to use a dialogue-style layout or not
* @param {string} [nameKey=author] - The key field to use for name
* @param {string} [textKey=text] - The key field to use for the text
* @param {boolean} [contextScroll=false] - Turn on contextual scroll mode
*/
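// Attribute names arrive lowercased from the XML config parser,
// hence `namekey`/`textkey` here for the documented `nameKey`/`textKey`.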
const TagAttrs = types.model("ParagraphsModel", {
value: types.maybeNull(types.string),
valuetype: types.optional(types.enumeration(["json", "url"]), () => (window.LS_SECURE_MODE ? "url" : "json")),
audiourl: types.maybeNull(types.string),
showplayer: false,
highlightcolor: types.maybeNull(types.string),
showlabels: types.optional(types.boolean, false),
layout: types.optional(types.enumeration(["none", "dialogue"]), "none"),
// @todo add `valueType=url` to Paragraphs and add autodetection of `savetextresult`
savetextresult: types.optional(types.enumeration(["none", "no", "yes"]), () =>
window.LS_SECURE_MODE ? "no" : "yes",
),
namekey: types.optional(types.string, "author"),
textkey: types.optional(types.string, "text"),
contextscroll: types.optional(types.boolean, false),
});
const Model = types
.model("ParagraphsModel", {
type: "paragraphs",
_update: types.optional(types.number, 1),
})
.views((self) => ({
get hasStates() {
const states = self.states();
return states && states.length > 0;
},
get store() {
return getRoot(self);
},
get audio() {
if (!self.audiourl) return null;
if (self.audiourl[0] === "$") {
const store = getRoot(self);
const val = self.audiourl.slice(1);
return store.task.dataObj[val];
}
return self.audiourl;
},
layoutStyles(data) {
if (self.layout === "dialogue") {
const seed = data[self.namekey];
const color = ColorScheme.make_color({ seed })[0];
if (isFF(FF_LSDV_E_278)) {
return {
phrase: {
"--highlight-color": color,
"--background-color": "#FFF",
},
name: { color },
inactive: {
phrase: {
"--highlight-color": Utils.Colors.convertToRGBA(color, 0.4),
"--background-color": "#FAFAFA",
},
name: { color: Utils.Colors.convertToRGBA(color, 0.9) },
},
};
}
return {
phrase: { backgroundColor: Utils.Colors.convertToRGBA(color, 0.25) },
};
}
return {};
},
get layoutClasses() {
if (self.layout === "dialogue") {
return {
phrase: styles.phrase,
name: styles.dialoguename,
text: styles.dialoguetext,
};
}
return {
phrase: styles.phrase,
name: styles.name,
text: styles.text,
};
},
states() {
return self.annotation.toNames.get(self.name);
},
activeStates() {
const states = self.states();
return states && states.filter((s) => s.isSelected && s._type === "paragraphlabels");
},
isVisibleForAuthorFilter(data) {
if (!isFF(FF_DEV_2669)) return true;
return !self.filterByAuthor.length || self.filterByAuthor.includes(data[self.namekey]);
},
}));
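/**
 * Audio playback and sync state: keeps the inline player, the highlighted
 * phrase (`playingId`) and any media synced via SyncableMixin in agreement.
 */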
const PlayableAndSyncable = types
.model()
.volatile(() => ({
_value: null,
filterByAuthor: [],
searchAuthor: "",
playingId: -1,
playing: false, // just internal state for UI
wasPlayingBeforeBuffering: false, // play state to restore after a buffering pause
isBuffering: false, // mirrors the sync manager's buffering state
audioRef: createRef(),
audioDuration: null,
audioFrameHandler: null,
_viewRef: null, // ref to the view component, used by the selection helpers below
}))
.views((self) => ({
/**
* Indices of all regions that are active at the given time.
* @param {number} time
* @returns {Array}
*/
regionIndicesByTime(time) {
const indices = [];
self._value?.forEach(({ start, duration, end }, idx) => {
// returning from the callback only skips this entry; forEach ignores return values
if (start === undefined || start > time) return;
if (duration === undefined && end === undefined) indices.push(idx);
else if ((end ?? start + duration) > time) indices.push(idx);
});
return indices;
},
/**
* Returns regions start and end times.
* Memoized as an MST view; recomputed only when `_value` or `audioDuration` change.
* @returns {Array<{start: number, end: number}>}
*/
get regionsStartEnd() {
if (!self.audioDuration) return [];
return self._value?.map((value) => {
if (value.start === undefined) return {};
const start = clamp(value.start ?? 0, 0, self.audioDuration);
const _end = value.duration ? start + value.duration : (value.end ?? self.audioDuration);
const end = clamp(_end, start, self.audioDuration);
return { start, end };
}) ?? []; // empty list until `_value` is loaded
},
get regionsValues() {
return Object.values(self.regionsStartEnd);
},
}))
.actions((self) => ({
/**
* Wrapper to always send important data during sync
* @param {string} event
* @param {object} data
*/
triggerSync(event, data) {
const audio = self.audioRef.current;
if (!audio) return;
self.syncSend(
{
playing: !audio.paused,
time: audio.currentTime,
...data,
},
event,
);
},
triggerSyncBuffering(isBuffering) {
if (!self.audioRef?.current) return;
if (!isSyncedBuffering) return;
const playing = self.wasPlayingBeforeBuffering;
self.triggerSync("buffering", {
buffering: isBuffering,
playing,
});
},
registerSyncHandlers() {
self.syncHandlers.set("pause", isSyncedBuffering ? self.handleSyncPause : self.stopNow);
self.syncHandlers.set("play", self.handleSyncPlay);
self.syncHandlers.set("seek", self.handleSyncPlay);
self.syncHandlers.set("speed", self.handleSyncSpeed);
if (isSyncedBuffering) {
self.syncHandlers.set("buffering", self.handleSyncBuffering);
}
},
handleSyncBuffering({ playing, ...data }) {
const audio = self.audioRef.current;
if (!audio) return;
audio.currentTime = data.time;
self.isBuffering = self.syncManager?.isBuffering;
if (data.buffering) {
self.wasPlayingBeforeBuffering = playing;
audio?.pause();
self.playing = false;
}
if (!self.isBuffering && !data.buffering) {
if (playing) {
audio?.play();
self.playing = true;
self.trackPlayingId();
}
}
},
handleSyncPlay({ time, playing }, event) {
const audio = self.audioRef.current;
if (!audio) return;
const isBuffering = self.syncManager?.isBuffering;
audio.currentTime = time;
// Normal logic when no buffering
if (!isSyncedBuffering || (!isBuffering && isDefined(playing))) {
// so we are changing time inside current region only
const isPaused = isSyncedBuffering ? !self.wasPlayingBeforeBuffering : audio.paused;
if (isPaused && playing) {
self.play();
} else if (isSyncedBuffering && !isPaused && !playing) {
// sometimes video can trigger a `seek` event with `playing=false`, and we need to pause in this case
self.stopNow();
} else {
self.trackPlayingId();
}
}
// during buffering only these events carry real `playing` values (in all other cases it's paused the whole time)
if (["play", "pause"].indexOf(event) > -1) {
self.wasPlayingBeforeBuffering = playing;
}
},
handleSyncPause({ playing }, event) {
if (event === "pause") {
self.wasPlayingBeforeBuffering = false;
}
const isBuffering = self.syncManager?.isBuffering;
if (!isSyncedBuffering || (!isBuffering && isDefined(playing))) {
self.stopNow();
}
},
handleSyncSpeed({ speed }) {
const audio = self.audioRef.current;
if (audio) audio.playbackRate = speed;
},
syncMuted(muted) {
const audio = self.audioRef.current;
if (audio) audio.muted = muted;
},
}))
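// Playback control: play/stop helpers plus the requestAnimationFrame loop
// that tracks which phrase is currently active.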
.actions((self) => ({
handleAudioLoaded(e) {
const audio = e.target;
self.audioDuration = audio.duration;
},
reset() {
self.playingId = -1;
if (self.audioFrameHandler) {
cancelAnimationFrame(self.audioFrameHandler);
self.audioFrameHandler = null;
}
},
stopNow() {
const audio = self.audioRef.current;
if (!audio) return;
if (audio.paused) return;
audio.pause();
self.playing = false;
self.triggerSync("pause", isSyncedBuffering ? { playing: false } : undefined);
},
/**
* Audio can be seeked to another time or speed can be changed,
* so we need to check if we already reached the end of current region
* and stop if needed.
* Runs timer to check this every frame.
*/
stopAtTheEnd() {
const audio = self.audioRef.current;
if (!audio) return;
if (audio.paused) return;
const { end } = self.regionsStartEnd[self.playingId] ?? {};
if (audio.currentTime < end) {
self.audioFrameHandler = requestAnimationFrame(self.stopAtTheEnd);
return;
}
self.stopNow();
self.reset();
},
trackPlayingId() {
if (!isSyncedBuffering) {
self._trackPlayingId();
return;
}
if (self.audioFrameHandler) cancelAnimationFrame(self.audioFrameHandler);
self.audioFrameHandler = requestAnimationFrame(self._trackPlayingId);
},
_trackPlayingId() {
if (self.audioFrameHandler) cancelAnimationFrame(self.audioFrameHandler);
const audio = self.audioRef.current;
const currentTime = audio?.currentTime;
const endTime = audio?.duration;
if (!isDefined(currentTime) || !isDefined(endTime) || currentTime >= endTime) {
self.reset();
return;
}
const regions = self.regionsValues;
self.playingId = regions.findIndex(({ start, end }) => {
return currentTime >= start && currentTime < end;
});
const isPlaying = isSyncedBuffering ? self.playing && !self.isBuffering : !audio.paused;
if (isPlaying) {
self.audioFrameHandler = requestAnimationFrame(self._trackPlayingId);
}
},
playAny() {
const audio = self.audioRef?.current;
if (!isDefined(audio)) return;
const isPaused = audio.paused;
if (isPaused) {
audio.play();
self.triggerSync("play", isSyncedBuffering ? { playing: true } : undefined);
}
self.playing = true;
self.trackPlayingId();
},
play(idx) {
self.wasPlayingBeforeBuffering = true;
if (!isDefined(idx)) {
self.playAny();
return;
}
const { start, end } = self.regionsStartEnd[idx] ?? {};
const audio = self.audioRef?.current;
if (!isDefined(audio) || !isDefined(start) || !isDefined(end)) return;
const isPlaying = !audio.paused;
const currentId = self.playingId;
if (isPlaying && currentId === idx) {
self.wasPlayingBeforeBuffering = false;
self.stopNow();
return;
}
if (idx !== currentId) {
audio.currentTime = start;
}
audio.play();
self.playing = true;
self.playingId = idx;
self.triggerSync("play", isSyncedBuffering ? { playing: true } : undefined);
self.trackPlayingId();
},
handleBuffering(isBuffering) {
if (!isSyncedBuffering) return;
if (self.syncManager?.isBufferingOrigin(self.name) === isBuffering) return;
const isAlreadyBuffering = self.syncManager?.isBuffering;
const isLastCauseOfBuffering =
self.syncManager?.bufferingOrigins.size === 1 && self.syncManager?.isBufferingOrigin(self.name);
const willStartBuffering = !isAlreadyBuffering && isBuffering;
const willStopBuffering = isLastCauseOfBuffering && !isBuffering;
const audio = self.audioRef?.current;
if (willStopBuffering) {
if (self.wasPlayingBeforeBuffering) {
audio?.play();
}
}
self.triggerSyncBuffering(isBuffering);
// The real value, relevant for all media synced together, is only known after triggering the buffering event
self.isBuffering = self.syncManager?.isBuffering;
if (willStartBuffering) {
audio?.pause();
}
if (willStopBuffering && self.wasPlayingBeforeBuffering) {
self.trackPlayingId();
}
},
}))
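// Phrase navigation and selection helpers (used by the view layer, e.g. for keyboard navigation).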
.actions((self) => ({
setAuthorSearch(value) {
self.searchAuthor = value;
},
setAuthorFilter(value) {
self.filterByAuthor = value;
},
seekToPhrase(idx) {
if (!isDefined(idx) || !self._value || idx < 0 || idx >= self._value.length) return;
const phrase = self._value[idx];
if (self.playingId === idx) {
return;
}
// Check if audio is currently playing
const audio = self.audioRef?.current;
const isAudioPlaying = audio && !audio.paused;
// If audio is playing and phrase has timing data, use play() method for proper transition
if (isAudioPlaying && phrase.start !== undefined) {
self.play(idx);
return;
}
// Always set playingId for visual selection, regardless of audio
self.playingId = idx;
// Only handle audio/sync logic if audio timing data exists
if (phrase.start !== undefined) {
if (self.syncSend) {
self.syncSend({ time: phrase.start, playing: false }, "seek");
} else {
if (audio) {
audio.currentTime = phrase.start;
}
}
}
},
setViewRef(ref) {
self._viewRef = ref;
},
selectPhraseText(phraseIndex) {
self._viewRef?.selectText?.(phraseIndex);
},
selectAndAnnotatePhrase(phraseIndex) {
// Select text and create annotation
self.selectPhraseText(phraseIndex);
self._viewRef?.createAnnotationForPhrase?.(phraseIndex);
},
selectAllAndAnnotateCurrentPhrase() {
if (!self._value || self._value.length === 0) return;
const idx = Math.max(0, self.playingId);
self.selectAndAnnotatePhrase(idx);
},
goToNextPhrase() {
if (!self._value || self._value.length === 0) return;
const currentIdx = self.playingId >= 0 ? self.playingId : -1;
const nextIdx = (currentIdx + 1) % self._value.length;
self.seekToPhrase(nextIdx);
},
goToPreviousPhrase() {
if (!self._value || self._value.length === 0) return;
const currentIdx = Math.max(0, self.playingId);
const prevIdx = (currentIdx - 1 + self._value.length) % self._value.length;
self.seekToPhrase(prevIdx);
},
}));
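/**
 * Data loading and region management: resolves `value` (inline JSON or a URL,
 * depending on `valuetype`), validates its shape and creates paragraph regions.
 */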
const ParagraphsLoadingModel = types.model().actions((self) => ({
needsUpdate() {
self._update = self._update + 1;
},
updateValue(store) {
const value = parseValue(self.value, store.task.dataObj);
if (self.valuetype === "url") {
const url = value;
if (!isValidObjectURL(url, true)) {
const message = [];
if (url) {
message.push(`URL (${url}) is not valid.`);
message.push('You should not put data directly into your task if you use valuetype="url".');
} else {
message.push(`URL is empty, check ${self.value} in data JSON.`);
}
if (window.LS_SECURE_MODE) message.unshift('In SECURE MODE valuetype set to "url" by default.');
store.annotationStore.addErrors([errorBuilder.generalError(message.join("\n"))]);
self.setRemoteValue("");
return;
}
fetch(url)
.then((res) => {
if (!res.ok) throw new Error(`${res.status} ${res.statusText}`);
return res.json();
})
.then(self.setRemoteValue)
.catch((e) => {
const message = messages.ERR_LOADING_HTTP({ attr: self.value, error: String(e), url });
store.annotationStore.addErrors([errorBuilder.generalError(message)]);
self.setRemoteValue("");
});
} else {
self.setRemoteValue(value);
}
},
setRemoteValue(val) {
const errors = [];
if (!Array.isArray(val)) {
errors.push("Provided data is not an array");
} else if (!val.length) {
// guard the empty array so the `val[0]` checks below can't throw
errors.push("Provided array is empty");
} else {
if (!(self.namekey in val[0])) {
errors.push(`"${self.namekey}" field not found in task data; check your nameKey parameter`);
}
if (!(self.textkey in val[0])) {
errors.push(`"${self.textkey}" field not found in task data; check your textKey parameter`);
}
}
if (errors.length) {
const general = [
`Task data (provided as ${self.value}) has wrong format.\n`,
"It should be an array of objects with fields,",
'defined by nameKey ("author" by default)',
'and textKey ("text" by default)',
].join(" ");
self.store.annotationStore.addErrors([
errorBuilder.generalError(`${general}${errors.map((error) => `- ${error}\n`).join("")}\n`),
]);
return;
}
const contextScroll = isFF(FF_LSDV_E_278) && self.contextscroll;
const value = contextScroll
? val.sort((a, b) => {
if (!a.start) return 1;
if (!b.start) return -1;
// parenthesize the duration fallback: `+` binds tighter than `||`
const aEnd = a.end ? a.end : a.start + (a.duration || 0);
const bEnd = b.end ? b.end : b.start + (b.duration || 0);
if (a.start === b.start) return aEnd - bEnd;
return a.start - b.start;
})
: val;
self._value = value;
self.needsUpdate();
},
createRegion(p) {
const r = ParagraphsRegionModel.create({
pid: p.id,
...p,
});
r._range = p._range;
self.regions.push(r);
self.annotation.addRegion(r);
return r;
},
addRegions(ranges) {
const areas = [];
const states = self.getAvailableStates();
if (states.length === 0) return []; // always return an array so `addRegions(...)[0]` is safe
const [control, ...rest] = states;
const labels = { [control.valueType]: control.selectedValues() };
for (const range of ranges) {
const area = ff.isActive(ff.FF_MULTIPLE_LABELS_REGIONS)
? self.annotation.createResult(range, labels, control, self, false, rest)
: self.annotation.createResult(range, labels, control, self, false);
area.setText(range.text);
area.notifyDrawingFinished();
area._range = range._range;
areas.push(area);
}
return areas;
},
addRegion(range) {
if (isFF(FF_DEV_2918)) {
return self.addRegions([range])[0];
}
const states = self.getAvailableStates();
if (states.length === 0) return;
const [control, ...rest] = states;
const labels = { [control.valueType]: control.selectedValues() };
const area = ff.isActive(ff.FF_MULTIPLE_LABELS_REGIONS)
? self.annotation.createResult(range, labels, control, self, false, rest)
: self.annotation.createResult(range, labels, control, self, false);
area.setText(range.text);
area.notifyDrawingFinished();
area._range = range._range;
return area;
},
}));
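// Note: with `types.compose`, later models in this list override earlier ones.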
const paragraphModelMixins = [
RegionsMixin,
TagAttrs,
SyncableMixin,
ObjectBase,
AnnotationMixin,
Model,
PlayableAndSyncable,
ParagraphsLoadingModel,
].filter(Boolean);
export const ParagraphsModel = types.compose("ParagraphsModel", ...paragraphModelMixins);