import insertAfter from "insert-after";
|
import * as Checkers from "./utilities";
|
import sanitizeHTML from "sanitize-html";
|
import Canvas from "./canvas";
|
import { cn } from "./bem";
|
|
/**
 * Fast way to change labels visibility for all text regions.
 *
 * Adds or removes the `htx-no-label` class on every `.htx-highlight` element of
 * the main document and of each rich-text iframe document.
 *
 * @param {boolean} show truthy to show labels and scores, falsy to hide them
 */
function toggleLabelsAndScores(show) {
  const applyTo = (doc) => {
    for (const highlight of Array.from(doc.getElementsByClassName("htx-highlight"))) {
      // labels presence controlled by explicit `showLabels` in the config
      if (highlight.classList.contains("htx-manual-label")) continue;

      if (show) {
        highlight.classList.remove("htx-no-label");
      } else {
        highlight.classList.add("htx-no-label");
      }
    }
  };

  const iframeSelector = `iframe.${cn("htx-richtext").toClassName()}`;

  applyTo(document);
  document.querySelectorAll(iframeSelector).forEach((frame) => applyTo(frame.contentWindow.document));
}
|
|
/**
 * Tag `node` with its labels and return a CSS class whose `:after`
 * pseudo-element renders the label (and score) as an SVG.
 *
 * The CSS rule for a given label/score combination is generated only once;
 * later calls with the same combination reuse the class name instead of
 * appending another `<style>` element.
 *
 * @param {Element} node element that receives the `data-labels` attribute
 * @param {{ index: *, labels: string[]|undefined, score: * }} params label data
 * @returns {string} lower-cased class name of the form `htx-label-<hash>`
 */
const labelWithCSS = (() => {
  // Class names whose CSS rule has already been created.
  const generated = new Set();

  return (node, { index, labels, score }) => {
    const labelsStr = labels ? labels.join(",") : "";
    const labelText = [index, labelsStr].filter(Boolean).join(":");
    const cssCls = `htx-label-${Checkers.hashCode(labelText + score)}`.toLowerCase();

    // Set per node, before the cache check, so reused classes still tag the node.
    node.setAttribute("data-labels", labelsStr);

    if (generated.has(cssCls)) return cssCls;

    const resSVG = Canvas.labelToSVG({ label: labelText, score });

    createClass(`.${cssCls}:after`, `content:url(${resSVG})`);
    generated.add(cssCls);

    return cssCls;
  };
})();
|
|
// work directly with the html tree
/**
 * Append a fresh `<style>` element to the document head and register a single
 * CSS rule in it.
 *
 * @param {string} name selector of the rule
 * @param {string} rules declarations placed between the braces
 */
function createClass(name, rules) {
  const styleEl = document.createElement("style");

  styleEl.type = "text/css";
  document.getElementsByTagName("head")[0].appendChild(styleEl);

  const sheet = styleEl.sheet || {};

  if (sheet.insertRule) {
    styleEl.sheet.insertRule(`${name}{${rules}}`, 0);
    return;
  }

  // Fallback for sheets that expose `addRule` rather than `insertRule`.
  (styleEl.styleSheet || styleEl.sheet).addRule(name, rules);
}
|
|
/**
 * Step forward through the tree: the first child when there is one, otherwise
 * the next sibling of the closest node (self or ancestor) that has one.
 *
 * @param {Node} node current position
 * @returns {Node|null} the following node, or null once no ancestor has a next sibling
 */
function documentForward(node) {
  const { firstChild } = node;

  if (firstChild) return firstChild;

  let current = node;

  while (current && !current.nextSibling) {
    current = current.parentNode;
  }

  return current ? current.nextSibling : null;
}
|
|
/**
 * Tell whether `node` is a text node (its `nodeType` equals `Node.TEXT_NODE`).
 *
 * @param {Node} node
 * @returns {boolean}
 */
function isTextNode(node) {
  const { nodeType } = node;

  return nodeType === Node.TEXT_NODE;
}
|
|
/* Find the first leaf node: keep following `firstChild` until a childless node. */
function firstLeaf(node) {
  return node.hasChildNodes() ? firstLeaf(node.firstChild) : node;
}
|
|
/* Find the last leaf node: keep following `lastChild` until a childless node. */
function lastLeaf(node) {
  return node.hasChildNodes() ? lastLeaf(node.lastChild) : node;
}
|
|
/**
 * Next node in document order: the first child if present, otherwise the next
 * sibling of the nearest node (self or ancestor) that has one.
 *
 * @param {Node} node
 * @returns {Node|undefined} undefined when the end of the tree is reached
 */
function getNextNode(node) {
  const { firstChild } = node;

  if (firstChild) return firstChild;

  for (let cursor = node; cursor; cursor = cursor.parentNode) {
    const { nextSibling } = cursor;

    if (nextSibling) return nextSibling;
  }

  return undefined;
}
|
|
/**
 * Walk from `node` up through its ancestors and report whether none of them
 * (before `commonAncestor`, if given, is reached) is an element flagged with
 * `data-skip-node="true"`.
 *
 * @param {Node} node node to check
 * @param {Node} [commonAncestor] stops the walk with a positive answer when reached
 * @returns {boolean} false as soon as a flagged element is met, true otherwise
 */
export function isValidTreeNode(node, commonAncestor) {
  for (let current = node; current; current = current.parentNode) {
    if (commonAncestor && current === commonAncestor) return true;

    const isElement = current.nodeType === Node.ELEMENT_NODE;

    if (isElement && current.dataset.skipNode === "true") return false;
  }

  return true;
}
|
|
/**
 * Collect the nodes covered by a range.
 *
 * The result starts with the ancestors of the start container (outermost
 * first, up to and including the common ancestor) and continues with the
 * nodes met while stepping with `getNextNode` from the start container to the
 * end container. Nodes rejected by `isValidTreeNode` are left out.
 *
 * @param {Range} range
 * @returns {Node[]}
 */
export function getNodesInRange(range) {
  const { startContainer, endContainer, commonAncestorContainer } = range;

  // walk parent nodes from start to common ancestor
  const ancestors = [];
  let current = startContainer.parentNode;

  while (current) {
    if (isValidTreeNode(current, commonAncestorContainer)) ancestors.push(current);
    if (current === commonAncestorContainer) break;
    current = current.parentNode;
  }
  ancestors.reverse();

  // walk children and siblings from start until end is found
  const traversed = [];

  current = startContainer;
  while (current) {
    if (isValidTreeNode(current, commonAncestorContainer)) traversed.push(current);
    if (current === endContainer) break;
    current = getNextNode(current);
  }

  return [...ancestors, ...traversed];
}
|
|
/**
 * Same as `getNodesInRange`, keeping only the text nodes.
 *
 * @param {Range} range
 * @returns {Node[]}
 */
export function getTextNodesInRange(range) {
  const textNodes = [];

  for (const candidate of getNodesInRange(range)) {
    if (isTextNode(candidate)) textNodes.push(candidate);
  }

  return textNodes;
}
|
|
/**
 * Mirror image of `documentForward`: the last child when there is one,
 * otherwise the previous sibling of the closest node (self or ancestor) that
 * has one.
 *
 * @param {Node} node current position
 * @returns {Node|null} the preceding node, or null once no ancestor has a previous sibling
 */
function documentReverse(node) {
  const { lastChild } = node;

  if (lastChild) return lastChild;

  for (let current = node; current; current = current.parentNode) {
    if (current.previousSibling) return current.previousSibling;
  }

  return null;
}
|
|
/**
 * Split text node into two nodes following each other.
 *
 * `node` keeps the characters before `offset`; a shallow clone holding the
 * remaining characters is placed right after it with `insertAfter`.
 * @param {Text} node
 * @param {number} offset
 * @returns whatever `insertAfter(tail, node)` returns
 */
function splitText(node, offset) {
  const remainder = node.cloneNode(false);
  const trailingLength = node.length - offset;

  remainder.deleteData(0, offset);
  node.deleteData(offset, trailingLength);

  return insertAfter(remainder, node);
}
|
|
/**
 * Snap both boundaries of `range` onto text nodes: afterwards the range starts
 * at offset 0 of the first text node it contains and ends at the full length
 * of the last one.
 *
 * @param {Range} range range to adjust in place
 */
function normalizeBoundaries(range) {
  let { startContainer, startOffset, endContainer, endOffset } = range;

  // Move the start container to the last leaf before any sibling boundary,
  // guaranteeing that any children of the container are within the range.
  if (startContainer.childNodes.length && startOffset > 0) {
    startContainer = lastLeaf(startContainer.childNodes[startOffset - 1]);
    startOffset = startContainer.length || startContainer.childNodes.length;
  }

  // Move the end container to the first leaf after any sibling boundary,
  // guaranteeing that any children of the container are within the range.
  if (endOffset < endContainer.childNodes.length) {
    endContainer = firstLeaf(endContainer.childNodes[endOffset]);
    endOffset = 0;
  }

  // Any TextNode in the traversal is valid unless excluded by the offset.
  const isTextNodeInRange = (candidate) => {
    if (!isTextNode(candidate)) return false;

    const excludedAtStart = candidate === startContainer && startOffset > 0;
    const excludedAtEnd = candidate === endContainer && endOffset === 0;

    return !excludedAtStart && !excludedAtEnd;
  };

  // Step from `from` with `step` until a text node inside the range is met;
  // gives up (null) once `boundary` has been visited without success.
  const seek = (from, boundary, step) => {
    let cursor = from;

    while (cursor && !isTextNodeInRange(cursor)) {
      cursor = cursor === boundary ? null : step(cursor);
    }

    return cursor;
  };

  // Forward search stops at the last leaf of the end container; the reverse
  // search stops at the first leaf of the start container.
  const start = seek(startContainer, lastLeaf(endContainer), documentForward);
  const end = seek(endContainer, firstLeaf(startContainer), documentReverse);

  range.setStart(start, 0);
  range.setEnd(end, end.length);
}
|
|
/**
 * Wrap every non-blank text node of a range into a highlight `<span>`.
 *
 * The first text node is skipped when the range starts at its very end, and
 * the last one (when there is more than one) is skipped when the range does
 * not reach its end.
 *
 * @param {{ _range: Range }} normedRange wrapper exposing the DOM range as `_range`
 * @param {string|null} [cssClass] class of the created spans; defaults to "htx-annotation"
 * @param {{ backgroundColor?: string }} [cssStyle] optional inline background colour
 * @returns {HTMLElement[]} created spans in document order; empty when the range has no text nodes
 */
function highlightRange(normedRange, cssClass, cssStyle) {
  const className = cssClass ?? "htx-annotation";
  const range = normedRange._range;
  const nodes = getTextNodesInRange(range);

  // Nothing to wrap (the range covers no text node).
  if (nodes.length === 0) return [];

  const whitespaceOnly = /^\s*$/;
  const backgroundColor = cssStyle?.backgroundColor;

  const first = range.startOffset === nodes[0].length ? 1 : 0;
  let end = nodes.length;

  if (end > 1 && nodes[end - 1].length !== range.endOffset) end -= 1;

  const results = [];

  for (const node of nodes.slice(first, end)) {
    if (whitespaceOnly.test(node.nodeValue)) continue;

    const hl = window.document.createElement("span");

    if (backgroundColor != null) hl.style.backgroundColor = backgroundColor;
    hl.className = className;

    // Put the span where the text node was, then move the text node inside it.
    node.parentNode.replaceChild(hl, node);
    hl.appendChild(node);

    results.push(hl);
  }

  return results;
}
|
|
/**
 * Split the text nodes at the range boundaries so that the range starts and
 * ends exactly on text-node edges. A boundary is only split when it sits
 * strictly inside a text node.
 *
 * @param {Range} range range to adjust in place
 */
function splitBoundaries(range) {
  const { startOffset, endOffset } = range;
  let { startContainer, endContainer } = range;

  const endsInsideText = isTextNode(endContainer) && endOffset > 0 && endOffset < endContainer.length;

  if (endsInsideText) {
    endContainer = splitText(endContainer, endOffset);
    range.setEnd(endContainer, 0);
  }

  const startsInsideText = isTextNode(startContainer) && startOffset > 0 && startOffset < startContainer.length;

  if (!startsInsideText) return;

  // When both boundaries still share one node, the end has to follow the
  // start into the newly created tail.
  const sharedContainer = startContainer === endContainer;

  startContainer = splitText(startContainer, startOffset);
  if (sharedContainer) range.setEnd(startContainer, endOffset - startOffset);
  range.setStart(startContainer, 0);
}
|
|
/**
 * Convert an offset local to `element` into an offset relative to `container`.
 *
 * Walks `container` depth-first, adding the length of every text node and 1
 * for every `BR` met before `element`, then adds `len`.
 *
 * @param {Node} container root of the walk
 * @param {Node} element node the local offset belongs to
 * @param {number} len offset inside `element`
 * @returns {number} global offset (NaN when `element` is not found under `container`)
 */
const toGlobalOffset = (container, element, len) => {
  let consumed = 0;

  const walk = (current) => {
    if (current === element) return consumed;

    switch (current.nodeName) {
      case "#text":
        consumed += current.length;
        break;
      case "BR":
        consumed += 1;
        break;
      default:
        break;
    }

    for (const child of Array.from(current.childNodes)) {
      const found = walk(child);

      if (found !== undefined) return found;
    }

    return undefined;
  };

  return len + walk(container);
};
|
|
/**
 * Translate the current window selection into offsets relative to `element`.
 *
 * Starting from the selection's own start/end offsets, adds the length of each
 * text node and 1 for each `BR` met (depth-first under `element`) before the
 * corresponding boundary container.
 *
 * @param {Node} element root the offsets are measured from
 * @returns {{ start: number, end: number }}
 */
const mainOffsets = (element) => {
  const range = window.getSelection().getRangeAt(0).cloneRange();
  const { startContainer, endContainer } = range;

  let start = range.startOffset;
  let end = range.endOffset;
  let passedStart = false;
  let passedEnd = false;

  const visit = (node) => {
    const { nodeName } = node;

    if (nodeName === "#text") {
      if (node === startContainer) passedStart = true;
      else if (!passedStart) start += node.length;

      if (node === endContainer) passedEnd = true;
      else if (!passedEnd) end += node.length;
    } else if (nodeName === "BR") {
      if (!passedStart) start += 1;
      if (!passedEnd) end += 1;
    }

    Array.from(node.childNodes).forEach((child) => visit(child));
  };

  visit(element);

  return { start, end };
};
|
|
/**
 * Find the text node under `el` that contains the global offset `globidx`.
 *
 * Walks depth-first, subtracting the length of every text node and 1 for every
 * `BR` until the remaining offset fits inside a text node.
 *
 * @param {Node} el root of the search
 * @param {number} globidx offset counted from the start of `el`
 * @returns {{ node: Node|undefined, len: number }} matching text node (undefined
 *   when the offset lies past the content) and the offset remaining at that point
 */
const findIdxContainer = (el, globidx) => {
  let len = globidx;

  const locate = (node) => {
    if (!node) return undefined;

    switch (node.nodeName) {
      case "#text":
        if (len - node.length <= 0) return node;
        len -= node.length;
        return undefined;

      case "BR":
        len -= 1;
        return undefined;

      default:
        for (const child of Array.from(node.childNodes)) {
          const hit = locate(child);

          if (hit) return hit;
        }
        return undefined;
    }
  };

  const node = locate(el);

  return { node, len };
};
|
|
/**
 * Unwrap highlight spans: each span's children are moved in front of it, the
 * span is removed, and every affected parent is normalized afterwards.
 *
 * @param {Iterable<HTMLElement>|null|undefined} spans spans to remove; a falsy value is a no-op
 */
function removeSpans(spans) {
  if (!spans) return;

  const parents = [];

  spans.forEach((span) => {
    const parent = span.parentNode;

    for (let child = span.firstChild; child; child = span.firstChild) {
      parent.insertBefore(child, span);
    }

    parents.push(parent);
    parent.removeChild(span);
  });

  for (const parent of parents) {
    parent.normalize();
  }
}
|
|
/**
 * Move every `<style>` child of `srcHead` into `destHead`.
 *
 * Before each style is moved, the CSS text of its current rules is recorded by
 * style id. Once the styles sit in the destination head, the recorded rules
 * are handed to `applyHighlightStylesToDoc` for the destination document.
 *
 * A style whose rules cannot be read (the sheet is missing or access throws)
 * is still moved; it just gets no recorded rules.
 *
 * @param {HTMLHeadElement} srcHead head to take the styles from
 * @param {HTMLHeadElement} destHead head that receives the styles
 */
function moveStylesBetweenHeadTags(srcHead, destHead) {
  const rulesByStyleId = {};
  const fragment = document.createDocumentFragment();

  // Snapshot first: moving nodes out of `srcHead.children` while indexing into
  // it would shift the remaining entries.
  const styles = Array.from(srcHead.children).filter((child) => child?.tagName === "STYLE");

  for (const style of styles) {
    try {
      // Must be read while the style is still attached to the source head.
      const { rules } = style.sheet;

      rulesByStyleId[style.id] = Array.from(rules, (rule) => rule.cssText);
    } catch {
      // Sometimes rules are not accessible; move the style without recorded rules.
    } finally {
      fragment.appendChild(style);
    }
  }

  destHead.appendChild(fragment);
  applyHighlightStylesToDoc(destHead.ownerDocument, rulesByStyleId);
}
|
|
/**
 * Re-insert previously recorded CSS rules into the matching style sheets of a
 * document.
 *
 * For every style sheet of `destDoc` whose owner node has an id present in
 * `rulesByStyleId`, each recorded rule text is passed to `insertRule` (without
 * an explicit index, as before). Sheets without an owner node or id are
 * skipped, and a rule that cannot be inserted does not prevent the remaining
 * ones from being inserted.
 *
 * @param {Document} destDoc document whose style sheets receive the rules
 * @param {Object<string, string[]>} rulesByStyleId rule texts keyed by `<style>` id
 */
function applyHighlightStylesToDoc(destDoc, rulesByStyleId) {
  for (const styleSheet of Array.from(destDoc.styleSheets)) {
    const style = styleSheet.ownerNode;

    // `ownerNode` can be null (a sheet not tied to an element), so guard before reading `id`.
    if (!style?.id) continue;

    const rules = rulesByStyleId[style.id];

    if (!rules) continue;

    for (let k = 0; k < rules.length; k++) {
      // Sometimes rules are not accessible or not insertable; keep going with the rest.
      try {
        style.sheet.insertRule(rules[k]);
      } catch {
        // best effort: skip this rule
      }
    }
  }
}
|
|
/**
 * Checks if element or one of its ancestors match given selector
 * (`closest()` walks up the tree, it never looks at descendants).
 * @param {HTMLElement} element Element to match
 * @param {string} selector CSS selector
 * @returns {boolean}
 */
export const matchesSelector = (element, selector) => {
  if (element.matches(selector)) return true;

  return element.closest(selector) !== null;
};
|
|
/**
 * Find a node by xpath.
 *
 * When `root` is not the document itself, the expression is made relative to
 * it by prefixing a "." (unless it already starts with one).
 * @param {string} xpath
 * @param {Node} root
 * @returns the first node yielded by the XPath result (`iterateNext()`)
 */
export const findByXpath = (xpath, root = document) => {
  const expression = root === document || xpath[0] === "." ? xpath : `.${xpath}`;
  const matches = document.evaluate(expression, root, null, XPathResult.ANY_TYPE, null);

  return matches.iterateNext();
};
|
|
/**
 * Escape the characters that are special in HTML text and attribute values.
 *
 * The input is converted to a string first. Each double quote, single quote,
 * ampersand, less-than and greater-than sign is replaced by its character
 * reference (named references quot, amp, lt, gt and the numeric reference
 * #39 for the single quote). Other characters are returned untouched.
 *
 * @param {*} string value to escape
 * @returns {string} escaped string
 */
export const htmlEscape = (string) => {
  // Reference names only; the surrounding "&" and ";" are added when replacing.
  const entityNames = {
    '"': "quot",
    "&": "amp",
    "'": "#39",
    "<": "lt",
    ">": "gt",
  };

  return `${string}`.replace(/["'&<>]/g, (char) => `&${entityNames[char]};`);
};
|
|
/**
 * Locate the leaf node under `context` that contains text offset `at`.
 *
 * Siblings whose text ends before `at` are skipped (their text length is
 * accumulated); otherwise the search descends into the node's children.
 *
 * @param {Node} context root of the search
 * @param {number} at text offset counted from the start of `context`
 * @returns {[Node, number]|undefined} the leaf and the offset inside it, or
 *   undefined when `at` lies beyond the text content
 */
function findNodeAt(context, at) {
  let skipped = 0;
  let current = context.firstChild;

  while (current) {
    const size = current.textContent.length;

    if (size + skipped >= at) {
      if (!current.firstChild) return [current, at - skipped];
      current = current.firstChild;
    } else {
      skipped += size;
      current = current.nextSibling;
    }
  }

  return undefined;
}
|
|
/**
 * Sanitize html from scripts, inline event handlers and untrusted iframes.
 *
 * - `<script>` elements are dropped.
 * - `<iframe>` elements are dropped unless their `src` is an https URL whose
 *   hostname is in the allow-list below.
 * - Every attribute whose name starts with "on" is removed. This covers all
 *   inline event handlers rather than a fixed list of known ones; it also
 *   removes any non-standard attribute that happens to start with "on".
 * @param {string} html
 * @returns {string}
 */
function sanitizeHtml(html = "") {
  if (!html) return "";

  // Whitelist of allowed iframe domains - easily extensible for future additions
  const ALLOWED_IFRAME_DOMAINS = new Set([
    "www.youtube.com",
    "youtube.com",
    "www.youtube-nocookie.com",
    "youtube-nocookie.com",
    "youtu.be", // YouTube's shortened URL format
  ]);

  // Tags removed unconditionally (iframes are handled separately). A Set
  // avoids accidental hits on inherited object keys such as "constructor".
  const DISALLOWED_TAGS = new Set(["script"]);

  // Helper function to validate if iframe src is from an allowed domain
  const isAllowedIframeSrc = (src) => {
    if (!src) return false;

    try {
      const url = new URL(src);

      // Only allow HTTPS for security, and only exact hostname matches
      return url.protocol === "https:" && ALLOWED_IFRAME_DOMAINS.has(url.hostname);
    } catch {
      // Invalid URL format
      return false;
    }
  };

  const isEventHandlerAttribute = (attr) => /^on/i.test(attr);

  // Returns the tag with every event-handler attribute removed.
  const stripEventHandlers = (tagName, attribs) => ({
    tagName,
    attribs: Object.fromEntries(Object.entries(attribs).filter(([attr]) => !isEventHandlerAttribute(attr))),
  });

  return sanitizeHTML(html, {
    allowedTags: false,
    allowedAttributes: false,
    disallowedTagsMode: "discard",
    allowVulnerableTags: true,
    exclusiveFilter(frame) {
      // For iframes, only block if NOT from whitelisted domain
      if (frame.tag === "iframe") {
        return !isAllowedIframeSrc(frame.attribs?.src);
      }

      // Check other disallowed tags
      return DISALLOWED_TAGS.has(frame.tag);
    },
    nonTextTags: ["script", "textarea", "option", "noscript"],
    transformTags: {
      // Registered for iframes as well so handlers are stripped whether or not
      // the wildcard transform also runs for tags that have their own entry.
      iframe: stripEventHandlers,
      "*": stripEventHandlers,
    },
  });
}
|
|
export {
|
toggleLabelsAndScores,
|
labelWithCSS,
|
findNodeAt,
|
removeSpans,
|
mainOffsets,
|
findIdxContainer,
|
toGlobalOffset,
|
highlightRange,
|
sanitizeHtml,
|
splitBoundaries,
|
normalizeBoundaries,
|
createClass,
|
moveStylesBetweenHeadTags,
|
applyHighlightStylesToDoc,
|
};
|