import { SampleDatasetSelect } from "@humansignal/app-common/blocks/SampleDatasetSelect/SampleDatasetSelect";
|
import { ff, formatFileSize } from "@humansignal/core";
|
import { IconCode, IconErrorAlt, IconFileUpload, IconInfoOutline, IconTrash, IconUpload } from "@humansignal/icons";
|
import { Badge } from "@humansignal/shad/components/ui/badge";
|
import { cn as scn } from "@humansignal/shad/utils";
|
import { useAtomValue } from "jotai";
|
import Input from "libs/datamanager/src/components/Common/Input/Input";
|
import { useCallback, useEffect, useReducer, useRef, useState } from "react";
|
import { useAPI } from "../../../providers/ApiProvider";
|
import { cn } from "../../../utils/bem";
|
import { unique } from "../../../utils/helpers";
|
import { sampleDatasetAtom } from "../utils/atoms";
|
import "./Import.scss";
|
import { Button, CodeBlock, SimpleCard, Spinner, Tooltip, Typography } from "@humansignal/ui";
|
import truncate from "truncate-middle";
|
import samples from "./samples.json";
|
import { importFiles } from "./utils";
|
|
// Class-name builders (from utils/bem) for the two root elements of this page:
// the drag-and-drop area and the import page wrapper.
const dropzoneClass = cn("dropzone");
const importClass = cn("upload_page");
|
|
// How many characters of a file name are kept on each side when it is
// shortened for display in the file table.
const FILENAME_TRUNCATE_START = 24;
const FILENAME_TRUNCATE_END = 24;

// How long (in milliseconds) a freshly uploaded row stays flagged for the
// flash animation: 2 seconds.
const FLASH_ANIMATION_DURATION = 2 * 1000;
|
|
/**
 * Flattens a list of lists by exactly one level.
 *
 * Implemented with `Array.prototype.flat` instead of `[].concat(...nested)`:
 * spreading a very long list as call arguments can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * @param {Iterable} nested - Items to flatten; array items are expanded,
 *   anything else is kept as-is.
 * @returns {Array} A new array, one level flatter than the input.
 */
function flatten(nested) {
  // Array.from keeps support for any iterable input, as the spread did.
  return Array.from(nested).flat();
}
|
|
// File extensions accepted for direct upload, grouped by data type.
// Keep in sync with core.settings.SUPPORTED_EXTENSIONS on the BE.
const supportedExtensions = {
  text: ["txt"],
  audio: ["wav", "mp3", "flac", "m4a", "ogg"],
  video: ["mp4", "webm"],
  image: ["bmp", "gif", "jpg", "jpeg", "png", "svg", "webp"],
  html: ["html", "htm", "xml"],
  pdf: ["pdf"],
  structuredData: ["csv", "tsv", "json"],
};

// Every accepted extension as one flat list, in the group order above.
const extensionGroups = Object.values(supportedExtensions);
const allSupportedExtensions = flatten(extensionGroups);
|
|
/**
 * Extracts the lower-cased extension of a file name.
 *
 * @param {string} fileName - Name to inspect.
 * @returns {string} The text after the last "." in lower case; the whole name
 *   (lower-cased) when it has no dot. Falsy input is returned unchanged.
 */
function getFileExtension(fileName) {
  if (fileName) {
    // lastIndexOf yields -1 when there is no dot, so the slice starts at 0.
    const afterLastDot = fileName.slice(fileName.lastIndexOf(".") + 1);

    return afterLastDot.toLowerCase();
  }
  return fileName;
}
|
|
/**
 * Recursively collects the file entries reachable from a dropped entry.
 *
 * @param {object|null} item - Entry exposing `isFile` / `isDirectory`, `name`
 *   and, for directories, `createReader()`. Missing entries yield no files.
 * @param {string} [path] - Path prefix of the parent directories. Non-string
 *   values (e.g. the index supplied when this function is used directly as an
 *   `Array.prototype.map` callback) are ignored.
 * @returns {Promise<Array>} Resolves with the file entries found. Entries
 *   whose name starts with "." are skipped. The promise always settles: a
 *   directory that cannot be read contributes whatever was listed before the
 *   failure, and the error is logged.
 */
async function traverseFileTree(item, path) {
  // Nothing file-like behind the dragged item.
  if (!item) return [];

  if (item.isFile) {
    // Avoid hidden files
    return item.name[0] === "." ? [] : [item];
  }

  // Neither a file nor a directory: resolve instead of hanging forever.
  if (!item.isDirectory) return [];

  const parentPath = typeof path === "string" ? path : "";
  const dirPath = `${parentPath}${item.name}/`;
  const dirReader = item.createReader();
  const children = [];

  // readEntries() may deliver the listing in several batches; keep reading
  // until an empty batch signals the end of the directory.
  for (;;) {
    let batch;

    try {
      batch = await new Promise((resolve, reject) => dirReader.readEntries(resolve, reject));
    } catch (err) {
      console.error(err);
      break;
    }
    if (!batch.length) break;
    for (const entry of batch) children.push(entry);
  }

  const nested = await Promise.all(children.map((entry) => traverseFileTree(entry, dirPath)));

  return nested.flat();
}
|
|
/**
 * Resolves the File objects behind a drop payload, expanding dropped folders.
 *
 * @param {ArrayLike} files - Either a list of items exposing
 *   `webkitGetAsEntry()` (e.g. `dataTransfer.items`) or a plain list of files.
 * @returns {Promise<ArrayLike>} Resolves with `[]` for an empty payload, with
 *   `files` itself when its items do not expose `webkitGetAsEntry`, otherwise
 *   with the File objects of every entry found. Rejects if an entry cannot be
 *   turned into a File, instead of never settling.
 */
async function getFiles(files) {
  // @todo this can be not a files, but text or any other draggable stuff
  if (!files.length) return [];
  if (!files[0].webkitGetAsEntry) return files;

  // Use DataTransferItemList interface to access the file(s).
  // NOTE: entries are read here, before the first `await`, so this still
  // happens synchronously within the caller's event handler. Items with no
  // entry behind them (webkitGetAsEntry() returned null) are dropped.
  const entries = Array.from(files, (item) => item.webkitGetAsEntry?.()).filter(Boolean);

  // Call with the entry only, so the map index is not taken for a path.
  const nested = await Promise.all(entries.map((entry) => traverseFileTree(entry)));
  const fileEntries = nested.flat();

  return Promise.all(
    fileEntries.map((fileEntry) => new Promise((resolve, reject) => fileEntry.file(resolve, reject))),
  );
}
|
|
const Upload = ({ children, sendFiles }) => {
|
const [hovered, setHovered] = useState(false);
|
const onHover = (e) => {
|
e.preventDefault();
|
setHovered(true);
|
};
|
const onLeave = setHovered.bind(null, false);
|
const dropzoneRef = useRef();
|
|
const onDrop = useCallback(
|
(e) => {
|
e.preventDefault();
|
onLeave();
|
getFiles(e.dataTransfer.items).then((files) => sendFiles(files));
|
},
|
[onLeave, sendFiles],
|
);
|
|
return (
|
<div
|
id="holder"
|
className={dropzoneClass.mod({ hovered })}
|
ref={dropzoneRef}
|
onDragStart={onHover}
|
onDragOver={onHover}
|
onDragLeave={onLeave}
|
onDrop={onDrop}
|
// {...getRootProps}
|
>
|
{children}
|
</div>
|
);
|
};
|
|
const ErrorMessage = ({ error }) => {
|
if (!error) return null;
|
let extra = error.validation_errors ?? error.extra;
|
// support all possible responses
|
|
if (extra && typeof extra === "object" && !Array.isArray(extra)) {
|
extra = extra.non_field_errors ?? Object.values(extra);
|
}
|
if (Array.isArray(extra)) extra = extra.join("; ");
|
|
return (
|
<div className={importClass.elem("error")}>
|
<IconErrorAlt width="24" height="24" />
|
{error.id && `[${error.id}] `}
|
{error.detail || error.message}
|
{extra && ` (${extra})`}
|
</div>
|
);
|
};
|
|
export const ImportPage = ({
|
project,
|
sample,
|
show = true,
|
onWaiting,
|
onFileListUpdate,
|
onSampleDatasetSelect,
|
highlightCsvHandling,
|
dontCommitToProject = false,
|
csvHandling,
|
setCsvHandling,
|
addColumns,
|
openLabelingConfig,
|
}) => {
|
const [error, setError] = useState();
|
const [newlyUploadedFiles, setNewlyUploadedFiles] = useState(new Set());
|
const prevUploadedRef = useRef(new Set());
|
const api = useAPI();
|
const projectConfigured = project?.label_config !== "<View></View>";
|
const sampleConfig = useAtomValue(sampleDatasetAtom);
|
|
const processFiles = (state, action) => {
|
if (action.sending) {
|
return { ...state, uploading: [...action.sending, ...state.uploading] };
|
}
|
if (action.sent) {
|
return {
|
...state,
|
uploading: state.uploading.filter((f) => !action.sent.includes(f)),
|
};
|
}
|
if (action.uploaded) {
|
return {
|
...state,
|
uploaded: unique([...state.uploaded, ...action.uploaded], (a, b) => a.id === b.id),
|
};
|
}
|
if (action.ids) {
|
const ids = unique([...state.ids, ...action.ids]);
|
|
onFileListUpdate?.(ids);
|
return { ...state, ids };
|
}
|
return state;
|
};
|
|
const [files, dispatch] = useReducer(processFiles, {
|
uploaded: [],
|
uploading: [],
|
ids: [],
|
});
|
const showList = Boolean(files.uploaded?.length || files.uploading?.length || sample);
|
|
const loadFilesList = useCallback(
|
async (file_upload_ids) => {
|
const query = {};
|
|
if (file_upload_ids) {
|
// should be stringified array "[1,2]"
|
query.ids = JSON.stringify(file_upload_ids);
|
}
|
const files = await api.callApi("fileUploads", {
|
params: { pk: project.id, ...query },
|
});
|
|
dispatch({ uploaded: files ?? [] });
|
|
if (files?.length) {
|
dispatch({ ids: files.map((f) => f.id) });
|
}
|
return files;
|
},
|
[project?.id],
|
);
|
|
const onError = (err) => {
|
console.error(err);
|
// @todo workaround for error about input size in a wrong html format
|
if (typeof err === "string" && err.includes("RequestDataTooBig")) {
|
const message = "Imported file is too big";
|
const extra = err.match(/"exception_value">(.*)<\/pre>/)?.[1];
|
|
err = { message, extra };
|
}
|
setError(err);
|
onWaiting?.(false);
|
};
|
const onFinish = useCallback(
|
async (res) => {
|
const { could_be_tasks_list, data_columns, file_upload_ids } = res;
|
|
dispatch({ ids: file_upload_ids });
|
if (could_be_tasks_list && !csvHandling) setCsvHandling("choose");
|
onWaiting?.(false);
|
addColumns(data_columns);
|
|
await loadFilesList(file_upload_ids);
|
return res;
|
},
|
[addColumns, loadFilesList],
|
);
|
|
// Track newly uploaded files for flash animation
|
useEffect(() => {
|
const currentUploadedIds = new Set(files.uploaded.map((f) => f.id));
|
const previousUploadedIds = prevUploadedRef.current;
|
|
// Find files that were just uploaded (in current but not in previous)
|
const justUploaded = new Set([...currentUploadedIds].filter((id) => !previousUploadedIds.has(id)));
|
|
// Update the ref immediately after comparison to ensure it's available for next run
|
prevUploadedRef.current = new Set(currentUploadedIds);
|
|
// Clean up animation state for files that are no longer in the uploaded list
|
setNewlyUploadedFiles((prev) => {
|
const filtered = new Set([...prev].filter((id) => currentUploadedIds.has(id)));
|
return filtered;
|
});
|
|
// Animate newly uploaded files (including first upload)
|
if (justUploaded.size > 0) {
|
// Apply animation class immediately for better responsiveness
|
setNewlyUploadedFiles((prev) => new Set([...prev, ...justUploaded]));
|
|
// Remove animation class after animation completes (CSS handles the animation timing)
|
const timeoutId = setTimeout(() => {
|
setNewlyUploadedFiles((prev) => {
|
const updated = new Set(prev);
|
justUploaded.forEach((id) => updated.delete(id));
|
return updated;
|
});
|
}, FLASH_ANIMATION_DURATION);
|
|
// Cleanup timeout on unmount or dependency change
|
return () => clearTimeout(timeoutId);
|
}
|
}, [files.uploaded]);
|
|
const importFilesImmediately = useCallback(
|
async (files, body) => {
|
importFiles({
|
files,
|
body,
|
project,
|
onError,
|
onFinish,
|
onUploadStart: (files) => dispatch({ sending: files }),
|
onUploadFinish: (files) => dispatch({ sent: files }),
|
dontCommitToProject,
|
});
|
},
|
[project, onFinish],
|
);
|
|
const sendFiles = useCallback(
|
(files) => {
|
setError(null);
|
onWaiting?.(true);
|
files = [...files]; // they can be array-like object
|
const fd = new FormData();
|
|
for (const f of files) {
|
if (!allSupportedExtensions.includes(getFileExtension(f.name))) {
|
onError(new Error(`The filetype of file "${f.name}" is not supported.`));
|
return;
|
}
|
fd.append(f.name, f);
|
}
|
return importFilesImmediately(files, fd);
|
},
|
[importFilesImmediately],
|
);
|
|
const onUpload = useCallback(
|
(e) => {
|
sendFiles(e.target.files);
|
e.target.value = "";
|
},
|
[sendFiles],
|
);
|
|
const onLoadURL = useCallback(
|
(e) => {
|
e.preventDefault();
|
setError(null);
|
const url = urlRef.current?.value;
|
|
if (!url) {
|
return;
|
}
|
urlRef.current.value = "";
|
onWaiting?.(true);
|
const body = new URLSearchParams({ url });
|
|
importFilesImmediately([{ name: url }], body);
|
},
|
[importFilesImmediately],
|
);
|
|
const openConfig = useCallback(
|
(e) => {
|
e.preventDefault();
|
e.stopPropagation();
|
openLabelingConfig?.();
|
},
|
[openLabelingConfig],
|
);
|
|
useEffect(() => {
|
if (project?.id !== undefined) {
|
loadFilesList().then((files) => {
|
if (csvHandling) return;
|
// empirical guess on start if we have some possible tasks list/structured data problem
|
if (Array.isArray(files) && files.some(({ file }) => /\.[ct]sv$/.test(file))) {
|
setCsvHandling("choose");
|
}
|
});
|
}
|
}, [project?.id, loadFilesList]);
|
|
const urlRef = useRef();
|
|
if (!project) return null;
|
if (!show) return null;
|
|
const csvProps = {
|
name: "csv",
|
type: "radio",
|
onChange: (e) => setCsvHandling(e.target.value),
|
};
|
|
return (
|
<div className={importClass}>
|
{highlightCsvHandling && <div className={importClass.elem("csv-splash")} />}
|
<input id="file-input" type="file" name="file" multiple onChange={onUpload} style={{ display: "none" }} />
|
|
<header className="flex gap-4">
|
<form
|
className={`${importClass.elem("url-form")} inline-flex items-stretch`}
|
method="POST"
|
onSubmit={onLoadURL}
|
>
|
<Input placeholder="数据集 URL" name="url" ref={urlRef} rawClassName="h-[40px]" />
|
<Button variant="primary" look="outlined" type="submit" aria-label="添加 URL">
|
添加 URL
|
</Button>
|
</form>
|
<span>或</span>
|
<Button
|
variant="primary"
|
look="outlined"
|
type="button"
|
onClick={() => document.getElementById("file-input").click()}
|
leading={<IconUpload />}
|
aria-label="上传文件"
|
>
|
上传 {files.uploaded.length ? "更多 " : ""}文件
|
</Button>
|
{ff.isActive(ff.FF_SAMPLE_DATASETS) && (
|
<SampleDatasetSelect samples={samples} sample={sample} onSampleApplied={onSampleDatasetSelect} />
|
)}
|
<div
|
className={importClass.elem("csv-handling").mod({ highlighted: highlightCsvHandling, hidden: !csvHandling })}
|
>
|
<span>将 CSV/TSV 视为</span>
|
<label>
|
<input {...csvProps} value="tasks" checked={csvHandling === "tasks"} /> 任务列表
|
</label>
|
<label>
|
<input {...csvProps} value="ts" checked={csvHandling === "ts"} /> 时间序列或纯文本文件
|
</label>
|
</div>
|
<div className={importClass.elem("status")}>
|
{files.uploaded.length ? `已上传 ${files.uploaded.length} 个文件` : ""}
|
</div>
|
</header>
|
|
<ErrorMessage error={error} />
|
|
<main>
|
<Upload sendFiles={sendFiles} project={project}>
|
<div
|
className={scn("flex gap-4 w-full min-h-full", {
|
"justify-center": !showList,
|
})}
|
>
|
{!showList && (
|
<div className="flex gap-4 justify-center items-start w-full h-full">
|
<label htmlFor="file-input" className="w-full h-full">
|
<div className={`${dropzoneClass.elem("content")} w-full`}>
|
<IconFileUpload height="64" className={dropzoneClass.elem("icon")} />
|
<header>
|
拖放文件到此处
|
<br />
|
或点击浏览
|
</header>
|
|
<dl>
|
<dt>图像</dt>
|
<dd>{supportedExtensions.image.join(", ")}</dd>
|
<dt>音频</dt>
|
<dd>{supportedExtensions.audio.join(", ")}</dd>
|
<dt>
|
<div className="flex items-center gap-1">
|
视频
|
<Tooltip title="视频格式支持取决于您的浏览器。点击了解更多。">
|
<a
|
href="https://labelstud.io/tags/video#Video-format"
|
target="_blank"
|
rel="noopener noreferrer"
|
className="inline-flex items-center"
|
aria-label="了解有关视频格式支持的更多信息(在新标签页打开)"
|
>
|
<IconInfoOutline className="w-4 h-4 text-primary-content hover:text-primary-content-hover" />
|
</a>
|
</Tooltip>
|
</div>
|
</dt>
|
<dd>{supportedExtensions.video.join(", ")}</dd>
|
<dt>HTML / 超文本</dt>
|
<dd>{supportedExtensions.html.join(", ")}</dd>
|
<dt>文本</dt>
|
<dd>{supportedExtensions.text.join(", ")}</dd>
|
<dt>结构化数据</dt>
|
<dd>{supportedExtensions.structuredData.join(", ")}</dd>
|
<dt>PDF</dt>
|
<dd>{supportedExtensions.pdf.join(", ")}</dd>
|
</dl>
|
<div className="tips">
|
<b>重要提示:</b>
|
<ul className="mt-2 ml-4 list-disc font-normal">
|
<li>
|
我们建议即使有
|
<a
|
href="https://labelstud.io/guide/tasks.html#Import-data-from-the-Label-Studio-UI"
|
target="_blank"
|
rel="noopener noreferrer"
|
aria-label="上传限制文档(在新标签页打开)"
|
>
|
上传限制
|
</a>
|
也尽量使用
|
<a
|
href="https://labelstud.io/guide/storage.html"
|
target="_blank"
|
rel="noopener noreferrer"
|
aria-label="云存储文档(在新标签页打开)"
|
>
|
云存储
|
</a>
|
而不是直接上传。
|
</li>
|
<li>
|
对于 PDF,请使用
|
<a
|
href="https://labelstud.io/templates/multi-page-document-annotation"
|
target="_blank"
|
rel="noopener noreferrer"
|
aria-label="多图标注文档(在新标签页打开)"
|
>
|
多图标注
|
</a>
|
。JSONL 或 Parquet (仅限企业版) 文件需要云存储。
|
</li>
|
<li>
|
查看文档以了解如何
|
<a target="_blank" href="https://labelstud.io/guide/predictions.html" rel="noreferrer">
|
导入预标注数据
|
</a>
|
。
|
</li>
|
</ul>
|
</div>
|
</div>
|
</label>
|
</div>
|
)}
|
|
{showList && (
|
<div className="w-full">
|
<SimpleCard
|
title="文件"
|
className="w-full h-full"
|
contentClassName="overflow-y-auto h-[calc(100%-48px)]"
|
>
|
<table className="w-full">
|
<tbody>
|
{sample && (
|
<tr key={sample.url}>
|
<td>
|
<div className="flex items-center gap-2">
|
{sample.title}
|
<Badge variant="info" className="h-5 text-xs rounded-sm">
|
Sample
|
</Badge>
|
</div>
|
</td>
|
<td>{sample.description}</td>
|
<td>
|
<Button size="smaller" variant="negative" onClick={() => onSampleDatasetSelect(undefined)}>
|
<IconTrash className="w-4 h-4" />
|
</Button>
|
</td>
|
</tr>
|
)}
|
{files.uploaded.map((file) => {
|
const truncatedFilename = truncate(
|
file.file,
|
FILENAME_TRUNCATE_START,
|
FILENAME_TRUNCATE_END,
|
"...",
|
);
|
return (
|
<tr
|
key={file.file}
|
className={newlyUploadedFiles.has(file.id) ? importClass.elem("upload-flash") : ""}
|
>
|
<td className={importClass.elem("file-name")}>
|
<Tooltip title={file.file}>
|
<Typography variant="body" size="small" className="truncate">
|
{truncatedFilename}
|
</Typography>
|
</Tooltip>
|
</td>
|
<td>
|
<span className={importClass.elem("file-status")} />
|
</td>
|
<td className={importClass.elem("file-size")}>
|
<Typography
|
variant="body"
|
size="smaller"
|
className="text-nowrap text-neutral-content-subtle text-right"
|
>
|
{file.size ? formatFileSize(file.size) : ""}
|
</Typography>
|
</td>
|
</tr>
|
);
|
})}
|
{files.uploading.map((file, idx) => {
|
const truncatedFilename = truncate(
|
file.name,
|
FILENAME_TRUNCATE_START,
|
FILENAME_TRUNCATE_END,
|
"...",
|
);
|
return (
|
<tr key={`${idx}-${file.name}`}>
|
<td className={importClass.elem("file-name")}>
|
<Tooltip title={file.name}>
|
<Typography variant="body" size="small" className="truncate">
|
{truncatedFilename}
|
</Typography>
|
</Tooltip>
|
</td>
|
<td>
|
<span className={importClass.elem("file-status").mod({ uploading: true })} />
|
</td>
|
<td className={importClass.elem("file-size")}> </td>
|
</tr>
|
);
|
})}
|
</tbody>
|
</table>
|
</SimpleCard>
|
</div>
|
)}
|
|
{ff.isFF(ff.FF_JSON_PREVIEW) && (
|
<div className="w-full h-full flex flex-col min-h-[400px]">
|
{projectConfigured ? (
|
<SimpleCard
|
title="预期输入预览"
|
className="w-full h-full overflow-hidden flex flex-col"
|
contentClassName="h-[calc(100%-48px)]"
|
flushContent
|
>
|
{sampleConfig.data ? (
|
<div className={importClass.elem("code-wrapper")}>
|
<CodeBlock
|
title="预期输入预览"
|
code={sampleConfig?.data ?? ""}
|
className="w-full h-full"
|
/>
|
</div>
|
) : sampleConfig.isLoading ? (
|
<div className="w-full flex justify-center py-12">
|
<Spinner className="h-6 w-6" />
|
</div>
|
) : sampleConfig.isError ? (
|
<div className="w-[calc(100%-24px)] text-lg text-negative-content bg-negative-background border m-3 rounded-md border-negative-border-subtle p-4">
|
出错了,无法加载示例数据。
|
</div>
|
) : null}
|
</SimpleCard>
|
) : (
|
<SimpleCard className="w-full h-full flex flex-col items-center justify-center text-center p-wide">
|
<div className="flex flex-col items-center gap-tight">
|
<div className="bg-primary-background rounded-largest p-tight flex items-center justify-center">
|
<IconCode className="w-6 h-6 text-primary-icon" />
|
</div>
|
<div className="flex flex-col items-center gap-tighter">
|
<div className="text-label-small text-neutral-content font-medium">查看 JSON 输入格式</div>
|
<div className="text-body-small text-neutral-content-subtler text-center">
|
请先设置您的{" "}
|
<Button
|
type="button"
|
look="string"
|
onClick={openConfig}
|
className="border-none bg-none p-0 m-0 text-primary-content underline"
|
>
|
标注配置
|
</Button>{" "}
|
以预览预期的 JSON 数据格式
|
</div>
|
</div>
|
</div>
|
</SimpleCard>
|
)}
|
</div>
|
)}
|
</div>
|
</Upload>
|
</main>
|
</div>
|
);
|
};
|