"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. """ import hashlib import logging import os import shutil from copy import deepcopy from datetime import datetime import ujson as json from core import version from core.feature_flags import flag_set from core.utils.common import load_func from core.utils.io import get_all_files_from_dir, get_temp_dir, path_to_open_binary_file from django.conf import settings from django.db import models from django.db.models.signals import post_save from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ from label_studio_sdk.converter import Converter from tasks.models import Annotation logger = logging.getLogger(__name__) ExportMixin = load_func(settings.EXPORT_MIXIN) class Export(ExportMixin, models.Model): class Status(models.TextChoices): CREATED = 'created', _('Created') IN_PROGRESS = 'in_progress', _('In progress') FAILED = 'failed', _('Failed') COMPLETED = 'completed', _('Completed') title = models.CharField( _('title'), blank=True, default='', max_length=2048, ) created_at = models.DateTimeField( _('created at'), auto_now_add=True, help_text='Creation time', ) file = models.FileField( upload_to=settings.DELAYED_EXPORT_DIR, null=True, ) md5 = models.CharField( _('md5 of file'), max_length=128, default='', ) finished_at = models.DateTimeField( _('finished at'), help_text='Complete or fail time', null=True, default=None, ) status = models.CharField( _('Export status'), max_length=64, choices=Status.choices, default=Status.CREATED, ) counters = models.JSONField( _('Exporting meta data'), default=dict, ) project = models.ForeignKey( 'projects.Project', related_name='exports', on_delete=models.CASCADE, ) created_by = models.ForeignKey( settings.AUTH_USER_MODEL, related_name='+', on_delete=models.SET_NULL, null=True, verbose_name=_('created by'), ) @receiver(post_save, sender=Export) def set_export_default_name(sender, instance, created, **kwargs): if created and not instance.title: instance.title = instance.get_default_title() instance.save() class DataExport(object): # TODO: deprecated @staticmethod def save_export_files(project, now, get_args, data, md5, name): """Generate two files: meta info and result file and store them locally for logging""" filename_results = os.path.join(settings.EXPORT_DIR, name + '.json') filename_info = os.path.join(settings.EXPORT_DIR, name + '-info.json') annotation_number = Annotation.objects.filter(project=project).count() try: platform_version = version.get_git_version() except: # noqa: E722 platform_version = 'none' logger.error('Version is not detected in save_export_files()') info = { 'project': { 'title': project.title, 'id': project.id, 'created_at': project.created_at.strftime('%Y-%m-%dT%H:%M:%SZ'), 'created_by': project.created_by.email, 'task_number': project.tasks.count(), 'annotation_number': annotation_number, }, 'platform': {'version': platform_version}, 'download': { 'GET': dict(get_args), 'time': now.strftime('%Y-%m-%dT%H:%M:%SZ'), 'result_filename': filename_results, 'md5': md5, }, } with open(filename_results, 'w', encoding='utf-8') as f: f.write(data) with open(filename_info, 'w', encoding='utf-8') as f: json.dump(info, f, ensure_ascii=False) return filename_results @staticmethod def get_export_formats(project): converter = Converter(config=project.get_parsed_config(), project_dir=None) formats = [] supported_formats = set(converter.supported_formats) for format, format_info in converter.all_formats().items(): format_info = deepcopy(format_info) format_info['name'] = format.name if format.name not in supported_formats: format_info['disabled'] = True formats.append(format_info) return sorted(formats, key=lambda f: f.get('disabled', False)) @staticmethod def generate_export_file(project, tasks, output_format, download_resources, get_args, hostname=None): """Generate export file and return it as an open file object. Be sure to close the file after using it, to avoid wasting disk space. """ # prepare for saving now = datetime.now() data = json.dumps(tasks, ensure_ascii=False) md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest() # nosec name = 'project-' + str(project.id) + '-at-' + now.strftime('%Y-%m-%d-%H-%M') + f'-{md5[0:8]}' input_json = DataExport.save_export_files(project, now, get_args, data, md5, name) upload_dir = os.path.join(settings.MEDIA_ROOT, settings.UPLOAD_DIR) # ==== 详细调试日志 ==== logger.error('=' * 80) logger.error('[COCO DEBUG] Starting COCO export') logger.error(f'[COCO DEBUG] upload_dir: {upload_dir}') logger.error(f'[COCO DEBUG] download_resources: {download_resources}') logger.error(f'[COCO DEBUG] Task count: {len(tasks)}') # 打印前3个任务的数据 for i, task in enumerate(tasks[:3]): logger.error(f'[COCO DEBUG] Task {i+1} data: {task.get("data")}') # 检查上传目录内容 if os.path.exists(upload_dir): subdirs = os.listdir(upload_dir) logger.error(f'[COCO DEBUG] upload_dir contents: {subdirs}') converter = Converter( config=project.get_parsed_config(), project_dir=None, upload_dir=upload_dir, download_resources=download_resources, access_token=project.organization.created_by.auth_token.key, hostname=hostname, ) logger.error(f'[COCO DEBUG] Converter initialized') with get_temp_dir() as tmp_dir: logger.error(f'[COCO DEBUG] Calling converter.convert...') converter.convert(input_json, tmp_dir, output_format, is_dir=False) logger.error(f'[COCO DEBUG] Converter.convert completed') # 检查输出 tmp_contents = os.listdir(tmp_dir) logger.error(f'[COCO DEBUG] tmp_dir contents: {tmp_contents}') # 检查images目录 images_dir = os.path.join(tmp_dir, 'images') if os.path.exists(images_dir): image_files = os.listdir(images_dir) logger.error(f'[COCO DEBUG] images/ has {len(image_files)} files: {image_files}') if len(image_files) == 0: logger.error('[COCO DEBUG] *** PROBLEM: images directory is EMPTY ***') else: logger.error('[COCO DEBUG] *** PROBLEM: images/ directory DOES NOT EXIST ***') logger.error('=' * 80) files = get_all_files_from_dir(tmp_dir) # if only one file is exported - no need to create archive if len(os.listdir(tmp_dir)) == 1: output_file = files[0] ext = os.path.splitext(output_file)[-1] content_type = f'application/{ext}' out = path_to_open_binary_file(output_file) filename = name + os.path.splitext(output_file)[-1] return out, content_type, filename # otherwise pack output directory into archive shutil.make_archive(tmp_dir, 'zip', tmp_dir) out = path_to_open_binary_file(os.path.abspath(tmp_dir + '.zip')) content_type = 'application/zip' filename = name + '.zip' return out, content_type, filename class ConvertedFormat(models.Model): class Status(models.TextChoices): CREATED = 'created', _('Created') IN_PROGRESS = 'in_progress', _('In progress') FAILED = 'failed', _('Failed') COMPLETED = 'completed', _('Completed') project = models.ForeignKey( 'projects.Project', null=True, related_name='export_conversions', on_delete=models.CASCADE, ) organization = models.ForeignKey( 'organizations.Organization', null=True, on_delete=models.CASCADE, related_name='export_conversions', ) export = models.ForeignKey( Export, related_name='converted_formats', on_delete=models.CASCADE, help_text='Export snapshot for this converted file', ) file = models.FileField( upload_to=settings.DELAYED_EXPORT_DIR, null=True, ) status = models.CharField( max_length=64, choices=Status.choices, default=Status.CREATED, ) traceback = models.TextField(null=True, blank=True, help_text='Traceback report in case of errors') export_type = models.CharField(max_length=64) created_at = models.DateTimeField( _('created at'), null=True, auto_now_add=True, help_text='Creation time', ) updated_at = models.DateTimeField( _('updated at'), null=True, auto_now_add=True, help_text='Updated time', ) finished_at = models.DateTimeField( _('finished at'), help_text='Complete or fail time', null=True, default=None, ) created_by = models.ForeignKey( settings.AUTH_USER_MODEL, related_name='+', on_delete=models.SET_NULL, null=True, verbose_name=_('created by'), ) def delete(self, *args, **kwargs): if flag_set('ff_back_dev_4664_remove_storage_file_on_export_delete_29032023_short'): if self.file: self.file.delete() super().delete(*args, **kwargs)