"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
"""
import hashlib
import logging
import os
import shutil
from copy import deepcopy
from datetime import datetime

import ujson as json
from core import version
from core.feature_flags import flag_set
from core.utils.common import load_func
from core.utils.io import get_all_files_from_dir, get_temp_dir, path_to_open_binary_file
from django.conf import settings
from django.db import models
from django.db.models.signals import post_save
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from label_studio_sdk.converter import Converter
from tasks.models import Annotation

logger = logging.getLogger(__name__)


# The mixin class is resolved at import time from the dotted path in settings.
ExportMixin = load_func(settings.EXPORT_MIXIN)


class Export(ExportMixin, models.Model):
    """Database record describing one export of a project (file, status, counters)."""

    class Status(models.TextChoices):
        CREATED = 'created', _('Created')
        IN_PROGRESS = 'in_progress', _('In progress')
        FAILED = 'failed', _('Failed')
        COMPLETED = 'completed', _('Completed')

    title = models.CharField(
        _('title'),
        blank=True,
        default='',
        max_length=2048,
    )
    created_at = models.DateTimeField(
        _('created at'),
        auto_now_add=True,
        help_text='Creation time',
    )
    file = models.FileField(
        upload_to=settings.DELAYED_EXPORT_DIR,
        null=True,
    )
    md5 = models.CharField(
        _('md5 of file'),
        max_length=128,
        default='',
    )
    finished_at = models.DateTimeField(
        _('finished at'),
        help_text='Complete or fail time',
        null=True,
        default=None,
    )

    status = models.CharField(
        _('Export status'),
        max_length=64,
        choices=Status.choices,
        default=Status.CREATED,
    )
    counters = models.JSONField(
        _('Exporting meta data'),
        default=dict,
    )
    project = models.ForeignKey(
        'projects.Project',
        related_name='exports',
        on_delete=models.CASCADE,
    )
    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name='+',
        on_delete=models.SET_NULL,
        null=True,
        verbose_name=_('created by'),
    )


@receiver(post_save, sender=Export)
def set_export_default_name(sender, instance, created, **kwargs):
    """Give a newly created Export a default title when none was provided.

    The nested ``save()`` re-fires ``post_save``, but with ``created=False``,
    so the handler does not recurse.
    """
    if created and not instance.title:
        instance.title = instance.get_default_title()
        instance.save()


class DataExport(object):
    """Static helpers that build export files for a project."""

    # TODO: deprecated
    @staticmethod
    def save_export_files(project, now, get_args, data, md5, name):
        """Generate two files: meta info and result file and store them locally for logging

        Args:
            project: project being exported; its title, id, creation info and
                task count are recorded in the info file.
            now: datetime of the export, written to the info file.
            get_args: mapping of request GET arguments, stored via ``dict(get_args)``.
            data: already-serialized export payload (a string) written as-is.
            md5: checksum string recorded in the info file.
            name: base file name (without extension) for both files.

        Returns:
            Path of the result file, ``<settings.EXPORT_DIR>/<name>.json``.
        """
        filename_results = os.path.join(settings.EXPORT_DIR, name + '.json')
        filename_info = os.path.join(settings.EXPORT_DIR, name + '-info.json')
        annotation_number = Annotation.objects.filter(project=project).count()
        try:
            platform_version = version.get_git_version()
        except Exception:
            # Best effort: a missing version must not break the export, but do
            # not swallow SystemExit / KeyboardInterrupt like a bare except would.
            platform_version = 'none'
            logger.error('Version is not detected in save_export_files()')
        info = {
            'project': {
                'title': project.title,
                'id': project.id,
                'created_at': project.created_at.strftime('%Y-%m-%dT%H:%M:%SZ'),
                'created_by': project.created_by.email,
                'task_number': project.tasks.count(),
                'annotation_number': annotation_number,
            },
            'platform': {'version': platform_version},
            'download': {
                'GET': dict(get_args),
                'time': now.strftime('%Y-%m-%dT%H:%M:%SZ'),
                'result_filename': filename_results,
                'md5': md5,
            },
        }

        with open(filename_results, 'w', encoding='utf-8') as f:
            f.write(data)
        with open(filename_info, 'w', encoding='utf-8') as f:
            json.dump(info, f, ensure_ascii=False)
        return filename_results

    @staticmethod
    def get_export_formats(project):
        """Return descriptions of all converter formats for ``project``.

        Each entry is a copy of the converter's format info with a ``name`` key
        added; formats not supported for this project's config additionally get
        ``disabled=True``. Enabled formats are sorted before disabled ones.
        """
        converter = Converter(config=project.get_parsed_config(), project_dir=None)
        formats = []
        supported_formats = set(converter.supported_formats)
        # `export_format` rather than `format` to avoid shadowing the builtin.
        for export_format, format_info in converter.all_formats().items():
            format_info = deepcopy(format_info)
            format_info['name'] = export_format.name
            if export_format.name not in supported_formats:
                format_info['disabled'] = True
            formats.append(format_info)
        # sorted() is stable, so the converter's ordering is kept within each group.
        return sorted(formats, key=lambda f: f.get('disabled', False))

    @staticmethod
    def generate_export_file(project, tasks, output_format, download_resources, get_args, hostname=None):
        """Generate export file and return it as an open file object.

        Be sure to close the file after using it, to avoid wasting disk space.

        Args:
            project: project whose parsed config drives the converter.
            tasks: JSON-serializable task data to export.
            output_format: target format passed to ``Converter.convert``.
            download_resources: forwarded to the converter.
            get_args: request GET arguments, recorded in the info file.
            hostname: optional host name forwarded to the converter.

        Returns:
            Tuple ``(out, content_type, filename)``: the opened binary file, its
            media type string and the suggested download file name.
        """

        # prepare for saving
        now = datetime.now()
        data = json.dumps(tasks, ensure_ascii=False)
        # NOTE(review): the payload is JSON-encoded a second time before hashing;
        # kept as-is so the checksum / file names stay stable.
        md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()  # nosec
        name = 'project-' + str(project.id) + '-at-' + now.strftime('%Y-%m-%d-%H-%M') + f'-{md5[0:8]}'

        input_json = DataExport.save_export_files(project, now, get_args, data, md5, name)

        converter = Converter(
            config=project.get_parsed_config(),
            project_dir=None,
            upload_dir=os.path.join(settings.MEDIA_ROOT, settings.UPLOAD_DIR),
            download_resources=download_resources,
            access_token=project.organization.created_by.auth_token.key,
            hostname=hostname,
        )
        with get_temp_dir() as tmp_dir:
            converter.convert(input_json, tmp_dir, output_format, is_dir=False)
            files = get_all_files_from_dir(tmp_dir)
            # if only one file is exported - no need to create archive
            if len(os.listdir(tmp_dir)) == 1:
                output_file = files[0]
                ext = os.path.splitext(output_file)[-1]
                # splitext keeps the leading dot ('.json'); strip it so the media
                # type is 'application/json' rather than the malformed
                # 'application/.json', and fall back to a generic subtype when
                # the file has no extension at all.
                content_type = f"application/{ext.lstrip('.') or 'octet-stream'}"
                out = path_to_open_binary_file(output_file)
                filename = name + ext
                return out, content_type, filename

            # otherwise pack output directory into archive
            shutil.make_archive(tmp_dir, 'zip', tmp_dir)
            out = path_to_open_binary_file(os.path.abspath(tmp_dir + '.zip'))
            content_type = 'application/zip'
            filename = name + '.zip'
            return out, content_type, filename


class ConvertedFormat(models.Model):
    """Database record for one converted file produced from an Export snapshot."""

    class Status(models.TextChoices):
        CREATED = 'created', _('Created')
        IN_PROGRESS = 'in_progress', _('In progress')
        FAILED = 'failed', _('Failed')
        COMPLETED = 'completed', _('Completed')

    project = models.ForeignKey(
        'projects.Project',
        null=True,
        related_name='export_conversions',
        on_delete=models.CASCADE,
    )
    organization = models.ForeignKey(
        'organizations.Organization',
        null=True,
        on_delete=models.CASCADE,
        related_name='export_conversions',
    )
    export = models.ForeignKey(
        Export,
        related_name='converted_formats',
        on_delete=models.CASCADE,
        help_text='Export snapshot for this converted file',
    )
    file = models.FileField(
        upload_to=settings.DELAYED_EXPORT_DIR,
        null=True,
    )
    status = models.CharField(
        max_length=64,
        choices=Status.choices,
        default=Status.CREATED,
    )
    traceback = models.TextField(null=True, blank=True, help_text='Traceback report in case of errors')
    export_type = models.CharField(max_length=64)
    created_at = models.DateTimeField(
        _('created at'),
        null=True,
        auto_now_add=True,
        help_text='Creation time',
    )
    # NOTE(review): auto_now_add only stamps this on creation; if it should track
    # every save it presumably wants auto_now=True - confirm before changing,
    # since that alters the field definition.
    updated_at = models.DateTimeField(
        _('updated at'),
        null=True,
        auto_now_add=True,
        help_text='Updated time',
    )
    finished_at = models.DateTimeField(
        _('finished at'),
        help_text='Complete or fail time',
        null=True,
        default=None,
    )
    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name='+',
        on_delete=models.SET_NULL,
        null=True,
        verbose_name=_('created by'),
    )

    def delete(self, *args, **kwargs):
        """Delete the record, removing the stored file first when the feature flag is on."""
        if flag_set('ff_back_dev_4664_remove_storage_file_on_export_delete_29032023_short') and self.file:
            self.file.delete()
        super().delete(*args, **kwargs)