"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. """ import json import logging from typing import Any, Mapping, Optional from annoying.fields import AutoOneToOneField from core.current_request import CurrentContext from core.label_config import ( check_control_in_config_by_regex, check_toname_in_config_by_regex, config_line_stipped, extract_data_types, get_all_control_tag_tuples, get_all_labels, get_all_object_tag_names, get_all_types, get_annotation_tuple, get_original_fromname_by_regex, get_sample_task, validate_label_config, ) from core.utils.common import ( create_hash, get_attr_or_item, load_func, merge_labels_counters, ) from core.utils.db import batch_update_with_retry, fast_first, has_column_cached from django.conf import settings from django.contrib.postgres.search import SearchVectorField from django.core.validators import MaxLengthValidator, MinLengthValidator from django.db import connection, models, transaction from django.db.models import Avg, BooleanField, Case, Count, GeneratedField, JSONField, Max, Q, Sum, Value, When from django.db.models.expressions import RawSQL from django.utils.functional import cached_property from django.utils.translation import gettext_lazy as _ from fsm.models import FsmHistoryStateModel from fsm.project_transitions import update_project_state_after_task_change from fsm.queryset_mixins import FSMStateQuerySetMixin from label_studio_sdk._extensions.label_studio_tools.core.label_config import parse_config from labels_manager.models import Label from projects.functions import ( annotate_finished_task_number, annotate_ground_truth_number, annotate_num_tasks_with_annotations, annotate_skipped_annotations_number, annotate_task_number, annotate_total_annotations_number, annotate_total_predictions_number, annotate_useful_annotation_number, ) from projects.functions.utils import make_queryset_from_iterable from projects.signals import ProjectSignals from rest_framework.exceptions import ValidationError from tasks.models import ( Annotation, AnnotationDraft, Prediction, Q_task_finished_annotations, Task, bulk_update_stats_project_tasks, ) logger = logging.getLogger(__name__) class ProjectQuerySet(models.QuerySet): pass class ProjectQuerySetWithFSM(FSMStateQuerySetMixin, ProjectQuerySet): pass class ProjectManager(models.Manager): """ Manager for Project model. 
Provides: - User-scoped filtering - Counter annotations for project statistics - FSM state annotation support """ COUNTER_FIELDS = [ 'task_number', 'finished_task_number', 'total_predictions_number', 'total_annotations_number', 'num_tasks_with_annotations', 'useful_annotation_number', 'ground_truth_number', 'skipped_annotations_number', ] ANNOTATED_FIELDS = { 'task_number': annotate_task_number, 'finished_task_number': annotate_finished_task_number, 'total_predictions_number': annotate_total_predictions_number, 'total_annotations_number': annotate_total_annotations_number, 'num_tasks_with_annotations': annotate_num_tasks_with_annotations, 'useful_annotation_number': annotate_useful_annotation_number, 'ground_truth_number': annotate_ground_truth_number, 'skipped_annotations_number': annotate_skipped_annotations_number, } def get_queryset(self): """Return ProjectQuerySet with FSM state annotation support""" return ProjectQuerySetWithFSM(self.model, using=self._db) def for_user(self, user): return self.get_queryset().filter(organization=user.active_organization) def with_state(self): """ Return queryset with FSM state annotated. Example: projects = Project.objects.with_state().filter(organization=org) for project in projects: print(project.state) # No N+1 queries! """ return self.get_queryset().with_state() def with_counts(self, fields=None): return self.with_counts_annotate(self.get_queryset(), fields=fields) @staticmethod def with_counts_annotate(queryset, fields=None, exclude=None): available_fields = ProjectManager.ANNOTATED_FIELDS if fields is None: to_annotate = available_fields else: to_annotate = {field: available_fields[field] for field in fields if field in available_fields} if exclude: to_annotate = {field: func for field, func in to_annotate.items() if field not in exclude} for _, annotate_func in to_annotate.items(): # noqa: F402 queryset = annotate_func(queryset) return queryset class ProjectVisibleManager(ProjectManager): """Default manager that hides soft-deleted projects (deleted_at IS NULL).""" def get_queryset(self): qs = super().get_queryset() # Avoid referencing columns that might not exist during early migrations if has_column_cached(self.model._meta.db_table, 'deleted_at'): return qs.filter(deleted_at__isnull=True) return qs ProjectMixin = load_func(settings.PROJECT_MIXIN) # LSE recalculate all stats recalculate_all_stats = load_func(settings.RECALCULATE_ALL_STATS) class Project(ProjectMixin, FsmHistoryStateModel): class SkipQueue(models.TextChoices): # requeue to the end of the same annotator’s queue => annotator gets this task at the end of the queue REQUEUE_FOR_ME = 'REQUEUE_FOR_ME', 'Requeue for me' # requeue skipped tasks back to the common queue, excluding skipping annotator [current default] => another annotator gets this task REQUEUE_FOR_OTHERS = 'REQUEUE_FOR_OTHERS', 'Requeue for others' # ignore skipped tasks => skip is a valid annotation, task is completed (finished=True) IGNORE_SKIPPED = 'IGNORE_SKIPPED', 'Ignore skipped' # Managers: default (visible only) and explicit unfiltered objects = ProjectVisibleManager() all_objects = ProjectManager() __original_label_config = None title = models.CharField( _('title'), null=True, blank=True, default='', max_length=settings.PROJECT_TITLE_MAX_LEN, help_text=f'Project name. 
Must be between {settings.PROJECT_TITLE_MIN_LEN} and {settings.PROJECT_TITLE_MAX_LEN} characters long.',
        validators=[
            MinLengthValidator(settings.PROJECT_TITLE_MIN_LEN),
            MaxLengthValidator(settings.PROJECT_TITLE_MAX_LEN),
        ],
    )
    description = models.TextField(
        _('description'), blank=True, null=True, default='', help_text='Project description'
    )
    organization = models.ForeignKey(
        'organizations.Organization', on_delete=models.CASCADE, related_name='projects', null=True
    )
    label_config = models.TextField(
        _('label config'),
        blank=True,
        null=True,
        default='',
        help_text='Label config in XML format. See more about it in documentation',
    )
    parsed_label_config = models.JSONField(
        _('parsed label config'),
        blank=True,
        null=True,
        default=None,
        help_text='Parsed label config in JSON format. See more about it in documentation',
    )
    label_config_hash = models.BigIntegerField(null=True, default=None)
    expert_instruction = models.TextField(
        _('expert instruction'), blank=True, null=True, default='', help_text='Labeling instructions in HTML format'
    )
    show_instruction = models.BooleanField(
        _('show instruction'), default=False, help_text='Show instructions to the annotator before they start'
    )
    show_skip_button = models.BooleanField(
        _('show skip button'),
        default=True,
        help_text='Show a skip button in interface and allow annotators to skip the task',
    )
    enable_empty_annotation = models.BooleanField(
        _('enable empty annotation'), default=True, help_text='Allow annotators to submit empty annotations'
    )
    reveal_preannotations_interactively = models.BooleanField(
        _('reveal_preannotations_interactively'), default=False, help_text='Reveal pre-annotations interactively'
    )
    show_annotation_history = models.BooleanField(
        _('show annotation history'), default=False, help_text='Show annotation history to annotator'
    )
    show_collab_predictions = models.BooleanField(
        _('show predictions to annotator'), default=True, help_text='If set, the annotator can view model predictions'
    )
    # "evaluate" is the wrong word here; a more accurate name would be retrieve_predictions_automatically
    # deprecated
    evaluate_predictions_automatically = models.BooleanField(
        _('evaluate predictions automatically'),
        default=False,
        help_text='Retrieve and display predictions when loading a task',
    )
    token = models.CharField(_('token'), max_length=256, default=create_hash, null=True, blank=True)
    result_count = models.IntegerField(
        _('result count'), default=0, help_text='Counter for the total results inside annotations'
    )
    color = models.CharField(_('color'), max_length=16, default='#FFFFFF', null=True, blank=True)
    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name='created_projects',
        on_delete=models.SET_NULL,
        null=True,
        verbose_name=_('created by'),
    )
    maximum_annotations = models.IntegerField(
        _('maximum annotation number'),
        default=1,
        help_text='Maximum number of annotations for one task. '
        'If the number of annotations per task is equal to or greater '
        'than this value, the task is completed (is_labeled=True)',
    )
    min_annotations_to_start_training = models.IntegerField(
        _('min_annotations_to_start_training'),
        default=0,
        help_text='Minimum number of completed tasks after which model training is started',
    )
    control_weights = JSONField(
        _('control weights'),
        null=True,
        default=dict,
        help_text='Dict of weights for each control tag in metric calculation. Each control tag (e.g. label or choice) '
        'has its own key in the control weight dict, with a weight for each label and an overall weight. '
        'For example, if a bounding box annotation with a control tag named my_bbox should be included with 0.33 weight in the agreement calculation, '
        'and the first label Car should be twice as important as Airplane, then you need to specify: '
        "{'my_bbox': {'type': 'RectangleLabels', 'labels': {'Car': 1.0, 'Airplane': 0.5}, 'overall': 0.33}}",
    )
    # Welcome reader! You might be wondering how `model_version` is
    # set and used; let's explain. `model_version` can either be set
    # to the prediction `model_version` associated with the
    # `tasks.Prediction` model, or to the ML backend title. Yes,
    # understandably, this can be confusing. However, this appears to
    # be the best approach we currently have for improving the
    # experience while maintaining backward compatibility.
    model_version = models.TextField(
        _('model version'), blank=True, null=True, default='', help_text='Machine learning model version'
    )
    data_types = JSONField(_('data_types'), default=dict, null=True)

    is_draft = models.BooleanField(
        _('is draft'), default=False, help_text='Whether or not the project is in the middle of being created'
    )
    is_published = models.BooleanField(
        _('published'), default=False, help_text='Whether or not the project is published to annotators'
    )

    created_at = models.DateTimeField(_('created at'), auto_now_add=True)
    updated_at = models.DateTimeField(_('updated at'), auto_now=True)

    SEQUENCE = 'Sequential sampling'
    UNIFORM = 'Uniform sampling'
    UNCERTAINTY = 'Uncertainty sampling'

    SAMPLING_CHOICES = (
        (SEQUENCE, 'Tasks are ordered by Data manager ordering'),
        (UNIFORM, 'Tasks are chosen randomly'),
        (UNCERTAINTY, 'Tasks are chosen according to model uncertainty scores (active learning mode)'),
    )

    sampling = models.CharField(max_length=100, choices=SAMPLING_CHOICES, null=True, default=SEQUENCE)
    skip_queue = models.CharField(
        max_length=100, choices=SkipQueue.choices, null=True, default=SkipQueue.REQUEUE_FOR_OTHERS
    )
    show_ground_truth_first = models.BooleanField(
        _('show ground truth first'),
        default=False,
        help_text='Onboarding mode (true): show ground truth tasks first in the labeling stream',
    )
    show_overlap_first = models.BooleanField(_('show overlap first'), default=False)
    overlap_cohort_percentage = models.IntegerField(_('overlap_cohort_percentage'), default=100)

    task_data_login = models.CharField(
        _('task_data_login'), max_length=256, blank=True, null=True, help_text='Task data credentials: login'
    )
    task_data_password = models.CharField(
        _('task_data_password'), max_length=256, blank=True, null=True, help_text='Task data credentials: password'
    )

    pinned_at = models.DateTimeField(_('pinned at'), null=True, default=None, help_text='Pinned date and time')

    custom_task_lock_ttl = models.IntegerField(
        _('custom_task_lock_ttl'),
        null=True,
        default=None,
        help_text='Custom task lock TTL in seconds. If not set, the default value is used',
    )
    # Soft-delete lifecycle (OSS fields, used by LSE logic)
    deleted_at = models.DateTimeField(_('deleted at'), null=True, blank=True)
    deleted_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name='deleted_projects',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        db_index=False,
        verbose_name=_('deleted by'),
    )
    purge_at = models.DateTimeField(_('purge at'), null=True, blank=True)

    def __init__(self, *args, **kwargs):
        super(Project, self).__init__(*args, **kwargs)
        # This check is required because reading a deferred (lazy) field directly triggers an
        # extra query per instance. Without it, any attempt to optimize a queryset involving
        # projects would regress into N+1 queries or, in some cases, an infinite loop.
        deferred_fields = self.get_deferred_fields()
        self.__original_label_config = self.label_config if 'label_config' not in deferred_fields else None
        self.__maximum_annotations = self.maximum_annotations if 'maximum_annotations' not in deferred_fields else None
        self.__overlap_cohort_percentage = (
            self.overlap_cohort_percentage if 'overlap_cohort_percentage' not in deferred_fields else None
        )
        self.__skip_queue = self.skip_queue if 'skip_queue' not in deferred_fields else None

        # TODO: remove the code below once all projects have been patched (extract_data_types);
        # it works around incorrect data types stored in List
        # logging.warning('! Please, remove code below after patching of all projects (extract_data_types)')
        if (
            'label_config' not in deferred_fields
            and self.label_config is not None
            and 'data_types' not in deferred_fields
        ):
            data_types = extract_data_types(self.label_config)
            if self.data_types != data_types:
                self.data_types = data_types

    @property
    def num_tasks(self):
        return self.tasks.count()

    @property
    def ml_backend(self):
        return fast_first(self.ml_backends.all())

    @property
    def should_retrieve_predictions(self):
        """Returns True if the connected ML backend model is set to be used"""
        if self.show_collab_predictions:
            ml = self.ml_backend
            if ml:
                return ml.title == self.model_version
        return False

    @property
    def num_annotations(self):
        return Annotation.objects.filter(project=self).count()

    @property
    def num_drafts(self):
        return AnnotationDraft.objects.filter(task__project=self).count()

    @property
    def has_predictions(self):
        return self.get_current_predictions().exists()

    @property
    def has_any_predictions(self):
        return Prediction.objects.filter(Q(project=self.id)).exists()

    @property
    def business(self):
        return self.created_by.business

    @property
    def is_private(self):
        return None

    @property
    def secure_mode(self):
        return False

    @property
    def one_object_in_label_config(self):
        return len(self.data_types) <= 1

    @property
    def get_labeled_count(self):
        return self.tasks.filter(is_labeled=True).count()

    @property
    def get_collected_count(self):
        return self.tasks.count()

    @property
    def get_total_possible_count(self):
        """
        Tasks have overlap, i.e. the number of annotations that should be accepted per task:

            possible count = sum(t.overlap for t in tasks)

        :return: int, total number of annotations that should be submitted
        """
        if self.tasks.count() == 0:
            return 0
        return self.tasks.aggregate(Sum('overlap'))['overlap__sum']

    @property
    def get_available_for_labeling(self):
        return self.get_collected_count - self.get_labeled_count

    @property
    def need_annotators(self):
        return self.maximum_annotations - self.num_annotators

    @classmethod
    def find_by_invite_url(cls, url):
        token = url.strip('/').split('/')[-1]
        if len(token):
            return Project.objects.get(token=token)
        else:
            raise KeyError(f"Can't find Project by invite URL: {url}")

    def reset_token(self):
        self.token = create_hash()
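        # Only the regenerated token column is persisted below; other fields are left untouched.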
self.save(update_fields=['token']) def add_collaborator(self, user): created = False with transaction.atomic(): try: ProjectMember.objects.get(user=user, project=self) except ProjectMember.DoesNotExist: ProjectMember.objects.create(user=user, project=self) created = True else: logger.debug(f'Project membership {self} for user {user} already exists') return created def has_collaborator(self, user): return ProjectMember.objects.filter(user=user, project=self).exists() def has_collaborator_enabled(self, user): membership = ProjectMember.objects.filter(user=user, project=self) return membership.exists() and membership.first().enabled def _update_tasks_states( self, maximum_annotations_changed, overlap_cohort_percentage_changed, tasks_number_changed ): """ Update tasks states after settings change :param maximum_annotations_changed: If maximum_annotations param changed :param overlap_cohort_percentage_changed: If cohort_percentage param changed :param tasks_number_changed: If tasks number changed in project """ logger.info( f'Starting _update_tasks_states with params: Project {str(self)} maximum_annotations ' f'{self.maximum_annotations} and percentage {self.overlap_cohort_percentage}' ) # if only maximum annotations parameter is tweaked if maximum_annotations_changed and not overlap_cohort_percentage_changed: # if there are tasks with overlap > 1 and maximum annotations has not been set to 1, preserve the cohort. # but if maximum_annotations is set to 1, then all tasks should be affected (since there is no longer a distinct cohort) tasks_with_overlap = self.tasks.filter(overlap__gt=1) if self.maximum_annotations > 1 else self.tasks.all() if tasks_with_overlap.exists(): # if there is a part with overlapped tasks, affect only them tasks_with_overlap.update(overlap=self.maximum_annotations) elif self.overlap_cohort_percentage < 100: self._rearrange_overlap_cohort() else: # otherwise affect all tasks self.tasks.update(overlap=self.maximum_annotations) tasks_with_overlap = self.tasks.all() # update is_labeled after change bulk_update_stats_project_tasks(tasks_with_overlap, project=self) # if cohort slider is tweaked elif overlap_cohort_percentage_changed: if self.maximum_annotations == 1: if maximum_annotations_changed: self.tasks.update(overlap=1) bulk_update_stats_project_tasks(self.tasks.all(), project=self) else: logger.info( f'Project {str(self)}: cohort percentage was changed but maximum annotations was not and is 1; taking no action' ) else: self._rearrange_overlap_cohort() # if adding/deleting tasks and cohort settings are applied elif tasks_number_changed and self.overlap_cohort_percentage < 100 and self.maximum_annotations > 1: self._rearrange_overlap_cohort() if tasks_number_changed: # FSM: Recalculate project state after task deletion or import user = CurrentContext.get_user() update_project_state_after_task_change(self, user=user) def _batch_update_with_retry(self, queryset, batch_size=500, max_retries=3, **update_fields): batch_update_with_retry(queryset, batch_size, max_retries, **update_fields) def _rearrange_overlap_cohort(self): """ Rearrange overlap depending on annotation count in tasks """ all_project_tasks = Task.objects.filter(project=self) max_annotations = self.maximum_annotations must_tasks = int(self.tasks.count() * self.overlap_cohort_percentage / 100 + 0.5) logger.info( f'Starting _rearrange_overlap_cohort with params: Project {str(self)} maximum_annotations ' f'{max_annotations} and percentage {self.overlap_cohort_percentage}' ) tasks_with_max_annotations = 
all_project_tasks.annotate(
            anno=Count('annotations', filter=Q_task_finished_annotations & Q(annotations__ground_truth=False))
        ).filter(anno__gte=max_annotations)

        tasks_with_min_annotations = all_project_tasks.exclude(id__in=tasks_with_max_annotations)

        # check how many tasks are left to finish
        left_must_tasks = max(must_tasks - tasks_with_max_annotations.count(), 0)
        logger.info(f'Required tasks {must_tasks} and left required tasks {left_must_tasks}')
        if left_must_tasks > 0:
            # if there are unfinished tasks, update tasks with count(annotations) >= overlap
            ids = list(tasks_with_max_annotations.values_list('id', flat=True))
            self._batch_update_with_retry(
                all_project_tasks.filter(id__in=ids), overlap=max_annotations, is_labeled=True
            )
            # order the remaining tasks by count(annotations)
            tasks_with_min_annotations = (
                tasks_with_min_annotations.annotate(anno=Count('annotations')).order_by('-anno').distinct()
            )
            # assign overlap depending on annotation count:
            # assign max_annotations to the first left_must_tasks tasks and update is_labeled
            ids = list(tasks_with_min_annotations[:left_must_tasks].values_list('id', flat=True))
            self._batch_update_with_retry(all_project_tasks.filter(id__in=ids), overlap=max_annotations)
            # assign overlap=1 to the rest
            ids = list(tasks_with_min_annotations[left_must_tasks:].values_list('id', flat=True))
            min_tasks_to_update = all_project_tasks.filter(id__in=ids)
            self._batch_update_with_retry(min_tasks_to_update, overlap=1)
        else:
            ids = list(tasks_with_max_annotations.values_list('id', flat=True))
            self._batch_update_with_retry(all_project_tasks.filter(id__in=ids), overlap=max_annotations)
            ids = list(tasks_with_min_annotations.values_list('id', flat=True))
            self._batch_update_with_retry(all_project_tasks.filter(id__in=ids), overlap=1)
        # update is_labeled after rearranging task overlap
        bulk_update_stats_project_tasks(all_project_tasks, project=self)

    def remove_tasks_by_file_uploads(self, file_upload_ids):
        self.tasks.filter(file_upload_id__in=file_upload_ids).delete()

    def advance_onboarding(self):
        """Move project to the next onboarding step"""
        po_qs = self.steps_left.order_by('step__order')
        count = po_qs.count()

        if count:
            po = po_qs.first()
            po.finished = True
            po.save()

            return count != 1

    def created_at_prettify(self):
        return self.created_at.strftime('%d %b %Y %H:%M:%S')

    def onboarding_step_finished(self, step):
        """Mark a specific onboarding step as finished"""
        pos = ProjectOnboardingSteps.objects.get(code=step)
        po = ProjectOnboarding.objects.get(project=self, step=pos)
        po.finished = True
        po.save()

        return po

    def data_types_json(self):
        return json.dumps(self.data_types)

    def available_data_keys(self):
        return sorted(list(self.data_types.keys()))

    @classmethod
    def validate_label_config(cls, config_string):
        validate_label_config(config_string)

    def validate_config(self, config_string, strict=False):
        self.validate_label_config(config_string)
        if not hasattr(self, 'summary'):
            return

        with transaction.atomic():
            # Lock summary for update to avoid race conditions
            summary = ProjectSummary.objects.select_for_update().get(project=self)

            if self.num_tasks == 0:
                logger.debug(f'Project {self} has no tasks: nothing to validate here. Ensure project summary is empty')
                summary.reset()
                return

            # validate data columns consistency
            fields_from_config = get_all_object_tag_names(config_string)
            if not fields_from_config:
                logger.debug('Data fields not found in labeling config')
                return

            # TODO: DEV-2939 Add validation for fields addition in label config
            """fields_from_config = {field.split('[')[0] for field in fields_from_config}  # Repeater tag support
            fields_from_data = set(self.summary.common_data_columns)
            fields_from_data.discard(settings.DATA_UNDEFINED_NAME)
            if fields_from_data and not fields_from_config.issubset(fields_from_data):
                different_fields = list(fields_from_config.difference(fields_from_data))
                raise ValidationError(
                    f'These fields are not present in the data: {",".join(different_fields)}'
                )"""

            if self.num_annotations == 0 and self.num_drafts == 0:
                logger.debug(
                    f'Project {self} has no annotations and drafts: nothing to validate here. '
                    f'Ensure annotations-related project summary is empty'
                )
                summary.reset(tasks_data_based=False)
                return

            # validate annotations consistency
            annotations_from_config = set(get_all_control_tag_tuples(config_string))
            if not annotations_from_config:
                logger.debug('Annotation schema is not found in config')
                return
            annotations_from_data = set(self.summary.created_annotations)
            if annotations_from_data and not annotations_from_data.issubset(annotations_from_config):
                different_annotations = list(annotations_from_data.difference(annotations_from_config))
                diff_str = []
                for ann_tuple in different_annotations:
                    from_name, to_name, t = ann_tuple.split('|')
                    # TODO: some tags operate as both object and control tags; there should be
                    # a special registry/logic for them
                    if from_name == to_name and t.lower() == 'chatmessage':
                        continue
                    if t.lower() == 'textarea':  # avoid textarea to_name check (see DEV-1598)
                        continue
                    if (
                        not check_control_in_config_by_regex(config_string, from_name)
                        or not check_toname_in_config_by_regex(config_string, to_name)
                        or t not in get_all_types(config_string)
                    ):
                        diff_str.append(
                            f'{self.summary.created_annotations[ann_tuple]} '
                            f'with from_name={from_name}, to_name={to_name}, type={t}'
                        )
                if len(diff_str) > 0:
                    diff_str = '\n'.join(diff_str)
                    raise ValidationError(
                        f'Created annotations are incompatible with the provided labeling schema; we found:\n{diff_str}'
                    )

            # validate labels consistency
            labels_from_config, dynamic_label_from_config = get_all_labels(config_string)
            created_labels = merge_labels_counters(self.summary.created_labels, self.summary.created_labels_drafts)

            def display_count(count: int, type: str) -> Optional[str]:
                """Helper for displaying pluralized sources of validation errors,
                e.g. "1 draft" or "3 annotations"
                """
                if not count:
                    return None

                return f'{count} {type}{"s" if count > 1 else ""}'

            for control_tag_from_data, labels_from_data in created_labels.items():
                # Check for labels created in annotations whose control tag has been removed
                if (
                    labels_from_data
                    and (
                        (control_tag_from_data not in labels_from_config)
                        and (control_tag_from_data not in dynamic_label_from_config)
                    )
                    and not check_control_in_config_by_regex(config_string, control_tag_from_data)
                ):
                    raise ValidationError(
                        f'There are {sum(labels_from_data.values(), 0)} annotation(s) created with tag '
                        f'"{control_tag_from_data}"; you can\'t remove it'
                    )
                labels_from_config_by_tag = set(
                    labels_from_config[get_original_fromname_by_regex(config_string, control_tag_from_data)]
                )
                parsed_config = parse_config(config_string)
                tag_types = [tag_info['type'] for _, tag_info in parsed_config.items()]
                # DEV-1990 Workaround for Video labels, as there are no labels in the VideoRectangle tag
                if 'VideoRectangle' in tag_types:
                    for key in labels_from_config:
                        labels_from_config_by_tag |= set(labels_from_config[key])
                if 'Taxonomy' in tag_types:
                    custom_tags = Label.objects.filter(links__project=self).values_list('value', flat=True)
                    flat_custom_tags = set([item for sublist in custom_tags for item in sublist])
                    labels_from_config_by_tag |= flat_custom_tags
                # check that the labels from the data are a subset of the labels from the config
                if not set(labels_from_data).issubset(set(labels_from_config_by_tag)):
                    different_labels = list(set(labels_from_data).difference(labels_from_config_by_tag))
                    diff_str = ''
                    for label in different_labels:
                        annotation_label_count = self.summary.created_labels.get(control_tag_from_data, {}).get(label, 0)
                        draft_label_count = self.summary.created_labels_drafts.get(control_tag_from_data, {}).get(label, 0)
                        annotation_display_count = display_count(annotation_label_count, 'annotation')
                        draft_display_count = display_count(draft_label_count, 'draft')

                        display = [disp for disp in [annotation_display_count, draft_display_count] if disp]
                        if display:
                            diff_str += f'{label} ({", ".join(display)})\n'
                    if (strict is True) and (
                        (control_tag_from_data not in dynamic_label_from_config)
                        and (
                            not check_control_in_config_by_regex(
                                config_string, control_tag_from_data, filter=dynamic_label_from_config.keys()
                            )
                        )
                    ):
                        # raise an error if the labels are not dynamic and do not match any regex rules
                        raise ValidationError(
                            f'These labels still exist in annotations or drafts:\n{diff_str}'
                            f'Please add the labels to the tag with name="{str(control_tag_from_data)}".'
                        )
                    else:
                        logger.info(f'project_id={self.id} inconsistent labels in config and annotations: {diff_str}')

    def _label_config_has_changed(self):
        return self.label_config != self.__original_label_config

    @property
    def label_config_is_not_default(self):
        return self.label_config != Project._meta.get_field('label_config').default

    def should_none_model_version(self, model_version):
        """
        Returns True if the provided model_version matches the project's model_version,
        or if the project's model_version refers to a connected ML backend title.
        """
        return self.model_version == model_version or self.ml_backend_in_model_version

    def delete_predictions(self, model_version=None):
        """
        Deletes the predictions based on the provided model version.
        If no model version is provided, it deletes all the predictions for this project.
        :param model_version: Identifier of the model version (default is None)
        :type model_version: str, optional

        :return: Dictionary with count of deleted predictions
        :rtype: dict
        """
        params = {'project': self}
        if model_version:
            params.update({'model_version': model_version})
        predictions = Prediction.objects.filter(**params)
        with transaction.atomic():
            # If we are deleting a specific model_version, we also need
            # to clear it from the project
            if self.should_none_model_version(model_version):
                self.model_version = None
                self.save(update_fields=['model_version'])
            _, deleted_map = predictions.delete()

        count = deleted_map.get('tasks.Prediction', 0)
        return {'deleted_predictions': count}

    def get_updated_weights(self):
        outputs = self.get_parsed_config()
        control_weights = {}
        exclude_control_types = ('Filter',)

        def get_label(label):
            label_value = self.control_weights.get(control_name, {}).get('labels', {}).get(label)
            return label_value if label_value is not None else 1.0

        def get_overall(name):
            weights = self.control_weights.get(name, None)
            if not weights:
                return 1.0
            else:
                weight = weights.get('overall', None)
                return weight if weight is not None else 1.0

        for control_name in outputs:
            control_type = outputs[control_name]['type']
            if control_type in exclude_control_types:
                continue
            control_weights[control_name] = {
                'overall': get_overall(control_name),
                'type': control_type,
                'labels': {label: get_label(label) for label in outputs[control_name].get('labels', [])},
            }
        return control_weights

    def save(self, *args, update_fields=None, recalc=True, **kwargs):
        exists = True if self.pk else False
        project_with_config_just_created = not exists and self.label_config
        label_config_has_changed = self._label_config_has_changed()
        logger.debug(
            f'Label config has changed: {label_config_has_changed}, original: {self.__original_label_config}, new: {self.label_config}'
        )

        if label_config_has_changed or project_with_config_just_created:
            self.data_types = extract_data_types(self.label_config)
            self.parsed_label_config = parse_config(self.label_config)
            self.label_config_hash = hash(str(self.label_config))
            if update_fields is not None:
                update_fields = {'data_types', 'parsed_label_config', 'label_config_hash'}.union(update_fields)

        if self.label_config and (self._label_config_has_changed() or not exists or not self.control_weights):
            self.control_weights = self.get_updated_weights()
            if update_fields is not None:
                update_fields = {'control_weights'}.union(update_fields)

        # If the project is published while still marked as a draft, clear the draft flag
        if self.is_published and self.is_draft:
            self.is_draft = False
            if update_fields is not None:
                update_fields = {'is_published', 'is_draft'}.union(update_fields)

        super(Project, self).save(*args, update_fields=update_fields, **kwargs)

        if label_config_has_changed:
            # save the new label config for future comparison
            self.__original_label_config = self.label_config
            # if tasks are already imported, emit a signal that the project is configured and ready for labeling
            if self.num_tasks > 0:
                logger.debug(f'Sending post_label_config_and_import_tasks signal for project {self.id}')
                ProjectSignals.post_label_config_and_import_tasks.send(sender=Project, project=self)
            else:
                logger.debug(
                    f'No tasks imported for project {self.id}, skipping post_label_config_and_import_tasks signal'
                )

        if not exists:
            steps = ProjectOnboardingSteps.objects.all()
            objs = [ProjectOnboarding(project=self, step=step) for step in steps]
            ProjectOnboarding.objects.bulk_create(objs)

        # recalc: whether to recalculate project task stats after this save
        if recalc:
            self._update_tasks_states(
                maximum_annotations_changed=self.__maximum_annotations != self.maximum_annotations,
                overlap_cohort_percentage_changed=self.__overlap_cohort_percentage != self.overlap_cohort_percentage,
                tasks_number_changed=False,
            )
            self.__maximum_annotations = self.maximum_annotations
            self.__overlap_cohort_percentage = self.overlap_cohort_percentage

        if self.__skip_queue != self.skip_queue:
            bulk_update_stats_project_tasks(
                self.tasks.filter(Q(annotations__isnull=False) & Q(annotations__ground_truth=False))
            )

        if hasattr(self, 'summary'):
            with transaction.atomic():
                # Lock summary for update to avoid race conditions
                summary = ProjectSummary.objects.select_for_update().get(project=self)
                # Ensure project.summary is consistent with current tasks / annotations
                if self.num_tasks == 0:
                    summary.reset()
                elif self.num_annotations == 0 and self.num_drafts == 0:
                    summary.reset(tasks_data_based=False)

        # Call dimensions postprocess if configured (LSE feature)
        dimensions_postprocess = load_func(settings.PROJECT_SAVE_DIMENSIONS_POSTPROCESS)
        if dimensions_postprocess is not None:
            dimensions_postprocess(
                project=self,
                created=not exists,
                label_config_has_changed=label_config_has_changed,
            )

    # ============================================================================
    # FSM Integration
    # ============================================================================
    # Project uses FsmHistoryStateModel for FSM integration. All transition logic is defined
    # in projects/transitions.py with declarative triggers. No custom methods are needed here.

    def get_member_ids(self):
        if hasattr(self, 'team_link'):
            # project has a defined team scope
            # TODO: avoid checking the team; instead, add all project members when creating a project
            return self.team_link.team.members.values_list('user', flat=True)
        else:
            from users.models import User

            # TODO: we may want to return all users from the organization
            return User.objects.none()

    def has_team_user(self, user):
        return hasattr(self, 'team_link') and self.team_link.team.has_user(user)

    def annotators(self):
        """Annotators connected to this project, including team members"""
        from users.models import User

        member_ids = self.get_member_ids()
        team_members = User.objects.filter(id__in=member_ids).order_by('email')

        # add members from invited projects
        project_member_ids = self.members.values_list('user__id', flat=True)
        project_members = User.objects.filter(id__in=project_member_ids)

        annotators = team_members | project_members

        # set annotator.team_member=True if the annotator is not an invited user
        annotators = annotators.annotate(
            team_member=Case(
                When(id__in=project_member_ids, then=Value(False)),
                default=Value(True),
                output_field=BooleanField(),
            )
        )
        return annotators

    def annotators_with_annotations(self, min_count=500):
        """Annotators with at least min_count annotations in this project

        :param min_count: minimum number of annotations required to keep an annotator
        :return: filtered annotators
        """
        annotators = self.annotators()
        q = Q(annotations__project=self) & Q_task_finished_annotations & Q(annotations__ground_truth=False)
        annotators = annotators.annotate(annotation_count=Count('annotations', filter=q, distinct=True))
        return annotators.filter(annotation_count__gte=min_count)

    def labeled_tasks(self):
        return self.tasks.filter(is_labeled=True)

    def has_annotations(self):
        from tasks.models import Annotation  # prevent cycling imports

        return Annotation.objects.filter(Q(project=self) & Q(ground_truth=False)).count() > 0

    # [TODO] this should be a template tag or something like this
    @property
    def label_config_line(self):
        c = self.label_config
        return config_line_stipped(c)
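    # Illustrative sketch (hypothetical config, not a doctest): given a config such as
    #     <View>
    #       <Text name="txt" value="$text"/>
    #     </View>
    # `project.label_config_line` returns the same markup stripped onto a single line,
    # which is convenient for embedding the config in templates.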
    def get_sample_task(self, label_config=None):
        config = label_config or self.label_config
        task, _, _ = get_sample_task(config)
        return task

    def eta(self):
        """
        Show the ETA for the project to be finished:

            eta = avg annotation lead time * remaining annotations

        where task overlap = the number of annotations needed to consider a task finished
        (is_labeled), and remaining annotations = sum of the annotations still to be done
        to fulfill the overlap of each unfinished task.

        :return: time in seconds
        """
        # finished tasks * overlap
        finished_tasks = Task.objects.filter(project=self.id, is_labeled=True)
        # annotators could have made more annotations than the overlap requires
        min_n_finished_annotations = sum([ft.overlap for ft in finished_tasks])

        annotations_unfinished_tasks = Annotation.objects.filter(
            project=self.id, task__is_labeled=False, ground_truth=False, result__isnull=False
        ).count()

        # get the minimum number of remaining annotations
        total_annotations_needed = self.get_total_possible_count
        annotations_remain = total_annotations_needed - min_n_finished_annotations - annotations_unfinished_tasks

        # get the average lead time of all finished annotations
        finished_annotations = Annotation.objects.filter(
            Q(project=self.id) & Q(ground_truth=False), result__isnull=False
        ).values('lead_time')
        avg_lead_time = finished_annotations.aggregate(avg_lead_time=Avg('lead_time'))['avg_lead_time']

        if avg_lead_time is None:
            return None
        return avg_lead_time * annotations_remain

    def finished(self):
        return not self.tasks.filter(is_labeled=False).exists()

    def annotations_lead_time(self):
        annotations = Annotation.objects.filter(Q(project=self.id) & Q(ground_truth=False))
        return annotations.aggregate(avg_lead_time=Avg('lead_time'))['avg_lead_time']

    @staticmethod
    def django_settings():
        return settings

    @staticmethod
    def max_tasks_file_size():
        return settings.TASKS_MAX_FILE_SIZE

    def get_parsed_config(self):
        if self.parsed_label_config is None:
            try:
                self.parsed_label_config = parse_config(self.label_config)
                self.save(update_fields=['parsed_label_config'])
            except Exception as e:
                logger.error(f'Error parsing label config for project {self.id}: {e}', exc_info=True)
                return {}
        return self.parsed_label_config

    def get_counters(self):
        """Get the extra counter data computed by the manager method with_counts()"""
        result = {}
        for field in ProjectManager.COUNTER_FIELDS:
            value = getattr(self, field, None)
            if value is not None:
                result[field] = value
        return result

    def get_model_versions(self, with_counters=False, extended=False, limit=None):
        """
        Get model_versions from project predictions.

        :param with_counters: Boolean, if True, counts predictions for each version. Default is False.
        :param extended: Boolean, if True, returns additional information. Default is False.
        :param limit: Optional int, maximum number of versions to return (ignored if extended is True).
        :return: Dict or list containing model versions and their prediction counts.
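
        Example (illustrative; the version names are hypothetical):
            >>> project.get_model_versions(with_counters=True)
            {'model_v1': 10, 'model_v2': 3}
            >>> project.get_model_versions()
            ['model_v1', 'model_v2']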
""" predictions = Prediction.objects.filter(project=self) model_versions = ( predictions.values('model_version') .annotate(count=Count('model_version'), latest=Max('created_at')) .order_by('-latest') ) if extended: return list(model_versions) else: if limit: model_versions = model_versions[:limit] output = {r['model_version']: r['count'] for r in model_versions} # Ensure that self.model_version exists in output if self.model_version and self.model_version not in output: if limit and len(output) < limit: output[self.model_version] = 0 elif not limit: output[self.model_version] = 0 # Return as per requirement return output if with_counters else list(output.keys()) def get_ml_backends(self, *args, **kwargs): from ml.models import MLBackend return MLBackend.objects.filter(project=self, **kwargs) def has_ml_backend(self, *args, **kwargs): return self.get_ml_backends(**kwargs).exists() @property def ml_backend_in_model_version(self): """ Returns True if the ml_backend title matches this model version. If this model version is not set, Returns False """ return bool(self.model_version and self.has_ml_backend(title=self.model_version)) def update_ml_backends_state(self): """ Updates the state of all ml_backends associated with this instance. :return: List of updated MLBackend instances. """ ml_backends = self.get_ml_backends() for mlb in ml_backends: mlb.update_state() return ml_backends def get_active_ml_backends(self): from ml.models import MLBackendState return self.get_ml_backends(state=MLBackendState.CONNECTED) @cached_property def get_all_import_storage_objects(self): from io_storages.models import get_storage_classes storage_objects = [] for storage_class in get_storage_classes('import'): storage_objects += list(storage_class.objects.filter(project=self)) return storage_objects @cached_property def get_all_export_storage_objects(self): from io_storages.models import get_storage_classes storage_objects = [] for storage_class in get_storage_classes('export'): storage_objects += list(storage_class.objects.filter(project=self)) return storage_objects @cached_property def multipage_labeling_values(self): """ Check if the project's label config contains an Image tag with a valueList attribute, which indicates multipage labeling. """ config = self.get_parsed_config() values = [] for tag in config.values(): for object_tag in tag.get('inputs', []): if object_tag.get('type') == 'Image': if object_tag.get('valueList') is not None: values.append(object_tag.get('valueList')) return values def resolve_storage_uri(self, url: str) -> Optional[Mapping[str, Any]]: from io_storages.functions import get_storage_by_url storage_objects = self.get_all_import_storage_objects storage = get_storage_by_url(url, storage_objects) if storage: return { 'url': storage.generate_http_url(url), 'presign_ttl': storage.presign_ttl, } def _update_tasks_counters_and_is_labeled(self, task_ids, from_scratch=True): """ Update tasks counters and is_labeled in batches of size settings.BATCH_SIZE. :param task_ids: List of task ids to be updated :param from_scratch: Skip calculated tasks :return: Count of updated tasks """ from tasks.functions import update_tasks_counters num_tasks_updated = 0 page_idx = 0 while task_ids_slice := task_ids[page_idx * settings.BATCH_SIZE : (page_idx + 1) * settings.BATCH_SIZE]: with transaction.atomic(): # If counters are updated, is_labeled must be updated as well. Hence, if either fails, we # will roll back. 
queryset = make_queryset_from_iterable(task_ids_slice) num_tasks_updated += update_tasks_counters(queryset, from_scratch) bulk_update_stats_project_tasks(queryset, self) page_idx += 1 return num_tasks_updated def _update_tasks_counters_and_task_states( self, queryset, maximum_annotations_changed, overlap_cohort_percentage_changed, tasks_number_changed, from_scratch=True, recalculate_stats_counts: Optional[Mapping[str, int]] = None, ): """ Update tasks counters and update tasks states (rearrange and/or is_labeled) :param queryset: Tasks to update queryset :param from_scratch: Skip calculated tasks :return: Count of updated tasks """ from tasks.functions import update_tasks_counters queryset = make_queryset_from_iterable(queryset) objs = update_tasks_counters(queryset, from_scratch) self._update_tasks_states(maximum_annotations_changed, overlap_cohort_percentage_changed, tasks_number_changed) if recalculate_all_stats and recalculate_stats_counts: recalculate_all_stats(self.id, **recalculate_stats_counts) return objs def get_max_annotation_result_size(self): """Get the maximum annotation result size for this project""" # For SQLite, return 0 (no annotations to consider) if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE: return 0 # Using raw SQL to ensure we use the specific index annotation_proj_result_octlen_idx # which is optimized for this query pattern (project_id, octet_length DESC) with connection.cursor() as cursor: cursor.execute( """ SELECT id, octet_length(result::text) AS bytes FROM task_completion WHERE project_id = %s ORDER BY octet_length(result::text) DESC LIMIT 1 """, [self.id], ) row = cursor.fetchone() if not row or not row[1]: return 0 return row[1] def get_task_batch_size(self): """Calculate optimal batch size based on task data size and annotation result size""" # For SQLite, use default MAX_TASK_BATCH_SIZE if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE: return settings.MAX_TASK_BATCH_SIZE # Get maximum task data size using the optimized index max_task_size = 0 with connection.cursor() as cursor: cursor.execute( """ SELECT id, octet_length(data::text) AS bytes FROM task WHERE project_id = %s ORDER BY octet_length(data::text) DESC LIMIT 1 """, [self.id], ) row = cursor.fetchone() if row and row[1]: max_task_size = row[1] # Get maximum annotation result size using the new optimized index max_annotation_size = self.get_max_annotation_result_size() # Use the larger of the two sizes for batch calculation max_data_size = max(max_task_size, max_annotation_size) if max_data_size == 0: return settings.MAX_TASK_BATCH_SIZE batch_size = settings.TASK_DATA_PER_BATCH // max_data_size if batch_size > settings.MAX_TASK_BATCH_SIZE: batch_size = settings.MAX_TASK_BATCH_SIZE elif batch_size < 1: batch_size = 1 logger.info( f'Project {self.id}: max task size {max_task_size} bytes, ' f'max annotation size {max_annotation_size} bytes, ' f'calculated batch size {batch_size}' ) return batch_size def __str__(self): return f'{self.title} (id={self.id})' or _('Business number %d') % self.pk if connection.vendor == 'postgresql': search_vector = GeneratedField( expression=RawSQL( "setweight(to_tsvector('english', COALESCE(CAST(id AS TEXT), '')), 'A') || " "setweight(to_tsvector('english', COALESCE(title, '')), 'B') || " "setweight(to_tsvector('english', COALESCE(SUBSTRING(description, 1, 250000), '')), 'C')", params=[], output_field=SearchVectorField(), ), output_field=SearchVectorField(), db_persist=True, ) else: search_vector = models.TextField(null=True, blank=True) class Meta: db_table = 
'project' indexes = [ models.Index(fields=['pinned_at', 'created_at']), ] # This index is added with an async migration # indexes.append(GinIndex(fields=['search_vector'], name='project_search_vector_idx')) class ProjectOnboardingSteps(models.Model): """ """ DATA_UPLOAD = 'DU' CONF_SETTINGS = 'CF' PUBLISH = 'PB' INVITE_EXPERTS = 'IE' STEPS_CHOICES = ( (DATA_UPLOAD, 'Import your data'), (CONF_SETTINGS, 'Configure settings'), (PUBLISH, 'Publish project'), (INVITE_EXPERTS, 'Invite collaborators'), ) code = models.CharField(max_length=2, choices=STEPS_CHOICES, null=True) title = models.CharField(_('title'), max_length=1000, null=False) description = models.TextField(_('description'), null=False) order = models.IntegerField(default=0) created_at = models.DateTimeField(_('created at'), auto_now_add=True) updated_at = models.DateTimeField(_('updated at'), auto_now=True) class Meta: ordering = ['order'] class ProjectOnboarding(models.Model): """ """ step = models.ForeignKey(ProjectOnboardingSteps, on_delete=models.CASCADE, related_name='po_through') project = models.ForeignKey(Project, on_delete=models.CASCADE) finished = models.BooleanField(default=False) created_at = models.DateTimeField(_('created at'), auto_now_add=True) updated_at = models.DateTimeField(_('updated at'), auto_now=True) def save(self, *args, **kwargs): super(ProjectOnboarding, self).save(*args, **kwargs) if ProjectOnboarding.objects.filter(project=self.project, finished=True).count() == 4: self.project.skip_onboarding = True self.project.save(recalc=False) class LabelStreamHistory(models.Model): user = models.ForeignKey( settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name='histories', help_text='User ID' ) project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name='histories', help_text='Project ID') data = models.JSONField(default=list) class Meta: constraints = [models.UniqueConstraint(fields=['user', 'project'], name='unique_history')] class ProjectMember(models.Model): user = models.ForeignKey( settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name='project_memberships', help_text='User ID' ) project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name='members', help_text='Project ID') enabled = models.BooleanField(default=True, help_text='Project member is enabled') created_at = models.DateTimeField(_('created at'), auto_now_add=True) updated_at = models.DateTimeField(_('updated at'), auto_now=True) class ProjectSummary(models.Model): project = AutoOneToOneField(Project, primary_key=True, on_delete=models.CASCADE, related_name='summary') created_at = models.DateTimeField(_('created at'), auto_now_add=True, help_text='Creation time') # { col1: task_count_with_col1, col2: task_count_with_col2 } all_data_columns = JSONField( _('all data columns'), null=True, default=dict, help_text='All data columns found in imported tasks' ) # [col1, col2] common_data_columns = JSONField( _('common data columns'), null=True, default=list, help_text='Common data columns found across imported tasks' ) # { (from_name, to_name, type): annotation_count } created_annotations = JSONField( _('created annotations'), null=True, default=dict, help_text='Unique annotation types identified by tuple (from_name, to_name, type)', ) # { from_name: {label1: task_count_with_label1, label2: task_count_with_label2} } created_labels = JSONField(_('created labels'), null=True, default=dict, help_text='Unique labels') created_labels_drafts = JSONField( _('created labels in drafts'), null=True, default=dict, 
        help_text='Unique draft labels'
    )

    def has_permission(self, user):
        user.project = self.project  # link for activity log
        return self.project.has_permission(user)

    def reset(self, tasks_data_based=True):
        if tasks_data_based:
            self.all_data_columns = {}
            self.common_data_columns = []
        self.created_annotations = {}
        self.created_labels = {}
        self.created_labels_drafts = {}
        self.save()

    def update_data_columns(self, tasks):
        common_data_columns = set()
        all_data_columns = dict(self.all_data_columns)
        for task in tasks:
            try:
                task_data = get_attr_or_item(task, 'data')
            except KeyError:
                task_data = task
            task_data_keys = task_data.keys()

            for column in task_data_keys:
                all_data_columns[column] = all_data_columns.get(column, 0) + 1

            if not common_data_columns:
                common_data_columns = set(task_data_keys)
            else:
                common_data_columns &= set(task_data_keys)

        self.all_data_columns = all_data_columns
        if not self.common_data_columns:
            self.common_data_columns = list(sorted(common_data_columns))
        else:
            self.common_data_columns = list(sorted(set(self.common_data_columns) & common_data_columns))
        self.save(update_fields=['all_data_columns', 'common_data_columns'])

    def remove_data_columns(self, tasks):
        all_data_columns = dict(self.all_data_columns)
        keys_to_remove = []

        for task in tasks:
            task_data = get_attr_or_item(task, 'data')
            for key in task_data.keys():
                if key in all_data_columns:
                    all_data_columns[key] -= 1
                    if all_data_columns[key] == 0:
                        keys_to_remove.append(key)
                        all_data_columns.pop(key)
        self.all_data_columns = all_data_columns

        if keys_to_remove:
            common_data_columns = list(self.common_data_columns)
            for key in keys_to_remove:
                if key in common_data_columns:
                    common_data_columns.remove(key)
            self.common_data_columns = common_data_columns
        self.save(
            update_fields=[
                'all_data_columns',
                'common_data_columns',
            ]
        )

    def _get_annotation_key(self, result):
        result_type = result.get('type', None)
        if result_type in ('relation', 'pairwise', None):
            return None
        if 'from_name' not in result or 'to_name' not in result:
            logger.error(
                'Unexpected annotation.result format: "from_name" or "to_name" not found',
                extra={'sentry_skip': True},
            )
            return None
        result_from_name = result['from_name']
        key = get_annotation_tuple(result_from_name, result['to_name'], result_type or '')
        return key

    def _get_labels(self, result):
        result_type = result.get('type')
        # DEV-1990 Workaround for Video labels, as there are no labels in the VideoRectangle tag
        if result_type in ['videorectangle']:
            result_type = 'labels'
        result_value = result['value'].get(result_type)
        if not result_value or not isinstance(result_value, list) or result_type == 'text':
            # Non-list values are not labels; TextArea list values (texts) are not labels either.
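            # e.g. a TextArea result value {'text': ['free-form answer']} returns [] here,
            # while {'choices': ['Positive']} falls through to the loop below and yields ['Positive'].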
return [] # Labels are stored in list labels = [] for label in result_value: if result_type == 'taxonomy' and isinstance(label, list): for label_ in label: labels.append(str(label_)) else: labels.append(str(label)) return labels def update_created_annotations_and_labels(self, annotations): created_annotations = dict(self.created_annotations) labels = dict(self.created_labels) for annotation in annotations: results = get_attr_or_item(annotation, 'result') or [] if not isinstance(results, list): continue for result in results: # aggregate annotation types key = self._get_annotation_key(result) if not key: continue created_annotations[key] = created_annotations.get(key, 0) + 1 from_name = result['from_name'] # aggregate labels if from_name not in self.created_labels: labels[from_name] = dict() for label in self._get_labels(result): labels[from_name][label] = labels[from_name].get(label, 0) + 1 logger.debug(f'summary.created_annotations = {created_annotations}') logger.debug(f'summary.created_labels = {labels}') self.created_annotations = created_annotations self.created_labels = labels self.save(update_fields=['created_annotations', 'created_labels']) def remove_created_annotations_and_labels(self, annotations): # we are going to remove all annotations, so we'll reset the corresponding fields on the summary remove_all_annotations = self.project.annotations.count() == len(annotations) created_annotations, created_labels = ( ({}, {}) if remove_all_annotations else (dict(self.created_annotations), dict(self.created_labels)) ) if not remove_all_annotations: for annotation in annotations: results = get_attr_or_item(annotation, 'result') or [] if not isinstance(results, list): continue for result in results: # reduce annotation counters key = self._get_annotation_key(result) if key in created_annotations: created_annotations[key] -= 1 if created_annotations[key] == 0: created_annotations.pop(key) # reduce labels counters from_name = result.get('from_name', None) if from_name not in created_labels: continue for label in self._get_labels(result): label = str(label) if label in created_labels[from_name]: created_labels[from_name][label] -= 1 if created_labels[from_name][label] == 0: created_labels[from_name].pop(label) if not created_labels[from_name]: created_labels.pop(from_name) logger.debug(f'summary.created_annotations = {created_annotations}') logger.debug(f'summary.created_labels = {created_labels}') self.created_annotations = created_annotations self.created_labels = created_labels self.save(update_fields=['created_annotations', 'created_labels']) def update_created_labels_drafts(self, drafts): labels = dict(self.created_labels_drafts) for draft in drafts: results = get_attr_or_item(draft, 'result') or [] if not isinstance(results, list): continue for result in results: if 'from_name' not in result: continue from_name = result['from_name'] # aggregate labels if from_name not in self.created_labels_drafts: labels[from_name] = dict() for label in self._get_labels(result): labels[from_name][label] = labels[from_name].get(label, 0) + 1 logger.debug(f'update summary.created_labels_drafts = {labels}') self.created_labels_drafts = labels self.save(update_fields=['created_labels_drafts']) def remove_created_drafts_and_labels(self, drafts): # we are going to remove all drafts, so we'll reset the corresponding field on the summary remove_all_drafts = AnnotationDraft.objects.filter(task__project=self.project).count() == len(drafts) labels = {} if remove_all_drafts else dict(self.created_labels_drafts) if 
not remove_all_drafts: for draft in drafts: results = get_attr_or_item(draft, 'result') or [] if not isinstance(results, list): continue for result in results: # reduce labels counters from_name = result.get('from_name', None) if from_name not in labels: continue for label in self._get_labels(result): label = str(label) if label in labels[from_name]: labels[from_name][label] -= 1 if labels[from_name][label] == 0: labels[from_name].pop(label) if not labels[from_name]: labels.pop(from_name) logger.debug(f'summary.created_labels_drafts = {labels}') self.created_labels_drafts = labels self.save(update_fields=['created_labels_drafts']) class ProjectImport(models.Model): class Status(models.TextChoices): CREATED = 'created', _('Created') IN_PROGRESS = 'in_progress', _('In progress') FAILED = 'failed', _('Failed') COMPLETED = 'completed', _('Completed') project = models.ForeignKey('projects.Project', null=True, related_name='imports', on_delete=models.CASCADE) preannotated_from_fields = models.JSONField(null=True, blank=True) commit_to_project = models.BooleanField(default=False) return_task_ids = models.BooleanField(default=False) status = models.CharField(max_length=64, choices=Status.choices, default=Status.CREATED) url = models.CharField(max_length=2048, null=True, blank=True) traceback = models.TextField(null=True, blank=True) error = models.TextField(null=True, blank=True) created_at = models.DateTimeField(_('created at'), null=True, auto_now_add=True, help_text='Creation time') updated_at = models.DateTimeField(_('updated at'), null=True, auto_now_add=True, help_text='Updated time') finished_at = models.DateTimeField(_('finished at'), help_text='Complete or fail time', null=True, default=None) task_count = models.IntegerField(default=0) annotation_count = models.IntegerField(default=0) prediction_count = models.IntegerField(default=0) duration = models.IntegerField(default=0) file_upload_ids = models.JSONField(default=list) could_be_tasks_list = models.BooleanField(default=False) found_formats = models.JSONField(default=list) data_columns = models.JSONField(default=list) tasks = models.JSONField(blank=True, null=True) task_ids = models.JSONField(default=list) def has_permission(self, user): return self.project.has_permission(user) class ProjectReimport(models.Model): class Status(models.TextChoices): CREATED = 'created', _('Created') IN_PROGRESS = 'in_progress', _('In progress') FAILED = 'failed', _('Failed') COMPLETED = 'completed', _('Completed') project = models.ForeignKey('projects.Project', null=True, related_name='reimports', on_delete=models.CASCADE) status = models.CharField(max_length=64, choices=Status.choices, default=Status.CREATED) error = models.TextField(null=True, blank=True) task_count = models.IntegerField(default=0) annotation_count = models.IntegerField(default=0) prediction_count = models.IntegerField(default=0) duration = models.IntegerField(default=0) file_upload_ids = models.JSONField(default=list) files_as_tasks_list = models.BooleanField(default=False) found_formats = models.JSONField(default=list) data_columns = models.JSONField(default=list) traceback = models.TextField(null=True, blank=True) def has_permission(self, user): return self.project.has_permission(user)
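

# Usage sketch (illustrative only, e.g. in a Django shell; `user` and the chosen field list
# are assumptions, not part of this module):
#
#     projects = Project.objects.for_user(user)  # org-scoped; soft-deleted projects are hidden
#     projects = Project.objects.with_counts(fields=['task_number', 'total_annotations_number'])
#     for project in projects:
#         print(project.title, project.get_counters())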