label-studio.git

"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
"""
import bleach
from constants import SAFE_HTML_ATTRIBUTES, SAFE_HTML_TAGS
from django.db.models import Q
from fsm.serializer_fields import FSMStateField
from label_studio_sdk.label_interface import LabelInterface
from label_studio_sdk.label_interface.control_tags import (
    BrushLabelsTag,
    BrushTag,
    ChoicesTag,
    DateTimeTag,
    EllipseLabelsTag,
    EllipseTag,
    HyperTextLabelsTag,
    KeyPointLabelsTag,
    KeyPointTag,
    LabelsTag,
    NumberTag,
    ParagraphLabelsTag,
    PolygonLabelsTag,
    PolygonTag,
    RatingTag,
    RectangleLabelsTag,
    RectangleTag,
    TaxonomyTag,
    TextAreaTag,
    TimeSeriesLabelsTag,
    VideoRectangleTag,
)
from projects.models import Project, ProjectImport, ProjectOnboarding, ProjectReimport, ProjectSummary
from rest_flex_fields import FlexFieldsModelSerializer
from rest_framework import serializers
from rest_framework.serializers import SerializerMethodField
from tasks.models import Task
from users.serializers import UserSimpleSerializer
 
 
class CreatedByFromContext:
    """Callable field default that looks up ``created_by`` in the serializer context.

    ``requires_context = True`` is the DRF marker for default callables that
    want the serializer field passed to ``__call__``.
    """

    requires_context = True

    def __call__(self, serializer_field):
        """Return ``serializer_field.context['created_by']`` or ``None`` when the key is absent."""
        context = serializer_field.context
        return context.get('created_by')
 
 
class ProjectSerializer(FlexFieldsModelSerializer):
    """Serializer for Project instances, including aggregated counter fields.

    The counter fields are read from project queryset annotations, so make sure
    you serialize projects coming from the correct queryset
    (Project.objects.with_counts()).
    """

    # --- Counter fields: all read-only and declared with default=None ---
    task_number = serializers.IntegerField(default=None, read_only=True, help_text='Total task number in project')
    total_annotations_number = serializers.IntegerField(
        default=None,
        read_only=True,
        help_text='Total annotations number in project including '
        'skipped_annotations_number and ground_truth_number.',
    )
    # NOTE(review): this help text lists annotation counters for a predictions
    # counter; it looks copy-pasted from the fields around it — confirm.
    total_predictions_number = serializers.IntegerField(
        default=None,
        read_only=True,
        help_text='Total predictions number in project including '
        'skipped_annotations_number, ground_truth_number, and '
        'useful_annotation_number.',
    )
    useful_annotation_number = serializers.IntegerField(
        default=None,
        read_only=True,
        help_text='Useful annotation number in project not including '
        'skipped_annotations_number and ground_truth_number. '
        'Total annotations = annotation_number + '
        'skipped_annotations_number + ground_truth_number',
    )
    ground_truth_number = serializers.IntegerField(
        default=None, read_only=True, help_text='Honeypot annotation number in project'
    )
    skipped_annotations_number = serializers.IntegerField(
        default=None, read_only=True, help_text='Skipped by collaborators annotation number in project'
    )
    num_tasks_with_annotations = serializers.IntegerField(
        default=None, read_only=True, help_text='Tasks with annotations count'
    )

    # Owner of the project; when not supplied it defaults to the `created_by`
    # entry of the serializer context (see CreatedByFromContext).
    created_by = UserSimpleSerializer(default=CreatedByFromContext(), help_text='Project owner')

    parsed_label_config = serializers.JSONField(
        default=None, read_only=True, help_text='JSON-formatted labeling configuration'
    )
    # NOTE(review): declared with read_only=False, but incoming values for this
    # key are handled manually in to_internal_value() — presumably because a
    # SerializerMethodField does not accept input itself; confirm against DRF.
    start_training_on_annotation_update = SerializerMethodField(
        default=None, read_only=False, help_text='Start model training after any annotations are submitted or updated'
    )
    config_has_control_tags = SerializerMethodField(
        default=None, read_only=True, help_text='Flag to detect is project ready for labeling'
    )
    config_suitable_for_bulk_annotation = serializers.SerializerMethodField(
        default=None, read_only=True, help_text='Flag to detect is project ready for bulk annotation'
    )
    finished_task_number = serializers.IntegerField(default=None, read_only=True, help_text='Finished tasks')

    # Per-user queue counters, computed by get_queue_total() / get_queue_done()
    queue_total = serializers.SerializerMethodField()
    queue_done = serializers.SerializerMethodField()
    state = FSMStateField(read_only=True)  # FSM state - automatically uses annotation if present
 
    @property
    def user_id(self):
        try:
            return self.context['request'].user.id
        except KeyError:
            return next(iter(self.context['user_cache']))
 
    @staticmethod
    def get_config_has_control_tags(project) -> bool:
        return len(project.get_parsed_config()) > 0
 
    @staticmethod
    def get_config_suitable_for_bulk_annotation(project) -> bool:
        li = LabelInterface(project.label_config)
 
        # List of tags that should not be present
        disallowed_tags = [
            LabelsTag,
            BrushTag,
            BrushLabelsTag,
            EllipseTag,
            EllipseLabelsTag,
            KeyPointTag,
            KeyPointLabelsTag,
            PolygonTag,
            PolygonLabelsTag,
            RectangleTag,
            RectangleLabelsTag,
            HyperTextLabelsTag,
            ParagraphLabelsTag,
            TimeSeriesLabelsTag,
            VideoRectangleTag,
        ]
 
        # Return False if any disallowed tag is present
        for tag_class in disallowed_tags:
            if li.find_tags_by_class(tag_class):
                return False
 
        # Check perRegion/perItem for expanded list of tags, plus value="no" for Choices/Taxonomy
        allowed_tags_for_checks = [ChoicesTag, TaxonomyTag, DateTimeTag, NumberTag, RatingTag, TextAreaTag]
        for tag_class in allowed_tags_for_checks:
            tags = li.find_tags_by_class(tag_class)
            for tag in tags:
                per_region = tag.attr.get('perRegion', 'false').lower() == 'true'
                per_item = tag.attr.get('perItem', 'false').lower() == 'true'
                if per_region or per_item:
                    return False
                # For ChoicesTag and TaxonomyTag, the value attribute must not be set at all
                if tag_class in [ChoicesTag, TaxonomyTag]:
                    if 'value' in tag.attr:
                        return False
 
        # For TaxonomyTag, check labeling and apiUrl
        taxonomy_tags = li.find_tags_by_class(TaxonomyTag)
        for tag in taxonomy_tags:
            labeling = tag.attr.get('labeling', 'false').lower() == 'true'
            if labeling:
                return False
            api_url = tag.attr.get('apiUrl', None)
            if api_url is not None:
                return False
 
        # If all checks pass, return True
        return True
 
    @staticmethod
    def get_parsed_label_config(project):
        return project.get_parsed_config()
 
    def get_start_training_on_annotation_update(self, instance) -> bool:
        # FIXME: remake this logic with start_training_on_annotation_update
        return True if instance.min_annotations_to_start_training else False
 
    def to_internal_value(self, data):
        # FIXME: remake this logic with start_training_on_annotation_update
        initial_data = data
        data = super().to_internal_value(data)
 
        if 'start_training_on_annotation_update' in initial_data:
            data['min_annotations_to_start_training'] = int(initial_data['start_training_on_annotation_update'])
 
        if 'expert_instruction' in initial_data:
            data['expert_instruction'] = bleach.clean(
                initial_data['expert_instruction'], tags=SAFE_HTML_TAGS, attributes=SAFE_HTML_ATTRIBUTES
            )
 
        return data
 
    def validate_color(self, value):
        # color : "#FF4C25"
        if value.startswith('#') and len(value) == 7:
            try:
                int(value[1:], 16)
                return value
            except ValueError:
                pass
        raise serializers.ValidationError('Color must be in "#RRGGBB" format')
 
    class Meta:
        # Model this serializer is bound to
        model = Project
        # Per-field keyword overrides: none of these has to be present in the payload.
        # NOTE(review): 'memberships' does not appear in `fields` below — confirm
        # this entry is still needed.
        extra_kwargs = {
            'memberships': {'required': False},
            'title': {'required': False},
            'created_by': {'required': False},
        }
        # Fields exposed by the serializer: model fields plus the counters and
        # computed fields declared on ProjectSerializer itself.
        fields = [
            'id',
            'title',
            'description',
            'label_config',
            'expert_instruction',
            'show_instruction',
            'show_skip_button',
            'enable_empty_annotation',
            'show_annotation_history',
            'organization',
            'color',
            'maximum_annotations',
            'is_published',
            'model_version',
            'is_draft',
            'created_by',
            'created_at',
            'min_annotations_to_start_training',
            'start_training_on_annotation_update',
            'show_collab_predictions',
            'num_tasks_with_annotations',
            'task_number',
            'useful_annotation_number',
            'ground_truth_number',
            'skipped_annotations_number',
            'total_annotations_number',
            'total_predictions_number',
            'sampling',
            'show_ground_truth_first',
            'show_overlap_first',
            'overlap_cohort_percentage',
            'task_data_login',
            'task_data_password',
            'control_weights',
            'parsed_label_config',
            'evaluate_predictions_automatically',
            'config_has_control_tags',
            'skip_queue',
            'reveal_preannotations_interactively',
            'pinned_at',
            'finished_task_number',
            'queue_total',
            'queue_done',
            'config_suitable_for_bulk_annotation',
            'state',
        ]
 
    def validate_label_config(self, value):
        if self.instance is None:
            # No project created yet
            Project.validate_label_config(value)
        else:
            # Existing project is updated
            self.instance.validate_config(value)
        return value
 
    def validate_model_version(self, value):
        """Custom model_version validation"""
        p = self.instance
 
        # Only run the validation if model_version is about to change
        # and it contains a string
        if p is not None and p.model_version != value and value != '':
            # that model_version should either match live ml backend
            # or match version in predictions
 
            if p.ml_backends.filter(title=value).union(p.predictions.filter(project=p, model_version=value)).exists():
                return value
            else:
                raise serializers.ValidationError(
                    "Model version doesn't exist either as live model or as static predictions."
                )
 
        return value
 
    def update(self, instance, validated_data):
        if validated_data.get('show_collab_predictions') is False:
            instance.model_version = ''
 
        return super().update(instance, validated_data)
 
    def get_queue_total(self, project) -> int:
        remain = project.tasks.filter(
            Q(is_labeled=False) & ~Q(annotations__completed_by_id=self.user_id)
            | Q(annotations__completed_by_id=self.user_id)
        ).distinct()
        return remain.count()
 
    def get_queue_done(self, project) -> int:
        tasks_filter = {
            'project': project,
            'annotations__completed_by_id': self.user_id,
        }
 
        if project.skip_queue == project.SkipQueue.REQUEUE_FOR_ME:
            tasks_filter['annotations__was_cancelled'] = False
 
        already_done_tasks = Task.objects.filter(**tasks_filter)
        result = already_done_tasks.distinct().count()
 
        return result
 
 
class ProjectCountsSerializer(ProjectSerializer):
    """Reduced ProjectSerializer that exposes only the project id and its counter fields."""

    class Meta:
        model = Project
        # Only the id and the counters declared on ProjectSerializer
        fields = [
            'id',
            'task_number',
            'finished_task_number',
            'total_predictions_number',
            'total_annotations_number',
            'num_tasks_with_annotations',
            'useful_annotation_number',
            'ground_truth_number',
            'skipped_annotations_number',
        ]
 
 
class ProjectOnboardingSerializer(serializers.ModelSerializer):
    """Model serializer for ProjectOnboarding that includes all of the model's fields."""

    class Meta:
        model = ProjectOnboarding
        fields = '__all__'
 
 
class ProjectLabelConfigSerializer(serializers.Serializer):
    """Plain serializer that carries and validates a single ``label_config`` string."""

    # Reuses the help text declared on the Project.label_config model field
    label_config = serializers.CharField(help_text=Project.label_config.field.help_text)

    def validate_label_config(self, config):
        """Run ``Project.validate_label_config`` on ``config`` and return it unchanged.

        Any exception raised by that validator propagates to the caller.
        """
        Project.validate_label_config(config)
        return config
 
 
class ProjectSummarySerializer(serializers.ModelSerializer):
    """Model serializer for ProjectSummary that includes all of the model's fields."""

    class Meta:
        model = ProjectSummary
        fields = '__all__'
 
 
class ProjectImportSerializer(serializers.ModelSerializer):
    """Model serializer for ProjectImport, limited to the explicitly listed fields."""

    class Meta:
        model = ProjectImport
        # Explicit whitelist of ProjectImport fields exposed by this serializer
        fields = [
            'id',
            'project',
            'preannotated_from_fields',
            'commit_to_project',
            'return_task_ids',
            'status',
            'url',
            'error',
            'created_at',
            'updated_at',
            'finished_at',
            'task_count',
            'annotation_count',
            'prediction_count',
            'duration',
            'file_upload_ids',
            'could_be_tasks_list',
            'found_formats',
            'data_columns',
            'tasks',
            'task_ids',
        ]
 
 
class ProjectReimportSerializer(serializers.ModelSerializer):
    """Model serializer for ProjectReimport, limited to the explicitly listed fields."""

    class Meta:
        model = ProjectReimport
        # Explicit whitelist of ProjectReimport fields exposed by this serializer
        fields = [
            'id',
            'project',
            'status',
            'error',
            'task_count',
            'annotation_count',
            'prediction_count',
            'duration',
            'file_upload_ids',
            'files_as_tasks_list',
            'found_formats',
            'data_columns',
        ]
 
 
class ProjectModelVersionExtendedSerializer(serializers.Serializer):
    """Plain serializer describing one model version entry.

    NOTE(review): ``count`` and ``latest`` presumably refer to the number of
    predictions for the version and the most recent one — confirm with the view
    that builds these rows.
    """

    model_version = serializers.CharField()
    count = serializers.IntegerField()
    latest = serializers.DateTimeField()
 
 
class ProjectModelVersionParamsSerializer(serializers.Serializer):
    """Optional parameters for a model-versions request; every field has a default.

    NOTE(review): the exact effect of each flag is defined by the consuming
    view, which is not part of this module.
    """

    extended = serializers.BooleanField(required=False, default=False)
    include_live_models = serializers.BooleanField(required=False, default=False)
    limit = serializers.IntegerField(required=False, default=None)
 
 
class GetFieldsSerializer(serializers.Serializer):
    """Optional parameters for listing projects: count fields to include, pinned filter, search term."""

    # Comma-separated string in the request; validate_include() turns it into a list
    include = serializers.CharField(
        required=False,
        help_text=(
            'Comma-separated list of count fields to include in the response to optimize performance. '
            'Available fields: task_number, finished_task_number, total_predictions_number, '
            'total_annotations_number, num_tasks_with_annotations, useful_annotation_number, '
            'ground_truth_number, skipped_annotations_number. If not specified, all count fields are included.'
        ),
    )
    # One of 'all' (default), 'pinned_only' or 'exclude_pinned'; see validate_filter()
    filter = serializers.CharField(
        required=False,
        default='all',
        help_text=(
            "Filter projects by pinned status. Use 'pinned_only' to return only pinned projects, "
            "'exclude_pinned' to return only non-pinned projects, or 'all' to return all projects."
        ),
    )
    search = serializers.CharField(
        required=False, default=None, help_text='Search term for project title and description'
    )
 
    def validate_include(self, value):
        if value is not None:
            value = value.split(',')
        return value
 
    def validate_filter(self, value):
        if value in ['all', 'pinned_only', 'exclude_pinned']:
            return value