import contextlib
import datetime
import io
import json
import os
import pathlib

from core.utils.io import get_data_dir
from core.utils.params import get_env
from data_import.models import FileUpload
from data_manager.models import Filter, FilterGroup, View
from django.core.files.base import File
from io_storages.azure_blob.models import AzureBlobExportStorage, AzureBlobImportStorage
from io_storages.gcs.models import GCSExportStorage, GCSImportStorage
from io_storages.redis.models import RedisExportStorage, RedisImportStorage
from io_storages.s3.models import S3ExportStorage, S3ImportStorage
from ml.models import MLBackend
from tasks.models import Annotation, Prediction, Task


@contextlib.contextmanager
def suppress_autotime(model, fields):
    """Temporarily disable auto_now/auto_now_add on the given model fields.

    Allows assigning an explicit historical value to a field such as
    ``created_at`` (declared with ``auto_now_add=True``) that Django would
    otherwise overwrite on ``save()``.

    :param model: model instance whose fields are patched in place
    :param fields: iterable of field names to patch
    """
    _original_values = {}
    for field in model._meta.local_fields:
        if field.name in fields:
            _original_values[field.name] = {
                'auto_now': field.auto_now,
                'auto_now_add': field.auto_now_add,
            }
            field.auto_now = False
            field.auto_now_add = False
    try:
        yield
    finally:
        # restore the original flags even if the caller's save() raised
        for field in model._meta.local_fields:
            if field.name in fields:
                field.auto_now = _original_values[field.name]['auto_now']
                field.auto_now_add = _original_values[field.name]['auto_now_add']


def _migrate_tasks(project_path, project):
    """Migrate tasks from json file to database objects"""
    tasks_path = project_path / 'tasks.json'
    with open(tasks_path, encoding='utf-8') as t:
        tasks_data = json.load(t)

    # local timezone is loop-invariant: compute once instead of per record
    local_tz = datetime.datetime.now().astimezone().tzinfo

    for task_id, task_data in tasks_data.items():
        task = Task.objects.create(data=task_data.get('data', {}), project=project)

        # migrate annotations (stored per task in the legacy "completions" dir)
        annotations_path = project_path / 'completions' / '{}.json'.format(task_id)
        if annotations_path.exists():
            with open(annotations_path, encoding='utf-8') as c:
                annotations_data = json.load(c)
            for annotation in annotations_data['completions']:
                task_annotation = Annotation(
                    result=annotation['result'],
                    task=task,
                    lead_time=annotation['lead_time'],
                    was_cancelled=annotation.get('was_cancelled', False),
                    completed_by=project.created_by,
                )
                # keep the original creation timestamp from the legacy export
                with suppress_autotime(task_annotation, ['created_at']):
                    task_annotation.created_at = datetime.datetime.fromtimestamp(
                        annotation['created_at'], tz=local_tz
                    )
                    task_annotation.save()

        # migrate predictions embedded in the task payload
        for prediction in task_data.get('predictions', []):
            task_prediction = Prediction(
                result=prediction['result'],
                task=task,
                score=prediction.get('score'),
                project=task.project,
            )
            with suppress_autotime(task_prediction, ['created_at']):
                task_prediction.created_at = datetime.datetime.fromtimestamp(
                    prediction['created_at'], tz=local_tz
                )
                task_prediction.save()


def _migrate_tabs(project_path, project):
    """Migrate tabs from tabs.json to Views table"""
    tabs_path = project_path / 'tabs.json'
    if not tabs_path.exists():
        return
    with open(tabs_path, encoding='utf-8') as t:
        tabs_data = json.load(t)
    for tab in tabs_data['tabs']:
        view = View.objects.create(project=project)
        tab['id'] = view.id
        ordering = tab.pop('ordering', None)
        selected_items = tab.pop('selectedItems', None)

        # migrate filters: the legacy "filters" key becomes a FilterGroup row
        filter_group = None
        filters = tab.pop('filters', None)
        if filters is not None:
            filter_group = FilterGroup.objects.create(conjunction=filters.get('conjunction', 'and'))
            for f in filters.get('items', []):
                view_filter = Filter.objects.create(
                    column=f.get('filter', ''),
                    operator=f.get('operator', ''),
                    type=f.get('type', ''),
                    value=f.get('value', {}),
                )
                filter_group.filters.add(view_filter)

        # apply naming change to tabs internal data:
        # legacy "completion" column names become "annotation"
        hidden_columns_data = tab.pop('hiddenColumns', None)
        if hidden_columns_data is not None:
            tab['hiddenColumns'] = {
                section: [c.replace('completion', 'annotation') for c in hidden_columns_data.get(section, [])]
                for section in ('explore', 'labeling')
            }

        view.data = tab
        view.ordering = ordering
        view.selected_items = selected_items
        view.filter_group = filter_group
        view.save()


# maps the legacy "type" value from config.json to (import, export) storage models
_STORAGE_CLASSES = {
    'gcs': (GCSImportStorage, GCSExportStorage),
    'azure-blob': (AzureBlobImportStorage, AzureBlobExportStorage),
    's3': (S3ImportStorage, S3ExportStorage),
    'redis': (RedisImportStorage, RedisExportStorage),
}


def _create_storage(storage_conf, project, is_import):
    """Create one storage record from a legacy storage config dict.

    :param storage_conf: dict with 'type', 'path' and optional 'params'
    :param project: project the storage belongs to
    :param is_import: True for source (import) storage, False for target (export)
    """
    storage_type = storage_conf.get('type')
    classes = _STORAGE_CLASSES.get(storage_type)
    if classes is None:
        # unknown legacy storage type: nothing to migrate
        return
    storage_class = classes[0] if is_import else classes[1]
    params = storage_conf.get('params', {})

    if storage_type == 'redis':
        storage_class.objects.create(
            project=project,
            path=storage_conf.get('path'),
            host=params.get('host'),
            port=params.get('port'),
            password=params.get('password'),
            db=params.get('db', 1),
        )
        return

    # cloud storages share prefix/regex/blob-url settings; Azure names the
    # top-level unit "container" instead of "bucket", S3 also takes a region
    kwargs = {
        'project': project,
        'prefix': params.get('prefix'),
        'regex_filter': params.get('regex'),
        'use_blob_urls': params.get('use_blob_urls'),
    }
    if storage_type == 'azure-blob':
        kwargs['container'] = storage_conf.get('path')
    else:
        kwargs['bucket'] = storage_conf.get('path')
    if storage_type == 's3':
        kwargs['region_name'] = params.get('region')
    storage_class.objects.create(**kwargs)


def _migrate_storages(project, config):
    """Migrate source and target storages from config.json to database"""
    source = config.get('source', None)
    if source:
        _create_storage(source, project, is_import=True)

    target = config.get('target', None)
    if target:
        _create_storage(target, project, is_import=False)


def _migrate_ml_backends(project, config):
    """Migrate ml backend settings from config.json to database"""
    for ml_backend in config.get('ml_backends', []):
        MLBackend.objects.create(project=project, url=ml_backend.get('url'), title=ml_backend.get('name'))


def _migrate_uploaded_files(project, project_path):
    """Migrate files uploaded by user"""
    source_upload_path = project_path / 'upload'
    if not source_upload_path.exists():
        return
    target_upload_path = pathlib.Path(get_env('LABEL_STUDIO_BASE_DATA_DIR', get_data_dir())) / 'upload'
    # race-safe equivalent of the exists()/makedirs() pair
    target_upload_path.mkdir(parents=True, exist_ok=True)
    for file_name in os.listdir(str(source_upload_path)):
        full_file_name = os.path.join(str(source_upload_path), file_name)
        with open(full_file_name, 'rb') as f:
            FileUpload.objects.create(user=project.created_by, project=project, file=File(f, name=file_name))


def migrate_existing_project(project_path, project, config):
    """Migration projects from previous version of Label Studio

    :param project_path: pathlib.Path to the legacy project directory
    :param project: already-created Project instance to attach data to
    :param config: parsed config.json dict of the legacy project
    """
    _migrate_tasks(project_path, project)
    _migrate_tabs(project_path, project)
    _migrate_storages(project, config)
    _migrate_ml_backends(project, config)
    _migrate_uploaded_files(project, project_path)