import json import boto3 import pytest from io_storages.models import S3ImportStorage from moto import mock_s3 from projects.tests.factories import ProjectFactory from rest_framework.test import APIClient from tests.conftest import set_feature_flag_envvar # noqa: F401 @pytest.mark.django_db class TestStoragePredictionValidation: """Test prediction validation in cloud storage imports.""" @pytest.fixture def project(self): """Create a project with a label config for prediction validation.""" return ProjectFactory( label_config=""" """ ) @pytest.fixture def api_client(self): """Create API client for testing.""" return APIClient() def test_storage_import_with_valid_prediction(self, project, api_client, set_feature_flag_envvar): """Test that storage import accepts valid predictions.""" # Setup API client api_client.force_authenticate(user=project.created_by) # Create valid task data with prediction valid_task_data = { 'data': {'text': 'This is a positive review'}, 'predictions': [ { 'result': [ { 'from_name': 'sentiment', 'to_name': 'text', 'type': 'choices', 'value': {'choices': ['positive']}, } ], 'score': 0.95, 'model_version': 'v1.0', } ], } with mock_s3(): # Setup S3 bucket and test data s3 = boto3.client('s3', region_name='us-east-1') bucket_name = 'pytest-s3-prediction-validation' s3.create_bucket(Bucket=bucket_name) # Put valid test data into S3 s3.put_object(Bucket=bucket_name, Key='valid_prediction.json', Body=json.dumps([valid_task_data])) # Create storage and sync storage = S3ImportStorage( project=project, bucket=bucket_name, aws_access_key_id='example', aws_secret_access_key='example', use_blob_urls=False, ) storage.save() storage.sync() # Verify task was created tasks_response = api_client.get(f'/api/tasks?project={project.id}') assert tasks_response.status_code == 200 tasks = tasks_response.json()['tasks'] assert len(tasks) == 1 # Verify prediction was created predictions_response = api_client.get(f'/api/predictions?task={tasks[0]["id"]}') assert predictions_response.status_code == 200 predictions = predictions_response.json() assert len(predictions) == 1 def test_storage_import_with_invalid_prediction(self, project, api_client, set_feature_flag_envvar): """Test that storage import rejects invalid predictions.""" # Setup API client api_client.force_authenticate(user=project.created_by) # Create invalid task data with prediction (wrong from_name) invalid_task_data = { 'data': {'text': 'This is a positive review'}, 'predictions': [ { 'result': [ { 'from_name': 'nonexistent_tag', # Invalid from_name 'to_name': 'text', 'type': 'choices', 'value': {'choices': ['positive']}, } ], 'score': 0.95, 'model_version': 'v1.0', } ], } with mock_s3(): # Setup S3 bucket and test data s3 = boto3.client('s3', region_name='us-east-1') bucket_name = 'pytest-s3-prediction-validation' s3.create_bucket(Bucket=bucket_name) # Put invalid test data into S3 s3.put_object(Bucket=bucket_name, Key='invalid_prediction.json', Body=json.dumps([invalid_task_data])) # Create storage and sync storage = S3ImportStorage( project=project, bucket=bucket_name, aws_access_key_id='example', aws_secret_access_key='example', use_blob_urls=False, ) storage.save() storage.sync() # Verify task was NOT created due to validation failure tasks_response = api_client.get(f'/api/tasks?project={project.id}') assert tasks_response.status_code == 200 tasks = tasks_response.json()['tasks'] assert len(tasks) == 0 # No tasks should be created when predictions are invalid