# Source revision: 2025-12-17 21f0498f62ada55651f4d232327e15fc47f498b1
import json
 
import boto3
import pytest
from io_storages.models import S3ImportStorage
from moto import mock_s3
from projects.tests.factories import ProjectFactory
from rest_framework.test import APIClient
from tests.conftest import set_feature_flag_envvar  # noqa: F401
 
 
@pytest.mark.django_db
class TestStoragePredictionValidation:
    """Test prediction validation in cloud storage imports.

    Each test uploads a one-task JSON file to a mocked S3 bucket, syncs an
    ``S3ImportStorage`` attached to the project and then checks, through the
    REST API, which tasks and predictions ended up in the project.
    """

    # Name of the bucket created inside each test's ``mock_s3()`` context.
    BUCKET_NAME = 'pytest-s3-prediction-validation'

    @pytest.fixture
    def project(self):
        """Create a project with a label config for prediction validation.

        The config declares a ``Text`` tag named ``text`` and a ``Choices`` tag
        named ``sentiment`` offering ``positive`` and ``negative``.
        """
        return ProjectFactory(
            label_config="""
            <View>
              <Text name="text" value="$text"/>
              <Choices name="sentiment" toName="text">
                <Choice value="positive"/>
                <Choice value="negative"/>
              </Choices>
            </View>
            """
        )

    @pytest.fixture
    def api_client(self):
        """Create an unauthenticated API client; tests authenticate it themselves."""
        return APIClient()

    @staticmethod
    def _build_task(from_name):
        """Return a task payload carrying one ``choices`` prediction.

        Args:
            from_name: Value of the prediction result's ``from_name``. Only
                ``'sentiment'`` matches a control tag in the project's label config.
        """
        return {
            'data': {'text': 'This is a positive review'},
            'predictions': [
                {
                    'result': [
                        {
                            'from_name': from_name,
                            'to_name': 'text',
                            'type': 'choices',
                            'value': {'choices': ['positive']},
                        }
                    ],
                    'score': 0.95,
                    'model_version': 'v1.0',
                }
            ],
        }

    @staticmethod
    def _sync_from_s3(project, key, task_data):
        """Upload ``task_data`` to the test bucket and sync it into ``project``.

        Must be called inside an active ``mock_s3()`` context so that no real
        AWS endpoint is contacted.

        Args:
            project: Project the import storage is attached to.
            key: Object key to store the JSON file under.
            task_data: Task payload; it is uploaded wrapped in a one-element list.

        Returns:
            The saved ``S3ImportStorage`` instance after ``sync()`` has run.
        """
        s3 = boto3.client('s3', region_name='us-east-1')
        bucket_name = TestStoragePredictionValidation.BUCKET_NAME
        s3.create_bucket(Bucket=bucket_name)
        s3.put_object(Bucket=bucket_name, Key=key, Body=json.dumps([task_data]))

        storage = S3ImportStorage(
            project=project,
            bucket=bucket_name,
            aws_access_key_id='example',
            aws_secret_access_key='example',
            use_blob_urls=False,
        )
        storage.save()
        storage.sync()
        return storage

    @staticmethod
    def _list_tasks(api_client, project):
        """Fetch the project's tasks via the API, asserting a 200 response."""
        tasks_response = api_client.get(f'/api/tasks?project={project.id}')
        assert tasks_response.status_code == 200
        return tasks_response.json()['tasks']

    # NOTE(review): ``set_feature_flag_envvar`` is requested only for its side
    # effect; its exact behavior lives in tests.conftest - confirm there.
    def test_storage_import_with_valid_prediction(self, project, api_client, set_feature_flag_envvar):
        """Test that storage import accepts valid predictions."""
        api_client.force_authenticate(user=project.created_by)
        valid_task_data = self._build_task(from_name='sentiment')

        with mock_s3():
            self._sync_from_s3(project, 'valid_prediction.json', valid_task_data)

            # Verify task was created
            tasks = self._list_tasks(api_client, project)
            assert len(tasks) == 1

            # Verify prediction was created
            predictions_response = api_client.get(f'/api/predictions?task={tasks[0]["id"]}')
            assert predictions_response.status_code == 200
            predictions = predictions_response.json()
            assert len(predictions) == 1

    def test_storage_import_with_invalid_prediction(self, project, api_client, set_feature_flag_envvar):
        """Test that storage import rejects invalid predictions."""
        api_client.force_authenticate(user=project.created_by)
        # 'nonexistent_tag' is not declared in the project's label config.
        invalid_task_data = self._build_task(from_name='nonexistent_tag')

        with mock_s3():
            self._sync_from_s3(project, 'invalid_prediction.json', invalid_task_data)

            # Verify task was NOT created due to validation failure
            tasks = self._list_tasks(api_client, project)
            assert len(tasks) == 0  # No tasks should be created when predictions are invalid