Bin
2025-12-17 1d710f844b65d9bfdf986a71a3b924cd70598a41
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for
copyright information and LICENSE for a copy of the license.
"""
 
import os
from pathlib import Path
 
import pytest
from django.urls import reverse
from io_storages.localfiles.models import LocalFilesImportStorage
from projects.models import Project
 
 
def _create_storage(project, path):
    return LocalFilesImportStorage.objects.create(
        project=project,
        path=path,
        use_blob_urls=True,
    )
 
 
def _create_dataset_file(settings, tmp_path, filename, content):
    dataset_dir = tmp_path / 'test_upload_data'
    dataset_dir.mkdir(exist_ok=True)
    test_file = dataset_dir / filename
    test_file.write_text(content, encoding='utf-8')
    relative_path = test_file.relative_to(Path(settings.LOCAL_FILES_DOCUMENT_ROOT)).as_posix()
    return dataset_dir, relative_path, content.encode('utf-8')
 
 
@pytest.mark.django_db
def test_localfiles_data_allows_trailing_slash_in_storage_path(
    business_client,
    project_id,
    settings,
    tmp_path,
):
    """Ensure /data/local-files/ works when LocalFilesImportStorage.path has a trailing slash.
 
    This test validates step by step:
    - Creating a temporary LOCAL_FILES_DOCUMENT_ROOT with a nested dataset directory
    - Saving LocalFilesImportStorage.path with a trailing slash pointing to that dataset
    - Requesting a file from /data/local-files/?d= using a path relative to the document root
    - Verifying that the endpoint returns 200 instead of 404 and serves the file correctly
 
    Critical validation: users should be able to configure "Absolute local path" with or without a trailing
    slash in Local Storage settings without breaking media loading in the labeling UI.
    """
    # Setup: enable local files and point document root to a temporary directory
    settings.LOCAL_FILES_SERVING_ENABLED = True
    settings.LOCAL_FILES_DOCUMENT_ROOT = str(tmp_path)
 
    project = Project.objects.get(pk=project_id)
 
    dataset_dir = tmp_path / 'test_upload_data'
    dataset_dir.mkdir()
    test_file = dataset_dir / 'test_image.txt'
    test_file.write_text('content', encoding='utf-8')
 
    # Store path with a trailing slash to mirror common user configuration
    storage_path_with_slash = str(dataset_dir) + os.sep
    _create_storage(project, storage_path_with_slash)
 
    relative_path = test_file.relative_to(Path(settings.LOCAL_FILES_DOCUMENT_ROOT)).as_posix()
    url = reverse('storages:localfiles_data') + f'?d={relative_path}'
 
    response = business_client.get(url)
 
    assert response.status_code == 200
 
 
@pytest.mark.django_db
def test_localfiles_data_allows_backslash_paths(
    business_client,
    project_id,
    settings,
    tmp_path,
):
    """Ensure storages saved with Windows-style separators keep working."""
    settings.LOCAL_FILES_SERVING_ENABLED = True
    settings.LOCAL_FILES_DOCUMENT_ROOT = str(tmp_path)
 
    project = Project.objects.get(pk=project_id)
 
    dataset_dir = tmp_path / 'test_upload_data'
    dataset_dir.mkdir()
    test_file = dataset_dir / 'test_image.txt'
    test_file.write_text('content', encoding='utf-8')
 
    windows_style_path = str(dataset_dir).replace('/', '\\') + '\\'
    _create_storage(project, windows_style_path)
 
    relative_path = test_file.relative_to(Path(settings.LOCAL_FILES_DOCUMENT_ROOT)).as_posix()
    url = reverse('storages:localfiles_data') + f'?d={relative_path}'
 
    response = business_client.get(url)
 
    assert response.status_code == 200
 
 
@pytest.mark.django_db
def test_localfiles_data_sets_weak_etag_header(
    business_client,
    project_id,
    settings,
    tmp_path,
):
    """Verify first download emits weak ETag along with file payload.
 
    This test validates step by step:
    - Enabling local file serving and writing a sample file under the doc root
    - Linking the project to that directory via LocalFilesImportStorage
    - Requesting the file through /data/local-files and capturing the response
    - Confirming a 200 status, the presence of a weak ETag header, and that the streamed bytes match disk
 
    Critical validation: clients must receive a deterministic weak ETag so they
    can re-use cached media without hitting the server again.
    """
    settings.LOCAL_FILES_SERVING_ENABLED = True
    settings.LOCAL_FILES_DOCUMENT_ROOT = str(tmp_path)
 
    project = Project.objects.get(pk=project_id)
    dataset_dir, relative_path, expected_bytes = _create_dataset_file(
        settings=settings,
        tmp_path=tmp_path,
        filename='etag.txt',
        content='etag-content',
    )
    _create_storage(project, str(dataset_dir))
 
    url = reverse('storages:localfiles_data') + f'?d={relative_path}'
    response = business_client.get(url)
 
    body = b''.join(response.streaming_content)
    assert response.status_code == 200
    assert response.has_header('ETag')
    assert response['ETag'].startswith('W/"')
    assert body == expected_bytes
 
 
@pytest.mark.django_db
def test_localfiles_data_returns_not_modified_for_matching_etag(
    business_client,
    project_id,
    settings,
    tmp_path,
):
    """Ensure cached clients receive 304 when sending a matching If-None-Match.
 
    This test validates step by step:
    - Serving a file once to obtain the server-generated weak ETag
    - Issuing a follow-up request that includes the same ETag in If-None-Match
    - Verifying the server returns 304 Not Modified with the original ETag header
 
    Critical validation: prevents redundant file reads and bandwidth usage when
    assets remain unchanged.
    """
    settings.LOCAL_FILES_SERVING_ENABLED = True
    settings.LOCAL_FILES_DOCUMENT_ROOT = str(tmp_path)
 
    project = Project.objects.get(pk=project_id)
    dataset_dir, relative_path, _ = _create_dataset_file(
        settings=settings,
        tmp_path=tmp_path,
        filename='etag-304.txt',
        content='etag-304-content',
    )
    _create_storage(project, str(dataset_dir))
 
    url = reverse('storages:localfiles_data') + f'?d={relative_path}'
    first_response = business_client.get(url)
    etag = first_response['ETag']
 
    cached_response = business_client.get(url, HTTP_IF_NONE_MATCH=etag)
 
    assert cached_response.status_code == 304
    assert cached_response['ETag'] == etag