import pytest
|
|
from label_studio.tests.sdk.common import LABEL_CONFIG_AND_TASKS
|
|
pytestmark = pytest.mark.django_db
|
from label_studio_sdk.client import LabelStudio
|
|
|
@pytest.mark.parametrize('recursive_scan', [True, False])
|
def test_connect_and_sync_s3(django_live_url, business_client, recursive_scan):
|
"""Test S3 storage connection and sync with recursive scan parameter.
|
|
This test validates step by step:
|
- Creating a project with labeling configuration
|
- Setting up S3 import storage with recursive_scan parameter
|
- Verifying storage configuration and updates
|
- Triggering sync operation
|
- Validating task creation based on recursive_scan setting
|
|
Critical validation: When recursive_scan=False, only files in the root
|
directory should be imported (image1.jpg), while recursive_scan=True
|
imports all files including subdirectories.
|
"""
|
ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key)
|
p = ls.projects.create(title='New Project', label_config=LABEL_CONFIG_AND_TASKS['label_config'])
|
|
storage_resp = ls.import_storage.s3.create(
|
project=p.id, bucket='pytest-s3-images', regex_filter='.*', use_blob_urls=False, recursive_scan=recursive_scan
|
)
|
|
storage_id = storage_resp.id
|
|
storage = ls.import_storage.s3.get(id=storage_id)
|
assert storage.project == p.id
|
assert storage.bucket == 'pytest-s3-images'
|
assert storage.use_blob_urls is False
|
assert storage.recursive_scan == recursive_scan
|
|
ls.import_storage.s3.update(id=storage_id, use_blob_urls=True)
|
storage = ls.import_storage.s3.get(id=storage_id)
|
assert storage.use_blob_urls is True
|
|
resp = ls.import_storage.s3.sync(id=storage_id)
|
assert resp.status in ('initialized', 'queued', 'completed')
|
|
tasks = []
|
for task in ls.tasks.list(project=p.id):
|
tasks.append(task)
|
|
# Expected results based on recursive_scan parameter
|
if recursive_scan:
|
# Recursive scan should find all files including subdirectories
|
expected_files = {
|
'subdir/another/image2.jpg',
|
'subdir/image1.jpg',
|
'subdir/image2.jpg',
|
'image1.jpg',
|
}
|
else:
|
# Non-recursive scan should only find files in root directory
|
expected_files = {'image1.jpg'}
|
|
assert set(t.storage_filename for t in tasks) == expected_files
|