import fnmatch
import logging
import re
import types

from azure.storage.blob import BlobServiceClient
from core.utils.params import get_env
from django.conf import settings
from io_storages.utils import parse_range

logger = logging.getLogger(__name__)


class AZURE(object):
    @staticmethod
    def download_stream_response(blob_client, total_size, content_type, range_header, properties, max_range_size=None):
        """Prepare Azure blob streaming response with unified range handling.

        Shared Azure Blob streaming helper used by both OSS Azure Blob and
        Enterprise Azure SPI providers.

        Responsibilities:
        - Parse and normalize HTTP Range requests (including special probes)
        - Configure Azure SDK streaming parameters
        - Generate a downloader with a unified ``iter_chunks`` API
        - Build response metadata (Content-Range, Content-Length, ETag, Last-Modified)

        Args:
            blob_client: Azure Blob SDK client for the target blob.
            total_size (int): Size of the blob in bytes.
            content_type (str|None): Blob content type.
            range_header (str|None): Incoming HTTP Range header, e.g. 'bytes=0-'.
            properties: Blob properties (for ETag/Last-Modified extraction).
            max_range_size (int|None): Optional override for initial open-ended range size.

        Returns:
            tuple: (downloader, resolved_content_type, metadata)
        """
        resolved_content_type = content_type or 'application/octet-stream'
        streaming = True
        start, end = parse_range(range_header)
        if start is None and end is None:
            # no Range header: return the whole blob with a 200 response
            streaming = False
            start, end = 0, total_size
        elif start == 0 and end == 0:
            # probe request 'bytes=0-0': widen to a single byte
            start, end = 0, 1
        elif start == 0 and (end == '' or end is None):
            # open-ended range 'bytes=0-': cap the initial window size
            mr = max_range_size if max_range_size is not None else settings.RESOLVER_PROXY_MAX_RANGE_SIZE
            end = start + mr
        if start is None:
            start = 0

        # force the SDK into chunked mode so even small blobs are streamed
        try:
            blob_client._config.max_single_get_size = 1024  # 1KB
        except Exception:
            pass

        if end is not None and end != '':
            length = end - start
        else:
            length = None

        if streaming:
            downloader = blob_client.download_blob(offset=start, length=length)
        else:
            length = total_size
            downloader = blob_client.download_blob()

        def _iter_chunks(self_downloader, chunk_size=1024 * 1024):
            try:
                self_downloader._config.max_chunk_get_size = chunk_size
            except Exception:
                pass
            total = 0
            for chunk in self_downloader.chunks():
                yield chunk
                total += len(chunk)
                if length is not None and total >= length:
                    return

        # expose a unified iterator/close API on the SDK downloader
        downloader.iter_chunks = types.MethodType(_iter_chunks, downloader)
        downloader.close = types.MethodType(lambda self: None, downloader)

        # clamp the reported length and end offset to the actual blob size
        if streaming and length is not None:
            content_length = min(length, max(0, total_size - start))
        else:
            content_length = length if length is not None else max(0, total_size - start)

        if length is not None:
            actual_end = min(start + length - 1, max(0, total_size - 1))
        else:
            actual_end = max(0, total_size - 1)

        status_code = 206 if streaming else 200
        metadata = {
            'ETag': getattr(properties, 'etag', ''),
            'ContentLength': content_length,
            'ContentRange': f'bytes {start}-{actual_end}/{total_size or 0}',
            'LastModified': getattr(properties, 'last_modified', None),
            'StatusCode': status_code,
        }
        return downloader, resolved_content_type, metadata
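    # Worked example of the metadata contract above (a sketch; it assumes
    # parse_range('bytes=0-0') returns (0, 0), matching the branch that widens
    # the probe to a single byte). For a 10-byte blob and Range 'bytes=0-0':
    #   offset=0, length=1  ->  metadata == {
    #       'ContentRange': 'bytes 0-0/10', 'ContentLength': 1,
    #       'StatusCode': 206, ...
    #   }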
    @classmethod
    def get_client_and_container(cls, container, account_name=None, account_key=None):
        # get account name and key from params or from environment variables
        account_name = str(account_name) if account_name else get_env('AZURE_BLOB_ACCOUNT_NAME')
        account_key = str(account_key) if account_key else get_env('AZURE_BLOB_ACCOUNT_KEY')
        # check that both account name and key are set
        if not account_name or not account_key:
            raise ValueError(
                'Azure account name and key must be set using '
                'environment variables AZURE_BLOB_ACCOUNT_NAME and AZURE_BLOB_ACCOUNT_KEY'
            )
        connection_string = (
            'DefaultEndpointsProtocol=https;AccountName='
            + account_name
            + ';AccountKey='
            + account_key
            + ';EndpointSuffix=core.windows.net'
        )
        client = BlobServiceClient.from_connection_string(conn_str=connection_string)
        container = client.get_container_client(str(container))
        return client, container

    @classmethod
    def get_blob_metadata(cls, url: str, container: str, account_name: str = None, account_key: str = None) -> dict:
        """
        Get blob metadata by url
        :param url: Object key
        :param container: Azure container name
        :param account_name: Azure account name
        :param account_key: Azure account key
        :return: Object metadata dict("name": "value")
        """
        _, container = cls.get_client_and_container(container, account_name=account_name, account_key=account_key)
        blob = container.get_blob_client(url)
        return dict(blob.get_blob_properties())

    @classmethod
    def validate_pattern(cls, storage, pattern, glob_pattern=True):
        """
        Validate pattern against Azure Blob Storage
        :param storage: AzureBlobStorage instance
        :param pattern: Pattern to validate
        :param glob_pattern: If True, pattern is a glob pattern, otherwise it is a regex pattern
        :return: Message if pattern is not valid, empty string otherwise
        """
        logger.debug('Validating Azure Blob Storage pattern.')
        client, container = storage.get_client_and_container()
        if storage.prefix:
            generator = container.list_blob_names(
                name_starts_with=storage.prefix,
                results_per_page=settings.CLOUD_STORAGE_CHECK_FOR_RECORDS_PAGE_SIZE,
                timeout=settings.CLOUD_STORAGE_CHECK_FOR_RECORDS_TIMEOUT,
            )
        else:
            generator = container.list_blob_names(
                results_per_page=settings.CLOUD_STORAGE_CHECK_FOR_RECORDS_PAGE_SIZE,
                timeout=settings.CLOUD_STORAGE_CHECK_FOR_RECORDS_TIMEOUT,
            )
        # compile pattern to regex
        if glob_pattern:
            pattern = fnmatch.translate(pattern)
        regex = re.compile(str(pattern))
        # match pattern against all keys in the container
        for index, key in enumerate(generator):
            # skip directories
            if key.endswith('/'):
                logger.debug(key + ' is skipped because it is a folder')
                continue
            if regex and regex.match(key):
                logger.debug(key + ' matches file pattern')
                return ''
        return 'No objects found matching the provided glob pattern'
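
# Usage sketch (hypothetical, not part of this module's public API): a Django
# view could wire AZURE.download_stream_response into a StreamingHttpResponse
# roughly as below. The view name, URL parameters, and header wiring are
# assumptions for illustration only.
def _example_serve_blob(request, container_name, blob_key):
    from django.http import StreamingHttpResponse

    _, container = AZURE.get_client_and_container(container_name)
    blob_client = container.get_blob_client(blob_key)
    properties = blob_client.get_blob_properties()
    downloader, resolved_content_type, meta = AZURE.download_stream_response(
        blob_client,
        total_size=properties.size,
        content_type=properties.content_settings.content_type,
        range_header=request.headers.get('Range'),
        properties=properties,
    )
    response = StreamingHttpResponse(
        downloader.iter_chunks(), content_type=resolved_content_type, status=meta['StatusCode']
    )
    response['Content-Length'] = meta['ContentLength']
    if meta['StatusCode'] == 206:
        # Content-Range is only meaningful on partial (206) responses
        response['Content-Range'] = meta['ContentRange']
    if meta['ETag']:
        response['ETag'] = meta['ETag']
    return response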