Source code for pygeodes.utils.s3

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This module deals with all things related to Amazon S3"""
# -----------------------------------------------------------------------------
# Copyright (c) 2024, CNES
#
# REFERENCES:
# https://cnes.fr/
# -----------------------------------------------------------------------------

# stdlib imports -------------------------------------------------------
import warnings

# third-party imports -----------------------------------------------
import boto3
from botocore.client import Config as botocore_config

# local imports ---------------------------------------------------
from pygeodes.utils.config import Config
from pygeodes.utils.consts import DATALAKE_URL
from pygeodes.utils.stac import Item
from pygeodes.utils.io import check_if_folder_already_contains_file
from pygeodes.utils.logger import logger


def create_boto3_client(conf: Config):
    """Build a boto3 S3 client pointed at the datalake endpoint.

    Args:
        conf: Configuration object. ``conf.check_s3_config()`` is called
            first (presumably it validates the S3 settings and raises when
            they are incomplete -- confirm in ``pygeodes.utils.config``),
            then ``conf.s3_parameters`` supplies the base keyword arguments
            passed to ``boto3.client``.

    Returns:
        The object returned by ``boto3.client("s3", ...)``, created with
        ``endpoint_url=DATALAKE_URL`` and a botocore config using signature
        version ``s3v4``.
    """
    conf.check_s3_config()
    # Build a new dict instead of assigning into conf.s3_parameters: if that
    # attribute hands back a stored dict, in-place assignment would leave the
    # endpoint and botocore config attached to ``conf`` after this call.
    parameters = {
        **conf.s3_parameters,
        "endpoint_url": DATALAKE_URL,
        "config": botocore_config(signature_version="s3v4"),
    }
    return boto3.client("s3", **parameters)
def get_bucket_and_key_from_url(url: str):
    """Split a datalake object URL into its S3 bucket and key.

    ``DATALAKE_URL`` is removed from ``url``; the first remaining path
    segment is the bucket and everything after it is the key.

    Args:
        url: Object URL, expected to contain ``DATALAKE_URL``.

    Returns:
        A ``(bucket, key)`` tuple of strings. ``key`` is the empty string
        when nothing follows the bucket segment.
    """
    path = url.replace(DATALAKE_URL, "")
    # If DATALAKE_URL carries no trailing slash, the remainder starts with
    # "/" and the first segment would be an empty bucket name; drop it.
    path = path.lstrip("/")
    bucket, _, key = path.partition("/")
    return bucket, key
def download_item(client, item: Item, outfile: str):
    """Download the data of ``item`` from S3 to ``outfile``.

    The download is skipped when ``check_if_folder_already_contains_file``
    reports a file matching ``item.data_asset.checksum`` for ``outfile``;
    a warning is emitted and that file's name is returned instead.

    Args:
        client: Object exposing ``download_file(Bucket=, Key=, Filename=)``,
            e.g. the client built by ``create_boto3_client``.
        item: Item whose ``accessService:endpointURL`` entry locates the
            object to fetch.
        outfile: Destination path of the downloaded file.

    Returns:
        ``outfile`` after a download, otherwise the name of the already
        present file with the same content.
    """
    existing_copy = check_if_folder_already_contains_file(
        outfile, item.data_asset.checksum
    )

    if existing_copy is None:
        # No file with the same content was reported: fetch from S3.
        endpoint = item.find("accessService:endpointURL")
        bucket, key = get_bucket_and_key_from_url(endpoint)
        logger.debug(f"using {bucket=} and {key=}")
        client.download_file(Bucket=bucket, Key=key, Filename=outfile)
        print(f"Download from s3 completed at {outfile}")
        return outfile

    warnings.warn(
        f"trying to download content at {outfile} but file with same content already exists in the same folder at {existing_copy}, skipping download"
    )
    return existing_copy