Source code for pygeodes.utils.formatting
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This module deals with all things related to formatting items or collections"""
# -----------------------------------------------------------------------------
# Copyright (c) 2024, CNES
#
# REFERENCES:
# https://cnes.fr/
# -----------------------------------------------------------------------------
# stdlib imports -------------------------------------------------------
from typing import List, Union
import warnings
import json
# third-party imports -----------------------------------------------
import geopandas as pd
from shapely.geometry import shape
# local imports ---------------------------------------------------
from pygeodes.utils.stac import Collection, Item
from pygeodes.utils.consts import (
COLUMNS_TO_KEEP_FORMATTING_COLLECTIONS,
COLUMNS_TO_KEEP_FORMATTING_ITEMS,
GEOPANDAS_DEFAULT_EPSG,
)
from pygeodes.utils.io import write_json
[docs]def get_from_dico_path(
path: str, dico: dict
): # maybe this function could be removed and replaced by a system using jsonpath-ng : https://pypi.org/project/jsonpath-ng/
"""This functions gets a value from a dictionnary using a system of dotted path
Parameters
----------
path : str
the dictionnary path
dico : dict
the dictionnary
Examples
--------
.. code-block:: python
dico = {"properties" : {"property_one" : 4}}
value = get_from_dico_path("properties.property_one",dico) # returns 4
Returns
-------
Any
the value at path, or None if not found
"""
if (from_dico := dico.get(path)) is not None:
return from_dico
components = path.split(".")
current_obj = dico
for component in components:
if current_obj is None:
return None
current_obj = current_obj.get(component)
return current_obj
[docs]def format_collections(
collections: Union[List[Collection], pd.GeoDataFrame],
columns_to_add: Union[set, List] = None,
) -> pd.GeoDataFrame:
"""This functions format a list of collections, or a ``GeoDataFrame`` by adding columns by name
Parameters
----------
collections : Union[List[Collection], pd.GeoDataFrame]
the list of collections to transform into a dataframe or ``GeoDataFrame`` to add columns to
columns_to_add : Union[set, List], optional
the columns to add, by default None
Returns
-------
pd.GeoDataFrame
the new dataframe
"""
columns_to_keep = COLUMNS_TO_KEEP_FORMATTING_COLLECTIONS # par défaut
if len(collections) == 0:
return None
if columns_to_add is not None:
if len(columns_to_add) > 0:
columns_to_add = set(columns_to_add)
columns_to_keep = columns_to_keep.union(columns_to_add)
if type(collections) is list:
df = pd.GeoDataFrame() # we turn a list of collections into a dataframe
else:
df = collections # we add columns to an already made dataframe
collections = df["collection"].values
for column_to_keep in columns_to_keep:
if column_to_keep == "dataType":
column_to_keep = "id" # because dataType is equivalent to id but dataType doesn't exist in json
values = [col.find(column_to_keep) for col in collections]
if all([value is None for value in values]):
warnings.warn(
f"Not adding column {column_to_keep} as no values were found, please be sure it's an existing column"
)
else:
df[column_to_keep] = values
df["collection"] = collections
return df
[docs]def format_items(
items: Union[List[Item], pd.GeoDataFrame],
columns_to_add: Union[set, List] = None,
) -> pd.GeoDataFrame:
"""This functions format a list of items, or a ``GeoDataFrame`` by adding columns by name
Parameters
----------
items : Union[List[Item], pd.GeoDataFrame]
the list of items to transform into a dataframe or ``GeoDataFrame`` to add columns to
columns_to_add : Union[set, List], optional
the columns to add, by default None
Returns
-------
pd.GeoDataFrame
the new dataframe
"""
columns_to_keep = COLUMNS_TO_KEEP_FORMATTING_ITEMS # par défaut
if len(items) == 0:
return None
if columns_to_add is not None:
if len(columns_to_add) > 0:
columns_to_add = set(columns_to_add)
columns_to_keep = columns_to_keep.union(columns_to_add)
if type(items) is list:
df = pd.GeoDataFrame()
else:
df = items
items = df["item"].values
for column_to_keep in columns_to_keep:
values = [item.find(column_to_keep) for item in items]
if all([value is None for value in values]):
warnings.warn(
f"Not adding column {column_to_keep} as no values were found, please be sure it's an existing column"
)
else:
df[column_to_keep] = values
df["item"] = items
df.set_geometry([shape(item.geometry) for item in items], inplace=True)
df.set_crs(epsg=GEOPANDAS_DEFAULT_EPSG, inplace=True) # to use explore
return df
[docs]def export_dataframe(dataframe: pd.GeoDataFrame, outfile: str) -> None:
"""This functions exports a ``pd.GeoDataFrame`` in a file
Parameters
----------
dataframe : pd.GeoDataFrame
the dataframe
outfile : str
the filepath
See Also
--------
load_dataframe : to load a dataframe from a file
"""
write_json(json.loads(dataframe.to_json()), outfile)
[docs]def load_dataframe(filepath: str):
"""This function loads a ``pd.GeoDataFrame`` from a file
Parameters
----------
filepath : str
the file
Returns
-------
pd.GeoDataFrame
the dataframe
See Also
--------
export_dataframe : to export a dataframe into a file
"""
from pygeodes.utils.stac import Item, Collection
df = pd.GeoDataFrame.from_file(filepath)
if "item" in df.columns:
df["item"] = [Item.from_dict(item) for item in df["item"].values]
elif "collection" in df.columns:
df["collection"] = [
Collection.from_dict(col) for col in df["collection"].values
]
return df