Source code for zcollection.data.variable
# Copyright (c) 2022-2026 CNES.
#
# All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
"""Single, polymorphic Variable bound to a ``VariableSchema``.
A :class:`Variable` is an array plus its schema. The array can be a
:class:`numpy.ndarray` (eager) or any non-numpy array-like —
``dask.array.Array``, a Zarr ``AsyncArray`` proxy, or a custom lazy
backend. The :attr:`Variable.is_lazy` flag is true iff the underlying
array isn't a plain :class:`numpy.ndarray`.
"""
from typing import TYPE_CHECKING, Any
import math
import numpy
if TYPE_CHECKING:
from ..schema import VariableSchema
[docs]
class Variable:
    """A named array bound to a :class:`VariableSchema`.

    ``data`` may be a :class:`numpy.ndarray` (eager), any object exposing
    a ``compute()`` method (dask-style lazy arrays), an arbitrary
    array-like (anything :func:`numpy.asarray` accepts), or ``None``
    (declared but not yet populated). :attr:`is_lazy` is true iff the
    array isn't a plain :class:`numpy.ndarray`.

    On construction the data is validated against the schema's number
    of dimensions; dtype mismatches are *not* enforced (upcasts are
    accepted silently).

    Most accessors (:attr:`name`, :attr:`dimensions`, :attr:`dtype`,
    :attr:`ndim`, :attr:`fill_value`, :attr:`attrs`) are read from the
    schema, not from the array; only :attr:`shape`, :attr:`data`,
    :attr:`is_lazy` and :meth:`to_numpy` look at the array itself.

    Args:
        schema: Variable schema describing dtype, dims and metadata.
        data: Underlying array, or ``None`` for a placeholder variable.

    Raises:
        ValueError: If ``data`` exposes an ``ndim`` attribute that
            disagrees with ``schema.ndim``.
    """

    __slots__ = ("_data", "schema")

    # NOTE: ``VariableSchema`` is only imported under ``TYPE_CHECKING``, so
    # the annotation is written as a string forward reference. An unquoted
    # name would be looked up when the ``def`` executes on interpreters that
    # evaluate annotations eagerly, and fail with ``NameError``.
    def __init__(self, schema: "VariableSchema", data: Any) -> None:
        """Initialize a Variable.

        Args:
            schema: Variable schema describing dtype, dims and metadata.
            data: Underlying array, or ``None`` for a placeholder.

        Raises:
            ValueError: If ``data.ndim`` exists and differs from
                ``schema.ndim``.
        """
        #: The variable schema describing dtype, dims and metadata.
        self.schema = schema
        # Underlying array (numpy / dask / array-like / None).
        self._data = data
        # Validate the data's ndim against the schema.
        self._validate()

    def _validate(self) -> None:
        """Reject data whose dim count doesn't match :attr:`schema`.

        Placeholder variables (``data is None``) and objects without an
        ``ndim`` attribute are accepted without any check.

        Raises:
            ValueError: If the data's ``ndim`` differs from
                ``schema.ndim``.
        """
        if self._data is None:
            return
        ndim = getattr(self._data, "ndim", None)
        if ndim is not None and ndim != self.schema.ndim:
            raise ValueError(
                f"variable {self.schema.name!r}: data has {ndim} dims, "
                f"schema declares {self.schema.ndim}"
            )
        # dtype check: allow upcasting silently for now; strict mode optional.

    # Public, frozen-ish accessors -----------------------------------
    @property
    def name(self) -> str:
        """Return the variable name."""
        return self.schema.name

    @property
    def dimensions(self) -> tuple[str, ...]:
        """Return the dimension names."""
        return self.schema.dimensions

    @property
    def dtype(self) -> numpy.dtype:
        """Return the numpy dtype declared by the schema."""
        return self.schema.dtype

    @property
    def shape(self) -> tuple[int, ...]:
        """Return the shape of the underlying data.

        Returns ``()`` when :attr:`data` is ``None`` or has no ``shape``
        attribute (the empty tuple makes the variable look like a 0-D
        scalar to size-aware consumers).
        """
        return tuple(getattr(self._data, "shape", ()))

    @property
    def ndim(self) -> int:
        """Return the number of dimensions declared by the schema."""
        return self.schema.ndim

    @property
    def fill_value(self) -> Any:
        """Return the schema fill value."""
        return self.schema.fill_value

    @property
    def attrs(self) -> dict[str, Any]:
        """Return a fresh copy of the schema attributes.

        The returned dict is detached: adding, removing or rebinding
        keys does not affect the underlying schema. The copy is
        shallow, so mutable values are still shared with the schema.
        """
        return dict(self.schema.attrs)

    @property
    def is_lazy(self) -> bool:
        """Return whether the underlying data isn't a plain ``numpy.ndarray``.

        True for dask arrays, Zarr ``AsyncArray`` proxies, and anything
        else that isn't a concrete in-memory numpy buffer; false when
        the data is already materialised. ``data is None`` also returns
        true (the placeholder is treated as not-yet-eager).
        """
        return not isinstance(self._data, numpy.ndarray)

    @property
    def data(self) -> Any:
        """Return the underlying array as-is (no materialisation)."""
        return self._data

    @property
    def nbytes(self) -> int:
        """Return the uncompressed byte size of the underlying data.

        Computed as ``prod(shape) * dtype.itemsize`` — the same
        convention as :attr:`numpy.ndarray.nbytes`. The item size comes
        from the schema dtype (:attr:`dtype`), the element count from
        the data's shape (:attr:`shape`). Ignores any compression or
        sharding the variable might carry on disk.

        Returns ``0`` for placeholder variables (``data is None``).
        """
        if self._data is None:
            return 0
        return math.prod(self.shape) * self.dtype.itemsize

    def to_numpy(self) -> numpy.ndarray:
        """Materialise the data as a numpy array.

        Dispatches in three cases:

        - already a :class:`numpy.ndarray` → returned as-is (no copy).
        - has a ``compute()`` method (dask-style lazy arrays) → call it
          and return the materialised result.
        - otherwise → :func:`numpy.asarray` on the data.

        Calling this on a Variable with ``data is None`` produces
        ``numpy.array(None, dtype=object)``, which is rarely useful;
        guard against that case at the caller.

        Returns:
            The materialised array. For the ``compute()`` branch this is
            whatever ``compute()`` returns, passed through unchanged.
        """
        d = self._data
        if isinstance(d, numpy.ndarray):
            return d
        if hasattr(d, "compute"):
            return d.compute()
        return numpy.asarray(d)

    def __repr__(self) -> str:
        """Return a multi-line, xarray-like representation of the variable."""
        # Imported at call time rather than at module level; presumably to
        # avoid an import cycle with ``._repr`` — TODO confirm.
        from ._repr import variable_repr

        return variable_repr(self)