"""Grid associated to site-based data.
This Grid type actually represents the absence of a grid. It is meant to describe a
collection of unrelated sites or locations. Spatial reduction operations are only defined
for both axes (axis = 'both'). Coordinates are longitude and latitude.
"""
import copy
import warnings
import pandas as pd
from canopy.core.grid.grid_abc import Grid
from canopy.core.grid.grid_empty import GridEmpty
from canopy.core.grid.spatial_axis import SpatialAxis, LonAxis, LatAxis
from canopy.core.grid.registry import register_grid, register_gridop, check_gridop
from typing_extensions import Self
# Identifier of this grid type: stored on GridSites as `_grid_type` and passed to
# `register_gridop` / `check_gridop` for the grid operations defined in this module.
grid_type = 'sites'
def _check_labels_unique(df: pd.DataFrame) -> bool:
    """Check that site labels and coordinate pairs map one-to-one.

    Only the 'label', 'lon' and 'lat' levels of the index are inspected; any
    other index level is ignored.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame whose index has levels named 'label', 'lon' and 'lat'.

    Returns
    -------
    bool
        True if each label is attached to exactly one (lon, lat) pair and each
        (lon, lat) pair carries exactly one label, False otherwise. An empty
        frame is trivially consistent and yields True.

    Raises
    ------
    KeyError
        If one of the three levels is missing from the index.
    """
    level_names = ['label', 'lon', 'lat']
    # Build the index from per-level arrays rather than from a list of tuples:
    # MultiIndex.from_tuples cannot infer the number of levels from an empty
    # list and raises TypeError when the frame has no rows.
    sites = pd.MultiIndex.from_arrays(
        [df.index.get_level_values(name) for name in level_names],
        names=level_names,
    ).unique()

    n_sites = len(sites)
    n_coords = len(sites.droplevel('label').unique())
    n_labels = len(sites.get_level_values('label').unique())
    # One-to-one means: as many distinct triples as distinct coordinate pairs
    # and as distinct labels.
    return n_sites == n_coords and n_sites == n_labels
@register_grid
class GridSites(Grid):
    """Grid type for site-based data, i.e. a collection of unrelated locations.

    No coordinate information is stored on the grid itself. The only state is
    the grid operation (if any) with which the sites have been reduced; the
    same operation is always applied to both the 'lon' and the 'lat' axis.
    """

    _grid_type: str = grid_type
    _xaxis_key: str = 'lon'
    _yaxis_key: str = 'lat'
    _xaxis: SpatialAxis = LonAxis
    _yaxis: SpatialAxis = LatAxis

    def __init__(self, gridop: str | None = None):
        """Create a sites grid.

        Parameters
        ----------
        gridop : str | None
            Grid operation passed to the base class for both axes; None
            (the default) for a grid that has not been reduced.
        """
        super().__init__(xaxis_gridop=gridop, yaxis_gridop=gridop)

    def reduce(self, gridop: str, axis_key: str) -> Grid:
        """Return the grid resulting from applying `gridop` along `axis_key`.

        The (gridop, axis_key) combination is first passed to `check_gridop`
        for this grid type; the returned grid is a new GridSites carrying
        `gridop`.
        """
        check_gridop(grid_type, gridop, axis_key)
        return GridSites(gridop=gridop)

    def crop(self, df: pd.DataFrame) -> Grid:
        """Return the grid matching the (cropped) data in `df`.

        Returns a GridEmpty if `df` is empty, otherwise a deep copy of this grid.
        """
        # This grid type contains no information about the coordinates of the
        # sites, so a copy is all that is needed. Other checks (for example on
        # the index level names) are performed elsewhere (e.g. Field constructor).
        if df.empty:
            return GridEmpty()
        return copy.deepcopy(self)

    def is_compatible(self, other: Grid) -> bool:
        """Return True if `other` is a GridSites or a GridEmpty."""
        return isinstance(other, (GridSites, GridEmpty))

    def __add__(self, other: Grid) -> Grid:
        """Combine two grids into a new GridSites constructed without a gridop.

        Raises
        ------
        ValueError
            If `other` is not compatible with this grid.
        """
        if not self.is_compatible(other):
            raise ValueError("Grids are not compatible.")
        return GridSites()

    @classmethod
    def from_frame(cls, df: pd.DataFrame,
                   gridop: None | str = None) -> 'GridSites':
        """Build a GridSites from a DataFrame, checking its index levels.

        Parameters
        ----------
        df : pd.DataFrame
            The data the grid is to be associated with.
        gridop : None | str
            Grid operation with which the data was reduced, or None if the
            data is not reduced.

        Returns
        -------
        GridSites
            A grid carrying `gridop`.

        Raises
        ------
        ValueError
            If `gridop` is given but the index has a 'lon' or 'lat' level; if
            `gridop` is None and either level is missing; or if a 'label'
            level is present and labels do not map one-to-one to coordinates.
        """
        level_names = df.index.names
        has_lon = 'lon' in level_names
        has_lat = 'lat' in level_names

        if gridop is not None:
            if has_lon or has_lat:
                raise ValueError("A gridop was specified, but the supplied DataFrame has a 'lon' and/or a 'lat' axis.")
        else:
            if not (has_lon and has_lat):
                raise ValueError("No 'lon' or 'lat' levels found in the supplied DataFrame index, and no gridop was specified.")
            # The label/coordinate check needs the 'lon' and 'lat' levels, so it
            # is only run here, where both are known to exist. Running it on a
            # reduced frame would fail with a KeyError on the missing levels.
            if 'label' in level_names and not _check_labels_unique(df):
                raise ValueError("There is no one-to-one correspondence between labels and coordinate pairs.")

        return GridSites(gridop=gridop)

    def validate_frame(self, df: pd.DataFrame) -> bool:
        """Check whether the index of `df` is consistent with this grid.

        A warning is issued for every inconsistency found.

        Returns
        -------
        bool
            True if no inconsistency was found, False otherwise.
        """
        grid_suits_frame = True

        # This should always be the case
        assert self.gridops['lon'] == self.gridops['lat']
        gridop = self.gridops['lon']

        level_names = df.index.names
        has_lon = 'lon' in level_names
        has_lat = 'lat' in level_names

        if gridop is not None:
            if has_lon or has_lat:
                warnings.warn("The grid is reduced, but the dataframe's index has levels 'lon' or 'lat'.")
                grid_suits_frame = False
        else:
            if not (has_lon and has_lat):
                warnings.warn("The grid is not reduced, but no 'lon' and/or 'lat' level(s) were found in the dataframe's index.")
                grid_suits_frame = False

        # Labels can only be compared against coordinates when both coordinate
        # levels exist; otherwise the check would raise a KeyError instead of
        # letting this method report the problem through its return value.
        if 'label' in level_names and has_lon and has_lat:
            if not _check_labels_unique(df):
                warnings.warn("There is no one-to-one correspondence between labels and coordinate pairs.")
                grid_suits_frame = False

        return grid_suits_frame
# ---------------
# GRID OPERATIONS
# ---------------
@register_gridop(grid_type, 'av', 'both')
def av_both(df: pd.DataFrame, grid: Grid) -> pd.DataFrame:
    """Average the data over all sites.

    On this 'grid', every site carries the same weight in the average.

    Parameters
    ----------
    df : pd.DataFrame
        The pandas DataFrame holding the data to average.
    grid : GridSites
        The GridSites object associated with the data. It is not used by the
        computation.

    Returns
    -------
    pd.DataFrame
        The mean of `df` within each 'time' group.
    """
    per_time = df.groupby(['time'])
    return per_time.mean()
@register_gridop(grid_type, 'sum', 'both')
def sum_both(df: pd.DataFrame, grid: Grid) -> pd.DataFrame:
    """Sum the data over all sites.

    On this 'grid', every site contributes equally to the sum.

    Parameters
    ----------
    df : pd.DataFrame
        The pandas DataFrame holding the data to aggregate.
    grid : GridSites
        The GridSites object associated with the data. It is not used by the
        computation.

    Returns
    -------
    pd.DataFrame
        The sum of `df` within each 'time' group.
    """
    per_time = df.groupby(['time'])
    return per_time.sum()