"""Difference map visualization."""
from typing import Optional, List, Any
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import t as t_distribution
import canopy as cp
from canopy.visualization.multiple_figs import create_wrapper_from_locals
from canopy.visualization.visualization_helpers import handle_figure_output
from .map_helpers import (
get_metadata,
safe_make_raster,
calculate_bins,
plot_map,
)
[docs]
def make_diff_map(
field_a: cp.Field,
field_b: cp.Field,
layer: str,
output_file: Optional[str] = None,
timeop: Optional[str] = 'av',
cb_label: Optional[str] = None,
title: Optional[str] = None,
unit: Optional[str] = None,
n_classes: Optional[int] = 4,
classification: List[float] | str = "linear",
palette: Optional[str] = None,
custom_palette: Optional[str] = None,
orientation: Optional[str] = 'horizontal',
cb_label_rotation: float = 0,
extend: Optional[str] = "neither",
proj: Optional[str] = "Robinson",
force_zero: Optional[bool] = False,
dark_mode: Optional[bool] = False,
transparent: Optional[bool] = False,
stats_annotation: Optional[bool] = False,
x_fig: Optional[float] = 10,
y_fig: Optional[float] = 10,
percentage: Optional[bool] = False,
nonsig: Optional[bool] = False,
subfig=None,
return_fig: bool = False,
) -> Optional[plt.Figure]:
"""
Create a difference map from two fields (field_b - field_a). Same arguments as make_simple_map except the ones below.
Parameters
----------
field_a, field_b : cp.Field
First and second Field objects for computing the difference (field_b - field_a).
percentage : bool, optional
If True, compute proportional difference in %. Default is False.
nonsig : bool, optional
If True, overlay hatched areas to indicate regions where
differences between the two fields are not statistically
significant (p-value >= 0.05). These are areas where the
observed differences could be explained by interannual
variability rather than representing a true difference
between the two climate runs. Default is False.
"""
# If return_fig is True, create a wrapper function and return it
if return_fig:
return create_wrapper_from_locals(make_diff_map, locals())
for name, f in [("field_a", field_a), ("field_b", field_b)]:
if f.grid.is_reduced("lat") and f.grid.is_reduced("lon"):
raise ValueError(
f"{name} has reduced latitude and longitude. Map requires unreduced spatial dimensions."
)
# Retrieve metadata
cb_label, unit = get_metadata(field_a, cb_label, unit)
# If classification is provided as a list, update n_classes to match its length minus one
if isinstance(classification, list) and len(classification)-1 != n_classes:
n_classes = len(classification) - 1
# Apply time reduction and create rasters
raster_a = safe_make_raster(field_a, layer, timeop=timeop)
raster_b = safe_make_raster(field_b, layer, timeop=timeop)
# Compute the difference
if percentage:
with np.errstate(divide='ignore', invalid='ignore'):
diff = np.where(raster_a.vmap != 0, (raster_b.vmap - raster_a.vmap) / raster_a.vmap * 100, 0)
else:
diff = raster_b.vmap - raster_a.vmap
# Compute hatched areas
nonsig_mask = None
if nonsig:
nonsig_mask = _calculate_significance_mask(field_a, field_b, layer, raster_a)
# Calculate bins for the difference map
bins = calculate_bins(np.abs(diff), n_classes, classification, force_zero, diff_map=True)
# Plot the map
fig = plot_map(raster_a.x, raster_a.y, diff, False, False, output_file, cb_label, title, unit,
n_classes, bins, palette, custom_palette, orientation, cb_label_rotation, extend, proj,
dark_mode, stats_annotation, x_fig, y_fig, nonsig_mask, hist=True,
subfig=subfig)
return handle_figure_output(fig, output_file=output_file, transparent=transparent, subfig=subfig)
def _calculate_significance_mask(
field_a: cp.Field,
field_b: cp.Field,
layer: str,
raster_a: Any,
) -> np.ndarray:
"""
Compute a mask indicating areas where differences between two fields
are not statistically significant (p-value >= 0.05).
"""
# Extract and align time series
df_a = field_a.data[layer].unstack(level='time')
df_b = field_b.data[layer].unstack(level='time')
df_a_aligned, df_b_aligned = df_a.align(df_b, join='inner')
# Calculate statistics
mean_a, mean_b = df_a_aligned.mean(axis=1), df_b_aligned.mean(axis=1)
var_a, var_b = df_a_aligned.var(axis=1, ddof=1), df_b_aligned.var(axis=1, ddof=1)
# Calculate t-statistics and p-values
s_p = np.sqrt((var_a + var_b)/2)
t_stat = (mean_a - mean_b) / s_p
p_values = 2 * (1 - t_distribution.cdf(np.abs(t_stat), df=2*len(cp.make_lines(field_a).index)-2))
# Convert to grid format
p_grid = pd.Series(p_values, index=mean_a.index).reset_index().pivot(index='lat', columns='lon', values=0).reindex(
index=raster_a.y, columns=raster_a.x).values
# Create significance mask (p < 0.05 is significant, mask shows non-significant areas)
return p_grid >= 0.05