Source code for canopy.visualization.map.diff_map

"""Difference map visualization."""

from typing import Optional, List, Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import t as t_distribution

import canopy as cp
from canopy.visualization.multiple_figs import create_wrapper_from_locals
from canopy.visualization.visualization_helpers import handle_figure_output

from .map_helpers import (
    get_metadata,
    safe_make_raster,
    calculate_bins,
    plot_map,
)

[docs] def make_diff_map( field_a: cp.Field, field_b: cp.Field, layer: str, output_file: Optional[str] = None, timeop: Optional[str] = 'av', cb_label: Optional[str] = None, title: Optional[str] = None, unit: Optional[str] = None, n_classes: Optional[int] = 4, classification: List[float] | str = "linear", palette: Optional[str] = None, custom_palette: Optional[str] = None, orientation: Optional[str] = 'horizontal', cb_label_rotation: float = 0, extend: Optional[str] = "neither", proj: Optional[str] = "Robinson", force_zero: Optional[bool] = False, dark_mode: Optional[bool] = False, transparent: Optional[bool] = False, stats_annotation: Optional[bool] = False, x_fig: Optional[float] = 10, y_fig: Optional[float] = 10, percentage: Optional[bool] = False, nonsig: Optional[bool] = False, subfig=None, return_fig: bool = False, ) -> Optional[plt.Figure]: """ Create a difference map from two fields (field_b - field_a). Same arguments as make_simple_map except the ones below. Parameters ---------- field_a, field_b : cp.Field First and second Field objects for computing the difference (field_b - field_a). percentage : bool, optional If True, compute proportional difference in %. Default is False. nonsig : bool, optional If True, overlay hatched areas to indicate regions where differences between the two fields are not statistically significant (p-value >= 0.05). These are areas where the observed differences could be explained by interannual variability rather than representing a true difference between the two climate runs. Default is False. """ # If return_fig is True, create a wrapper function and return it if return_fig: return create_wrapper_from_locals(make_diff_map, locals()) for name, f in [("field_a", field_a), ("field_b", field_b)]: if f.grid.is_reduced("lat") and f.grid.is_reduced("lon"): raise ValueError( f"{name} has reduced latitude and longitude. Map requires unreduced spatial dimensions." ) # Retrieve metadata cb_label, unit = get_metadata(field_a, cb_label, unit) # If classification is provided as a list, update n_classes to match its length minus one if isinstance(classification, list) and len(classification)-1 != n_classes: n_classes = len(classification) - 1 # Apply time reduction and create rasters raster_a = safe_make_raster(field_a, layer, timeop=timeop) raster_b = safe_make_raster(field_b, layer, timeop=timeop) # Compute the difference if percentage: with np.errstate(divide='ignore', invalid='ignore'): diff = np.where(raster_a.vmap != 0, (raster_b.vmap - raster_a.vmap) / raster_a.vmap * 100, 0) else: diff = raster_b.vmap - raster_a.vmap # Compute hatched areas nonsig_mask = None if nonsig: nonsig_mask = _calculate_significance_mask(field_a, field_b, layer, raster_a) # Calculate bins for the difference map bins = calculate_bins(np.abs(diff), n_classes, classification, force_zero, diff_map=True) # Plot the map fig = plot_map(raster_a.x, raster_a.y, diff, False, False, output_file, cb_label, title, unit, n_classes, bins, palette, custom_palette, orientation, cb_label_rotation, extend, proj, dark_mode, stats_annotation, x_fig, y_fig, nonsig_mask, hist=True, subfig=subfig) return handle_figure_output(fig, output_file=output_file, transparent=transparent, subfig=subfig)
def _calculate_significance_mask( field_a: cp.Field, field_b: cp.Field, layer: str, raster_a: Any, ) -> np.ndarray: """ Compute a mask indicating areas where differences between two fields are not statistically significant (p-value >= 0.05). """ # Extract and align time series df_a = field_a.data[layer].unstack(level='time') df_b = field_b.data[layer].unstack(level='time') df_a_aligned, df_b_aligned = df_a.align(df_b, join='inner') # Calculate statistics mean_a, mean_b = df_a_aligned.mean(axis=1), df_b_aligned.mean(axis=1) var_a, var_b = df_a_aligned.var(axis=1, ddof=1), df_b_aligned.var(axis=1, ddof=1) # Calculate t-statistics and p-values s_p = np.sqrt((var_a + var_b)/2) t_stat = (mean_a - mean_b) / s_p p_values = 2 * (1 - t_distribution.cdf(np.abs(t_stat), df=2*len(cp.make_lines(field_a).index)-2)) # Convert to grid format p_grid = pd.Series(p_values, index=mean_a.index).reset_index().pivot(index='lat', columns='lon', values=0).reindex( index=raster_a.y, columns=raster_a.x).values # Create significance mask (p < 0.05 is significant, mask shows non-significant areas) return p_grid >= 0.05