Source code for canopy.visualization.map.diff_map

"""Difference map visualization."""

from typing import Optional, List, Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import t as t_distribution

import canopy as cp
from canopy.visualization.multiple_figs import create_wrapper_from_locals
from canopy.visualization.visualization_helpers import handle_figure_output

from .map_helpers import (
    get_metadata,
    safe_make_raster,
    calculate_bins,
    plot_map,
)


[docs]
def make_diff_map(
    field_a: cp.Field,
    field_b: cp.Field,
    layer: str,
    output_file: Optional[str] = None,
    timeop: Optional[str] = 'av',
    cb_label: Optional[str] = None,
    title: Optional[str] = None,
    unit: Optional[str] = None,
    n_classes: Optional[int] = 4,
    classification: List[float] | str = "linear",
    palette: Optional[str] = None,
    custom_palette: Optional[str] = None,
    orientation: Optional[str] = 'horizontal',
    cb_label_rotation: float = 0,
    extend: Optional[str] = "neither",
    proj: Optional[str] = "Robinson",
    force_zero: Optional[bool] = False,
    dark_mode: Optional[bool] = False,
    transparent: Optional[bool] = False,
    stats_annotation: Optional[bool] = False,
    x_fig: Optional[float] = 10,
    y_fig: Optional[float] = 10,
    percentage: Optional[bool] = False,
    nonsig: Optional[bool] = False,
    subfig=None,
    return_fig: bool = False,
) -> Optional[plt.Figure]:
    """
    Create a difference map from two fields (field_b - field_a). Same arguments as make_simple_map except the ones below.

    Parameters
    ----------
    field_a, field_b : cp.Field
        First and second Field objects for computing the difference (field_b - field_a).
    percentage : bool, optional
        If True, compute proportional difference in %. Default is False.
    nonsig : bool, optional
        If True, overlay hatched areas to indicate regions where 
        differences between the two fields are not statistically 
        significant (p-value >= 0.05). These are areas where the 
        observed differences could be explained by interannual 
        variability rather than representing a true difference 
        between the two climate runs. Default is False.
    """
    # If return_fig is True, create a wrapper function and return it
    if return_fig:
        return create_wrapper_from_locals(make_diff_map, locals())

    for name, f in [("field_a", field_a), ("field_b", field_b)]:
        if f.grid.is_reduced("lat") and f.grid.is_reduced("lon"):
            raise ValueError(
                f"{name} has reduced latitude and longitude. Map requires unreduced spatial dimensions."
            )

    # Retrieve metadata
    cb_label, unit = get_metadata(field_a, cb_label, unit)

    # If classification is provided as a list, update n_classes to match its length minus one
    if isinstance(classification, list) and len(classification)-1 != n_classes:
        n_classes = len(classification) - 1

    # Apply time reduction and create rasters
    raster_a = safe_make_raster(field_a, layer, timeop=timeop)
    raster_b = safe_make_raster(field_b, layer, timeop=timeop)

    # Compute the difference
    if percentage:
        with np.errstate(divide='ignore', invalid='ignore'):
            diff = np.where(raster_a.vmap != 0, (raster_b.vmap - raster_a.vmap) / raster_a.vmap * 100, 0)
    else:
        diff = raster_b.vmap - raster_a.vmap

    # Compute hatched areas
    nonsig_mask = None
    if nonsig:
        nonsig_mask = _calculate_significance_mask(field_a, field_b, layer, raster_a)

    # Calculate bins for the difference map
    bins = calculate_bins(np.abs(diff), n_classes, classification, force_zero, diff_map=True)

    # Plot the map
    fig = plot_map(raster_a.x, raster_a.y, diff, False, False, output_file, cb_label, title, unit,
                   n_classes, bins, palette, custom_palette, orientation, cb_label_rotation, extend, proj, 
                   dark_mode, stats_annotation, x_fig, y_fig, nonsig_mask, hist=True,      
                   subfig=subfig)
    
    return handle_figure_output(fig, output_file=output_file, transparent=transparent, subfig=subfig)



def _calculate_significance_mask(
    field_a: cp.Field,
    field_b: cp.Field,
    layer: str,
    raster_a: Any,
) -> np.ndarray:
    """
    Compute a mask indicating areas where differences between two fields
    are not statistically significant (p-value >= 0.05).
    """
    # Extract and align time series
    df_a = field_a.data[layer].unstack(level='time')
    df_b = field_b.data[layer].unstack(level='time')
    df_a_aligned, df_b_aligned = df_a.align(df_b, join='inner')

    # Calculate statistics
    mean_a, mean_b = df_a_aligned.mean(axis=1), df_b_aligned.mean(axis=1)
    var_a, var_b = df_a_aligned.var(axis=1, ddof=1), df_b_aligned.var(axis=1, ddof=1)

    # Calculate t-statistics and p-values
    s_p = np.sqrt((var_a + var_b)/2)
    t_stat = (mean_a - mean_b) / s_p
    p_values = 2 * (1 - t_distribution.cdf(np.abs(t_stat), df=2*len(cp.make_lines(field_a).index)-2))

    # Convert to grid format
    p_grid = pd.Series(p_values, index=mean_a.index).reset_index().pivot(index='lat', columns='lon', values=0).reindex(
        index=raster_a.y, columns=raster_a.x).values

    # Create significance mask (p < 0.05 is significant, mask shows non-significant areas)
    return p_grid >= 0.05