2024-05-09 15:14:01 +08:00

264 lines
9.7 KiB
Python

"""
Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray.
"""
import re
import numpy as np
import xarray as xr
from ctsm.crop_calendars.cropcal_utils import vegtype_str2int, is_each_vegtype
def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs):
"""
Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray.
- Keyword arguments like dimension=selection.
- Selections can be individual values or slice()s.
- Optimize memory usage by beginning keyword argument list with the selections that will result
in the largest reduction of object size.
- Use dimension "vegtype" to extract patches of designated vegetation type (can be string or
integer).
- Can also do dimension=function---e.g., time=np.mean will take the mean over the time
dimension.
"""
# Setup
havewarned = False
delimiter = "__"
for key, selection in kwargs.items():
if callable(selection):
xr_object = handle_callable(xr_object, key, selection)
elif key == "vegtype":
xr_object = handle_vegtype(xr_object, patches1d_itype_veg, selection)
else:
# Parse selection type, if provided
if delimiter in key:
key, selection_type = key.split(delimiter)
# Check type of selection
else:
is_inefficient = False
if isinstance(selection, slice):
this_type = set_type_from_slice(selection)
elif isinstance(selection, np.ndarray):
selection, is_inefficient, this_type = set_type_from_ndarray(selection)
else:
this_type = type(selection)
warn_about_this_seltype_interp = warn_about_seltype_interp
if this_type == list and isinstance(selection[0], str):
selection_type = "values"
warn_about_this_seltype_interp = False
elif this_type == int:
selection_type = "indices"
else:
selection_type = "values"
if warn_about_this_seltype_interp:
do_warn_about_seltype_interp(
havewarned, delimiter, key, selection_type, is_inefficient, this_type
)
# Trim along relevant 1d axes
if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]:
xr_object = trim_along_relevant_1d_axes(xr_object, selection, selection_type, key)
# Perform selection
xr_object = perform_selection(xr_object, key, selection, selection_type)
return xr_object
def perform_selection(xr_object, key, selection, selection_type):
"""
Perform selection
"""
if selection_type == "indices":
# Have to select like this instead of with index directly because otherwise assign_coords()
# will throw an error. Not sure why.
if isinstance(selection, int):
# Single integer? Turn it into a slice.
selection = slice(selection, selection + 1)
elif (
isinstance(selection, np.ndarray)
and not selection.dtype.kind in np.typecodes["AllInteger"]
):
selection = selection.astype(int)
xr_object = xr_object.isel({key: selection})
elif selection_type == "values":
xr_object = xr_object.sel({key: selection})
else:
raise TypeError(f"selection_type {selection_type} not recognized")
return xr_object
def trim_along_relevant_1d_axes(xr_object, selection, selection_type, key):
"""
Trim along relevant 1d axes
"""
if selection_type == "indices":
incl_coords = xr_object[key].values[selection]
elif selection_type == "values":
if isinstance(selection, slice):
incl_coords = xr_object.sel({key: selection}, drop=False)[key].values
else:
incl_coords = selection
else:
raise TypeError(f"selection_type {selection_type} not recognized")
if key == "lat":
this_xy = "jxy"
elif key == "lon":
this_xy = "ixy"
else:
raise KeyError(
f"Key '{key}' not recognized: What 1d_ suffix should I use for variable name?"
)
pattern = re.compile(f"1d_{this_xy}")
matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None]
for var in matches:
if len(xr_object[var].dims) != 1:
raise RuntimeError(
f"Expected {var} to have 1 dimension, but it has"
f" {len(xr_object[var].dims)}: {xr_object[var].dims}"
)
dim = xr_object[var].dims[0]
# print(f"Variable {var} has dimension {dim}")
coords = xr_object[key].values[xr_object[var].values.astype(int) - 1]
# print(f"{dim} size before: {xr_object.sizes[dim]}")
ok_ind = []
new_1d_this_xy = []
for i, member in enumerate(coords):
if member in incl_coords:
ok_ind = ok_ind + [i]
new_1d_this_xy = new_1d_this_xy + [(incl_coords == member).nonzero()[0] + 1]
xr_object = xr_object.isel({dim: ok_ind})
new_1d_this_xy = np.array(new_1d_this_xy).squeeze()
xr_object[var].values = new_1d_this_xy
# print(f"{dim} size after: {xr_object.sizes[dim]}")
return xr_object
def do_warn_about_seltype_interp(
havewarned, delimiter, key, selection_type, is_inefficient, this_type
):
"""
Suggest suppressing selection type interpretation warnings
"""
if not havewarned:
print(
"xr_flexsel(): Suppress all 'selection type interpretation' messages by specifying"
+ "warn_about_seltype_interp=False"
)
havewarned = True
if is_inefficient:
extra = " This will also improve efficiency for large selections."
else:
extra = ""
print(
f"xr_flexsel(): Selecting {key} as {selection_type} because selection was"
f" interpreted as {this_type}. If not correct, specify selection type"
" ('indices' or 'values') in keyword like"
f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}"
)
def set_type_from_ndarray(selection):
"""
Sets selection type if given a Numpy array
"""
if selection.dtype.kind in np.typecodes["AllInteger"]:
this_type = int
else:
is_inefficient = True
this_type = None
for member in selection:
if member < 0 or member % 1 > 0:
if isinstance(member, int):
this_type = "values"
else:
this_type = type(member)
break
if this_type is None:
this_type = int
selection = selection.astype(int)
return selection, is_inefficient, this_type
def set_type_from_slice(selection):
"""
Sets selection type if given a slice
"""
slice_members = []
if selection == slice(0):
raise ValueError("slice(0) will be empty")
if selection.start is not None:
slice_members = slice_members + [selection.start]
if selection.stop is not None:
slice_members = slice_members + [selection.stop]
if selection.step is not None:
slice_members = slice_members + [selection.step]
if not slice_members:
raise TypeError("slice is all None?")
this_type = int
for member in slice_members:
if member < 0 or not isinstance(member, int):
this_type = "values"
break
return this_type
def handle_vegtype(xr_object, patches1d_itype_veg, selection):
"""
Handle selection "vegtype
"""
# Convert to list, if needed
if not isinstance(selection, list):
selection = [selection]
# Convert to indices, if needed
if isinstance(selection[0], str):
selection = vegtype_str2int(selection)
# Get list of boolean(s)
if isinstance(selection[0], int):
if isinstance(patches1d_itype_veg, type(None)):
patches1d_itype_veg = xr_object.patches1d_itype_veg.values
elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray):
patches1d_itype_veg = patches1d_itype_veg.values
is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact")
elif isinstance(selection[0], bool):
if len(selection) != len(xr_object.patch):
raise ValueError(
"If providing boolean 'vegtype' argument to xr_flexsel(), it must be the"
f" same length as xr_object.patch ({len(selection)} vs."
f" {len(xr_object.patch)})"
)
is_vegtype = selection
else:
raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}")
xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x])
if "ivt" in xr_object:
xr_object = xr_object.isel(ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact"))
return xr_object
def handle_callable(xr_object, key, selection):
"""
Handle selection that's a callable
"""
# It would have been really nice to do selection(xr_object, axis=key), but numpy methods and
# xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually.
if selection == np.mean: # pylint: disable=comparison-with-callable
try:
xr_object = xr_object.mean(dim=key)
except: # pylint: disable=raise-missing-from
raise ValueError(
f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()."
)
else:
raise ValueError(f"xr_flexsel() doesn't recognize function {selection}")
return xr_object