264 lines
9.7 KiB
Python
264 lines
9.7 KiB
Python
"""
|
|
Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray.
|
|
"""
|
|
import re
|
|
import numpy as np
|
|
import xarray as xr
|
|
|
|
from ctsm.crop_calendars.cropcal_utils import vegtype_str2int, is_each_vegtype
|
|
|
|
|
|
def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs):
|
|
"""
|
|
Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray.
|
|
|
|
- Keyword arguments like dimension=selection.
|
|
- Selections can be individual values or slice()s.
|
|
- Optimize memory usage by beginning keyword argument list with the selections that will result
|
|
in the largest reduction of object size.
|
|
- Use dimension "vegtype" to extract patches of designated vegetation type (can be string or
|
|
integer).
|
|
- Can also do dimension=function---e.g., time=np.mean will take the mean over the time
|
|
dimension.
|
|
"""
|
|
# Setup
|
|
havewarned = False
|
|
delimiter = "__"
|
|
|
|
for key, selection in kwargs.items():
|
|
if callable(selection):
|
|
xr_object = handle_callable(xr_object, key, selection)
|
|
|
|
elif key == "vegtype":
|
|
xr_object = handle_vegtype(xr_object, patches1d_itype_veg, selection)
|
|
|
|
else:
|
|
# Parse selection type, if provided
|
|
if delimiter in key:
|
|
key, selection_type = key.split(delimiter)
|
|
|
|
# Check type of selection
|
|
else:
|
|
is_inefficient = False
|
|
if isinstance(selection, slice):
|
|
this_type = set_type_from_slice(selection)
|
|
elif isinstance(selection, np.ndarray):
|
|
selection, is_inefficient, this_type = set_type_from_ndarray(selection)
|
|
else:
|
|
this_type = type(selection)
|
|
|
|
warn_about_this_seltype_interp = warn_about_seltype_interp
|
|
if this_type == list and isinstance(selection[0], str):
|
|
selection_type = "values"
|
|
warn_about_this_seltype_interp = False
|
|
elif this_type == int:
|
|
selection_type = "indices"
|
|
else:
|
|
selection_type = "values"
|
|
|
|
if warn_about_this_seltype_interp:
|
|
do_warn_about_seltype_interp(
|
|
havewarned, delimiter, key, selection_type, is_inefficient, this_type
|
|
)
|
|
|
|
# Trim along relevant 1d axes
|
|
if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]:
|
|
xr_object = trim_along_relevant_1d_axes(xr_object, selection, selection_type, key)
|
|
|
|
# Perform selection
|
|
xr_object = perform_selection(xr_object, key, selection, selection_type)
|
|
|
|
return xr_object
|
|
|
|
|
|
def perform_selection(xr_object, key, selection, selection_type):
|
|
"""
|
|
Perform selection
|
|
"""
|
|
if selection_type == "indices":
|
|
# Have to select like this instead of with index directly because otherwise assign_coords()
|
|
# will throw an error. Not sure why.
|
|
if isinstance(selection, int):
|
|
# Single integer? Turn it into a slice.
|
|
selection = slice(selection, selection + 1)
|
|
elif (
|
|
isinstance(selection, np.ndarray)
|
|
and not selection.dtype.kind in np.typecodes["AllInteger"]
|
|
):
|
|
selection = selection.astype(int)
|
|
xr_object = xr_object.isel({key: selection})
|
|
elif selection_type == "values":
|
|
xr_object = xr_object.sel({key: selection})
|
|
else:
|
|
raise TypeError(f"selection_type {selection_type} not recognized")
|
|
return xr_object
|
|
|
|
|
|
def trim_along_relevant_1d_axes(xr_object, selection, selection_type, key):
|
|
"""
|
|
Trim along relevant 1d axes
|
|
"""
|
|
if selection_type == "indices":
|
|
incl_coords = xr_object[key].values[selection]
|
|
elif selection_type == "values":
|
|
if isinstance(selection, slice):
|
|
incl_coords = xr_object.sel({key: selection}, drop=False)[key].values
|
|
else:
|
|
incl_coords = selection
|
|
else:
|
|
raise TypeError(f"selection_type {selection_type} not recognized")
|
|
if key == "lat":
|
|
this_xy = "jxy"
|
|
elif key == "lon":
|
|
this_xy = "ixy"
|
|
else:
|
|
raise KeyError(
|
|
f"Key '{key}' not recognized: What 1d_ suffix should I use for variable name?"
|
|
)
|
|
pattern = re.compile(f"1d_{this_xy}")
|
|
matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None]
|
|
for var in matches:
|
|
if len(xr_object[var].dims) != 1:
|
|
raise RuntimeError(
|
|
f"Expected {var} to have 1 dimension, but it has"
|
|
f" {len(xr_object[var].dims)}: {xr_object[var].dims}"
|
|
)
|
|
dim = xr_object[var].dims[0]
|
|
# print(f"Variable {var} has dimension {dim}")
|
|
coords = xr_object[key].values[xr_object[var].values.astype(int) - 1]
|
|
# print(f"{dim} size before: {xr_object.sizes[dim]}")
|
|
ok_ind = []
|
|
new_1d_this_xy = []
|
|
for i, member in enumerate(coords):
|
|
if member in incl_coords:
|
|
ok_ind = ok_ind + [i]
|
|
new_1d_this_xy = new_1d_this_xy + [(incl_coords == member).nonzero()[0] + 1]
|
|
xr_object = xr_object.isel({dim: ok_ind})
|
|
new_1d_this_xy = np.array(new_1d_this_xy).squeeze()
|
|
xr_object[var].values = new_1d_this_xy
|
|
# print(f"{dim} size after: {xr_object.sizes[dim]}")
|
|
return xr_object
|
|
|
|
|
|
def do_warn_about_seltype_interp(
|
|
havewarned, delimiter, key, selection_type, is_inefficient, this_type
|
|
):
|
|
"""
|
|
Suggest suppressing selection type interpretation warnings
|
|
"""
|
|
if not havewarned:
|
|
print(
|
|
"xr_flexsel(): Suppress all 'selection type interpretation' messages by specifying"
|
|
+ "warn_about_seltype_interp=False"
|
|
)
|
|
havewarned = True
|
|
if is_inefficient:
|
|
extra = " This will also improve efficiency for large selections."
|
|
else:
|
|
extra = ""
|
|
print(
|
|
f"xr_flexsel(): Selecting {key} as {selection_type} because selection was"
|
|
f" interpreted as {this_type}. If not correct, specify selection type"
|
|
" ('indices' or 'values') in keyword like"
|
|
f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}"
|
|
)
|
|
|
|
|
|
def set_type_from_ndarray(selection):
|
|
"""
|
|
Sets selection type if given a Numpy array
|
|
"""
|
|
if selection.dtype.kind in np.typecodes["AllInteger"]:
|
|
this_type = int
|
|
else:
|
|
is_inefficient = True
|
|
this_type = None
|
|
for member in selection:
|
|
if member < 0 or member % 1 > 0:
|
|
if isinstance(member, int):
|
|
this_type = "values"
|
|
else:
|
|
this_type = type(member)
|
|
break
|
|
if this_type is None:
|
|
this_type = int
|
|
selection = selection.astype(int)
|
|
return selection, is_inefficient, this_type
|
|
|
|
|
|
def set_type_from_slice(selection):
|
|
"""
|
|
Sets selection type if given a slice
|
|
"""
|
|
slice_members = []
|
|
if selection == slice(0):
|
|
raise ValueError("slice(0) will be empty")
|
|
if selection.start is not None:
|
|
slice_members = slice_members + [selection.start]
|
|
if selection.stop is not None:
|
|
slice_members = slice_members + [selection.stop]
|
|
if selection.step is not None:
|
|
slice_members = slice_members + [selection.step]
|
|
if not slice_members:
|
|
raise TypeError("slice is all None?")
|
|
this_type = int
|
|
for member in slice_members:
|
|
if member < 0 or not isinstance(member, int):
|
|
this_type = "values"
|
|
break
|
|
return this_type
|
|
|
|
|
|
def handle_vegtype(xr_object, patches1d_itype_veg, selection):
|
|
"""
|
|
Handle selection "vegtype
|
|
"""
|
|
# Convert to list, if needed
|
|
if not isinstance(selection, list):
|
|
selection = [selection]
|
|
|
|
# Convert to indices, if needed
|
|
if isinstance(selection[0], str):
|
|
selection = vegtype_str2int(selection)
|
|
|
|
# Get list of boolean(s)
|
|
if isinstance(selection[0], int):
|
|
if isinstance(patches1d_itype_veg, type(None)):
|
|
patches1d_itype_veg = xr_object.patches1d_itype_veg.values
|
|
elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray):
|
|
patches1d_itype_veg = patches1d_itype_veg.values
|
|
is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact")
|
|
elif isinstance(selection[0], bool):
|
|
if len(selection) != len(xr_object.patch):
|
|
raise ValueError(
|
|
"If providing boolean 'vegtype' argument to xr_flexsel(), it must be the"
|
|
f" same length as xr_object.patch ({len(selection)} vs."
|
|
f" {len(xr_object.patch)})"
|
|
)
|
|
is_vegtype = selection
|
|
else:
|
|
raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}")
|
|
xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x])
|
|
if "ivt" in xr_object:
|
|
xr_object = xr_object.isel(ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact"))
|
|
|
|
return xr_object
|
|
|
|
|
|
def handle_callable(xr_object, key, selection):
|
|
"""
|
|
Handle selection that's a callable
|
|
"""
|
|
# It would have been really nice to do selection(xr_object, axis=key), but numpy methods and
|
|
# xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually.
|
|
if selection == np.mean: # pylint: disable=comparison-with-callable
|
|
try:
|
|
xr_object = xr_object.mean(dim=key)
|
|
except: # pylint: disable=raise-missing-from
|
|
raise ValueError(
|
|
f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()."
|
|
)
|
|
else:
|
|
raise ValueError(f"xr_flexsel() doesn't recognize function {selection}")
|
|
return xr_object
|