clm5/python/ctsm/crop_calendars/convert_axis_time2gs.py
2024-05-09 15:14:01 +08:00

623 lines
22 KiB
Python

"""
Convert time*mxharvests axes to growingseason axis
"""
import warnings
import sys
import numpy as np
import xarray as xr
try:
import pandas as pd
except ModuleNotFoundError:
pass
def pym_to_pg(pym_array, quiet=False):
"""
In convert_axis_time2gs(), convert year x month array to growingseason axis
"""
pg_array = np.reshape(pym_array, (pym_array.shape[0], -1))
ok_pg = pg_array[~np.isnan(pg_array)]
if not quiet:
print(
f"{ok_pg.size} included; unique N seasons = "
+ f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}"
)
return pg_array
def ignore_lastyear_complete_season(pg_array, excl, mxharvests):
"""
Helper function for convert_axis_time2gs()
"""
tmp_l = pg_array[:, :-mxharvests]
tmp_r = pg_array[:, -mxharvests:]
tmp_r[np.where(excl)] = np.nan
pg_array = np.concatenate((tmp_l, tmp_r), axis=1)
return pg_array
def convert_axis_time2gs_setup(this_ds, verbose):
"""
Various setup steps for convert_axis_time2gs_setup()
"""
# How many non-NaN patch-seasons do we expect to have once we're done organizing things?
n_patch = this_ds.dims["patch"]
# Because some patches will be planted in the last year but not complete, we have to ignore any
# finalyear-planted seasons that do complete.
n_gs = this_ds.dims["time"] - 1
expected_valid = n_patch * n_gs
mxharvests = this_ds.dims["mxharvests"]
if verbose:
print(
f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} "
+ "patch-seasons"
)
# Set all non-positive date values to NaN. These are seasons that were never harvested
# (or never started): "non-seasons."
if this_ds.HDATES.dims != ("time", "mxharvests", "patch"):
raise RuntimeError(
"This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not "
+ f"{this_ds.HDATES.dims}"
)
hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values
hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1))
sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values
sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1))
with np.errstate(invalid="ignore"):
hdates_pym[hdates_pym <= 0] = np.nan
return n_patch, n_gs, expected_valid, mxharvests, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym
def set_up_ds_with_gs_axis(ds_in):
"""
Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out.
Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or
mxharvests.
"""
# Get the data variables to include in the new dataset
data_vars = {}
for var in ds_in.data_vars:
if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims):
data_vars[var] = ds_in[var]
# Set up the new dataset
gs_years = [t.year - 1 for t in ds_in.time.values[:-1]]
coords = ds_in.coords
coords["gs"] = gs_years
ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs)
return ds_out
def print_onepatch_wrong_n_gs(
patch_index,
this_ds_orig,
sdates_ymp,
hdates_ymp,
sdates_pym,
hdates_pym,
sdates_pym2,
hdates_pym2,
sdates_pym3,
hdates_pym3,
sdates_pg,
hdates_pg,
sdates_pg2,
hdates_pg2,
):
"""
Print information about a patch (for debugging)
"""
print(
f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon "
f"{this_ds_orig.patches1d_lon.values[patch_index]} lat "
f"{this_ds_orig.patches1d_lat.values[patch_index]}"
)
print("Original SDATES (per sowing):")
print(this_ds_orig.SDATES.values[:, :, patch_index])
print("Original HDATES (per harvest):")
print(this_ds_orig.HDATES.values[:, :, patch_index])
if "pandas" in sys.modules:
def print_pandas_ymp(msg, cols, arrs_tuple):
print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})")
mxharvests = arrs_tuple[0].shape[1]
arrs_list2 = []
cols2 = []
for harvest_index in np.arange(mxharvests):
for i, array in enumerate(arrs_tuple):
arrs_list2.append(array[:, harvest_index])
cols2.append(cols[i] + str(harvest_index))
arrs_tuple2 = tuple(arrs_list2)
dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1))
dataframe.columns = cols2
print(dataframe)
print_pandas_ymp(
"Original",
["sdate", "hdate"],
(
this_ds_orig.SDATES_PERHARV.values[:, :, patch_index],
this_ds_orig.HDATES.values[:, :, patch_index],
),
)
print_pandas_ymp(
"Masked",
["sdate", "hdate"],
(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]),
)
print_pandas_ymp(
'After "Ignore harvests from before this output began"',
["sdate", "hdate"],
(
np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index],
np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index],
),
)
print_pandas_ymp(
'After "In years with no sowing, pretend the first no-harvest is meaningful"',
["sdate", "hdate"],
(
np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index],
np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index],
),
)
print_pandas_ymp(
(
'After "In years with sowing that are followed by inactive years, check whether the'
" last sowing was harvested before the patch was deactivated. If not, pretend the"
' LAST no-harvest is meaningful."'
),
["sdate", "hdate"],
(
np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index],
np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index],
),
)
def print_pandas_pg(msg, cols, arrs_tuple):
print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})")
arrs_list = list(arrs_tuple)
for i, array in enumerate(arrs_tuple):
arrs_list[i] = np.reshape(array, (-1))
arrs_tuple2 = tuple(arrs_list)
dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1))
dataframe.columns = cols
print(dataframe)
print_pandas_pg(
"Same, but converted to gs axis",
["sdate", "hdate"],
(sdates_pg[patch_index, :], hdates_pg[patch_index, :]),
)
print_pandas_pg(
(
'After "Ignore any harvests that were planted in the final year, because some cells'
' will have incomplete growing seasons for the final year"'
),
["sdate", "hdate"],
(sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]),
)
else:
print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.")
def print_nopandas(array_1, array_2, msg):
print(msg)
if array_1.ndim == 1:
# I don't know why these aren't side-by-side!
print(np.stack((array_1, array_2), axis=1))
else:
print(np.concatenate((array_1, array_2), axis=1))
print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:")
print_nopandas(
np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index],
np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index],
'After "Ignore harvests from before this output began"',
)
print_nopandas(
np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index],
np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index],
'After "In years with no sowing, pretend the first no-harvest is meaningful"',
)
print_nopandas(
np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index],
np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index],
(
'After "In years with sowing that are followed by inactive years, check whether the'
" last sowing was harvested before the patch was deactivated. If not, pretend the"
' LAST [easier to implement!] no-harvest is meaningful."'
),
)
print_nopandas(
sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis"
)
print_nopandas(
sdates_pg2[patch_index, :],
hdates_pg2[patch_index, :],
(
'After "Ignore any harvests that were planted in the final year, because some cells'
' will have incomplete growing seasons for the final year"'
),
)
print("\n\n")
def handle_years_with_no_sowing(this_ds, mxharvests, hdates_pym, sdates_pym):
"""
In years with no sowing, pretend the first no-harvest is meaningful, unless that was
intentionally ignored earlier in convert_axis_time2gs().
"""
sdates_orig_ymp = this_ds.SDATES.copy().values
sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1))
hdates_pym2 = hdates_pym.copy()
sdates_pym2 = sdates_pym.copy()
with np.errstate(invalid="ignore"):
sdates_gt_0 = sdates_orig_pym > 0
nosow_py = np.all(~sdates_gt_0, axis=2)
nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0])
where_nosow_py_1st = np.where(nosow_py_1st)
hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf
sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf
for harvest_index in np.arange(mxharvests - 1):
if harvest_index == 0:
continue
if harvest_index == 1:
print("Warning: Untested with mxharvests > 2")
where_nosow_py = np.where(
nosow_py
& ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2)
& np.isnan(hdates_pym[:, :, harvest_index])
)
hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf
sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf
return sdates_orig_pym, hdates_pym2, sdates_pym2
def handle_years_with_sowing_then_inactive(
verbose,
n_patch,
n_gs,
expected_valid,
mxharvests,
inactive_py,
sdates_orig_pym,
hdates_pym2,
sdates_pym2,
):
"""
In years with sowing that are followed by inactive years, check whether the last sowing was
harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!]
no-harvest is meaningful.
"""
sdates_orig_masked_pym = sdates_orig_pym.copy()
with np.errstate(invalid="ignore"):
sdates_le_0 = sdates_orig_masked_pym <= 0
sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan
with warnings.catch_warnings():
warnings.filterwarnings(action="ignore", message="All-NaN slice encountered")
last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2)
last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2)
with np.errstate(invalid="ignore"):
hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py
last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan(
last_hdate_first_n_gs_py
)
inactive_last_n_gs_py = inactive_py[:, 1:]
last_sowing_never_harvested_first_n_gs_py = (
last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py
)
last_sowing_never_harvested_py = np.concatenate(
(last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1
)
last_sowing_never_harvested_pym = np.concatenate(
(
np.full((n_patch, n_gs + 1, mxharvests - 1), False),
np.expand_dims(last_sowing_never_harvested_py, axis=2),
),
axis=2,
)
where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym
hdates_pym3 = hdates_pym2.copy()
sdates_pym3 = sdates_pym2.copy()
hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf
sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf
hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose)
sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True)
if verbose:
print(
"After 'In years with no sowing, pretend the first no-harvest is meaningful: "
+ f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons"
)
return hdates_pym3, sdates_pym3, hdates_pg, sdates_pg
def ignore_harvests_planted_in_final_year(
this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg
):
"""
Ignore any harvests that were planted in the final year, because some cells will have
incomplete growing seasons for the final year.
"""
with np.errstate(invalid="ignore"):
hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:]
lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:])
hdates_pg2 = ignore_lastyear_complete_season(
hdates_pg.copy(), lastyear_complete_season, mxharvests
)
sdates_pg2 = ignore_lastyear_complete_season(
sdates_pg.copy(), lastyear_complete_season, mxharvests
)
is_valid = ~np.isnan(hdates_pg2)
is_fake = np.isneginf(hdates_pg2)
is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs))
discrepancy = np.sum(is_valid) - expected_valid
unique_n_seasons = np.unique(np.sum(is_valid, axis=1))
if verbose:
print(
"After 'Ignore any harvests that were planted in the final year, because other cells "
+ "will have incomplete growing seasons for the final year': discrepancy of "
+ f"{discrepancy} patch-seasons"
)
if "pandas" in sys.modules:
bincount = np.bincount(np.sum(is_valid, axis=1))
bincount = bincount[bincount > 0]
dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount})
print(dataframe)
else:
print(f"unique N seasons = {unique_n_seasons}")
print(" ")
return hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons
def create_dataset(
this_ds,
my_vars,
n_gs,
hdates_ymp,
hdates_pym,
sdates_ymp,
sdates_pym,
hdates_pym2,
sdates_pym2,
hdates_pym3,
sdates_pym3,
hdates_pg,
sdates_pg,
hdates_pg2,
sdates_pg2,
is_valid,
is_fake,
discrepancy,
unique_n_seasons,
):
"""
Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out
"""
if discrepancy == 0:
this_ds_gs = set_up_ds_with_gs_axis(this_ds)
for var in this_ds.data_vars:
if this_ds[var].dims != ("time", "mxharvests", "patch") or (
my_vars and var not in my_vars
):
continue
# Set invalid values to NaN
da_yhp = this_ds[var].copy()
da_yhp = da_yhp.where(~np.isneginf(da_yhp))
# Remove the nans and reshape to patches*growingseasons
da_pyh = da_yhp.transpose("patch", "time", "mxharvests")
ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1))
ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs))
# Change -infs to nans
ar_valid_pg[is_fake] = np.nan
# Save as DataArray to new Dataset, stripping _PERHARV from variable name
newname = var.replace("_PERHARV", "")
if newname in this_ds_gs:
raise RuntimeError(f"{newname} already in dataset!")
da_pg = xr.DataArray(
data=ar_valid_pg,
coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]],
name=newname,
attrs=da_yhp.attrs,
)
this_ds_gs[newname] = da_pg
this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"]
else:
# Print details about example bad patch(es)
if min(unique_n_seasons) < n_gs:
print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})")
patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[
0
][0]
print_onepatch_wrong_n_gs(
patch_index,
this_ds,
sdates_ymp,
hdates_ymp,
sdates_pym,
hdates_pym,
sdates_pym2,
hdates_pym2,
sdates_pym3,
hdates_pym3,
sdates_pg,
hdates_pg,
sdates_pg2,
hdates_pg2,
)
if max(unique_n_seasons) > n_gs:
print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})")
patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[
0
][0]
print_onepatch_wrong_n_gs(
patch_index,
this_ds,
sdates_ymp,
hdates_ymp,
sdates_pym,
hdates_pym,
sdates_pym2,
hdates_pym2,
sdates_pym3,
hdates_pym3,
sdates_pg,
hdates_pg,
sdates_pg2,
hdates_pg2,
)
raise RuntimeError(
"Can't convert time*mxharvests axes to growingseason axis: discrepancy of "
+ f"{discrepancy} patch-seasons"
)
# Preserve units
for var_1 in this_ds_gs:
var_0 = var_1
if var_0 not in this_ds:
var_0 += "_PERHARV"
if var_0 not in this_ds:
continue
if "units" in this_ds[var_0].attrs:
this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"]
return this_ds_gs
def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False):
"""
Convert time*mxharvests axes to growingseason axis
"""
(
n_patch,
n_gs,
expected_valid,
mxharvests,
hdates_ymp,
hdates_pym,
sdates_ymp,
sdates_pym,
) = convert_axis_time2gs_setup(this_ds, verbose)
# Find years where patch was inactive
inactive_py = np.transpose(
np.isnan(this_ds.HDATES).all(dim="mxharvests").values
& np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values
)
# Find seasons that were planted while the patch was inactive
with np.errstate(invalid="ignore"):
sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0])
sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1)
# "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was
# inactive"
with np.errstate(invalid="ignore"):
first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0]
first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False)
first_season_before_first_year_py[:, 0] = first_season_before_first_year_p
sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py
sown_prerun_or_inactive_pym = np.concatenate(
(
np.expand_dims(sown_prerun_or_inactive_py, axis=2),
np.full((n_patch, n_gs + 1, mxharvests - 1), False),
),
axis=2,
)
where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym)
hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan
sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan
if verbose:
print(
"After 'Ignore harvests from before this output began: discrepancy of "
+ f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'"
)
# We need to keep some non-seasons---it's possible that "the yearY growing season" never
# happened (sowing conditions weren't met), but we still need something there so that we can
# make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to
# -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs.
# "In years with no sowing, pretend the first no-harvest is meaningful, unless that was
# intentionally ignored above."
sdates_orig_pym, hdates_pym2, sdates_pym2 = handle_years_with_no_sowing(
this_ds, mxharvests, hdates_pym, sdates_pym
)
# "In years with sowing that are followed by inactive years, check whether the last sowing was
# harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!]
# no-harvest is meaningful."
hdates_pym3, sdates_pym3, hdates_pg, sdates_pg = handle_years_with_sowing_then_inactive(
verbose,
n_patch,
n_gs,
expected_valid,
mxharvests,
inactive_py,
sdates_orig_pym,
hdates_pym2,
sdates_pym2,
)
# "Ignore any harvests that were planted in the final year, because some cells will have
# incomplete growing seasons for the final year."
(
hdates_pg2,
sdates_pg2,
is_valid,
is_fake,
discrepancy,
unique_n_seasons,
) = ignore_harvests_planted_in_final_year(
this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg
)
# Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out
this_ds_gs = create_dataset(
this_ds,
my_vars,
n_gs,
hdates_ymp,
hdates_pym,
sdates_ymp,
sdates_pym,
hdates_pym2,
sdates_pym2,
hdates_pym3,
sdates_pym3,
hdates_pg,
sdates_pg,
hdates_pg2,
sdates_pg2,
is_valid,
is_fake,
discrepancy,
unique_n_seasons,
)
if incl_orig:
return this_ds_gs, this_ds
return this_ds_gs