""" Convert time*mxharvests axes to growingseason axis """ import warnings import sys import numpy as np import xarray as xr try: import pandas as pd except ModuleNotFoundError: pass def pym_to_pg(pym_array, quiet=False): """ In convert_axis_time2gs(), convert year x month array to growingseason axis """ pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) ok_pg = pg_array[~np.isnan(pg_array)] if not quiet: print( f"{ok_pg.size} included; unique N seasons = " + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" ) return pg_array def ignore_lastyear_complete_season(pg_array, excl, mxharvests): """ Helper function for convert_axis_time2gs() """ tmp_l = pg_array[:, :-mxharvests] tmp_r = pg_array[:, -mxharvests:] tmp_r[np.where(excl)] = np.nan pg_array = np.concatenate((tmp_l, tmp_r), axis=1) return pg_array def convert_axis_time2gs_setup(this_ds, verbose): """ Various setup steps for convert_axis_time2gs_setup() """ # How many non-NaN patch-seasons do we expect to have once we're done organizing things? n_patch = this_ds.dims["patch"] # Because some patches will be planted in the last year but not complete, we have to ignore any # finalyear-planted seasons that do complete. n_gs = this_ds.dims["time"] - 1 expected_valid = n_patch * n_gs mxharvests = this_ds.dims["mxharvests"] if verbose: print( f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " + "patch-seasons" ) # Set all non-positive date values to NaN. These are seasons that were never harvested # (or never started): "non-seasons." if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): raise RuntimeError( "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " + f"{this_ds.HDATES.dims}" ) hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) with np.errstate(invalid="ignore"): hdates_pym[hdates_pym <= 0] = np.nan return n_patch, n_gs, expected_valid, mxharvests, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym def set_up_ds_with_gs_axis(ds_in): """ Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or mxharvests. """ # Get the data variables to include in the new dataset data_vars = {} for var in ds_in.data_vars: if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): data_vars[var] = ds_in[var] # Set up the new dataset gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] coords = ds_in.coords coords["gs"] = gs_years ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) return ds_out def print_onepatch_wrong_n_gs( patch_index, this_ds_orig, sdates_ymp, hdates_ymp, sdates_pym, hdates_pym, sdates_pym2, hdates_pym2, sdates_pym3, hdates_pym3, sdates_pg, hdates_pg, sdates_pg2, hdates_pg2, ): """ Print information about a patch (for debugging) """ print( f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " f"{this_ds_orig.patches1d_lat.values[patch_index]}" ) print("Original SDATES (per sowing):") print(this_ds_orig.SDATES.values[:, :, patch_index]) print("Original HDATES (per harvest):") print(this_ds_orig.HDATES.values[:, :, patch_index]) if "pandas" in sys.modules: def print_pandas_ymp(msg, cols, arrs_tuple): print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") mxharvests = arrs_tuple[0].shape[1] arrs_list2 = [] cols2 = [] for harvest_index in np.arange(mxharvests): for i, array in enumerate(arrs_tuple): arrs_list2.append(array[:, harvest_index]) cols2.append(cols[i] + str(harvest_index)) arrs_tuple2 = tuple(arrs_list2) dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) dataframe.columns = cols2 print(dataframe) print_pandas_ymp( "Original", ["sdate", "hdate"], ( this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], this_ds_orig.HDATES.values[:, :, patch_index], ), ) print_pandas_ymp( "Masked", ["sdate", "hdate"], (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), ) print_pandas_ymp( 'After "Ignore harvests from before this output began"', ["sdate", "hdate"], ( np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], ), ) print_pandas_ymp( 'After "In years with no sowing, pretend the first no-harvest is meaningful"', ["sdate", "hdate"], ( np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], ), ) print_pandas_ymp( ( 'After "In years with sowing that are followed by inactive years, check whether the' " last sowing was harvested before the patch was deactivated. If not, pretend the" ' LAST no-harvest is meaningful."' ), ["sdate", "hdate"], ( np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], ), ) def print_pandas_pg(msg, cols, arrs_tuple): print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") arrs_list = list(arrs_tuple) for i, array in enumerate(arrs_tuple): arrs_list[i] = np.reshape(array, (-1)) arrs_tuple2 = tuple(arrs_list) dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) dataframe.columns = cols print(dataframe) print_pandas_pg( "Same, but converted to gs axis", ["sdate", "hdate"], (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), ) print_pandas_pg( ( 'After "Ignore any harvests that were planted in the final year, because some cells' ' will have incomplete growing seasons for the final year"' ), ["sdate", "hdate"], (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), ) else: print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") def print_nopandas(array_1, array_2, msg): print(msg) if array_1.ndim == 1: # I don't know why these aren't side-by-side! print(np.stack((array_1, array_2), axis=1)) else: print(np.concatenate((array_1, array_2), axis=1)) print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") print_nopandas( np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], 'After "Ignore harvests from before this output began"', ) print_nopandas( np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], 'After "In years with no sowing, pretend the first no-harvest is meaningful"', ) print_nopandas( np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], ( 'After "In years with sowing that are followed by inactive years, check whether the' " last sowing was harvested before the patch was deactivated. If not, pretend the" ' LAST [easier to implement!] no-harvest is meaningful."' ), ) print_nopandas( sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" ) print_nopandas( sdates_pg2[patch_index, :], hdates_pg2[patch_index, :], ( 'After "Ignore any harvests that were planted in the final year, because some cells' ' will have incomplete growing seasons for the final year"' ), ) print("\n\n") def handle_years_with_no_sowing(this_ds, mxharvests, hdates_pym, sdates_pym): """ In years with no sowing, pretend the first no-harvest is meaningful, unless that was intentionally ignored earlier in convert_axis_time2gs(). """ sdates_orig_ymp = this_ds.SDATES.copy().values sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) hdates_pym2 = hdates_pym.copy() sdates_pym2 = sdates_pym.copy() with np.errstate(invalid="ignore"): sdates_gt_0 = sdates_orig_pym > 0 nosow_py = np.all(~sdates_gt_0, axis=2) nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) where_nosow_py_1st = np.where(nosow_py_1st) hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf for harvest_index in np.arange(mxharvests - 1): if harvest_index == 0: continue if harvest_index == 1: print("Warning: Untested with mxharvests > 2") where_nosow_py = np.where( nosow_py & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) & np.isnan(hdates_pym[:, :, harvest_index]) ) hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf return sdates_orig_pym, hdates_pym2, sdates_pym2 def handle_years_with_sowing_then_inactive( verbose, n_patch, n_gs, expected_valid, mxharvests, inactive_py, sdates_orig_pym, hdates_pym2, sdates_pym2, ): """ In years with sowing that are followed by inactive years, check whether the last sowing was harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] no-harvest is meaningful. """ sdates_orig_masked_pym = sdates_orig_pym.copy() with np.errstate(invalid="ignore"): sdates_le_0 = sdates_orig_masked_pym <= 0 sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) with np.errstate(invalid="ignore"): hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( last_hdate_first_n_gs_py ) inactive_last_n_gs_py = inactive_py[:, 1:] last_sowing_never_harvested_first_n_gs_py = ( last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py ) last_sowing_never_harvested_py = np.concatenate( (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 ) last_sowing_never_harvested_pym = np.concatenate( ( np.full((n_patch, n_gs + 1, mxharvests - 1), False), np.expand_dims(last_sowing_never_harvested_py, axis=2), ), axis=2, ) where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym hdates_pym3 = hdates_pym2.copy() sdates_pym3 = sdates_pym2.copy() hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) if verbose: print( "After 'In years with no sowing, pretend the first no-harvest is meaningful: " + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" ) return hdates_pym3, sdates_pym3, hdates_pg, sdates_pg def ignore_harvests_planted_in_final_year( this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg ): """ Ignore any harvests that were planted in the final year, because some cells will have incomplete growing seasons for the final year. """ with np.errstate(invalid="ignore"): hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) hdates_pg2 = ignore_lastyear_complete_season( hdates_pg.copy(), lastyear_complete_season, mxharvests ) sdates_pg2 = ignore_lastyear_complete_season( sdates_pg.copy(), lastyear_complete_season, mxharvests ) is_valid = ~np.isnan(hdates_pg2) is_fake = np.isneginf(hdates_pg2) is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) discrepancy = np.sum(is_valid) - expected_valid unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) if verbose: print( "After 'Ignore any harvests that were planted in the final year, because other cells " + "will have incomplete growing seasons for the final year': discrepancy of " + f"{discrepancy} patch-seasons" ) if "pandas" in sys.modules: bincount = np.bincount(np.sum(is_valid, axis=1)) bincount = bincount[bincount > 0] dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) print(dataframe) else: print(f"unique N seasons = {unique_n_seasons}") print(" ") return hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons def create_dataset( this_ds, my_vars, n_gs, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym, hdates_pym2, sdates_pym2, hdates_pym3, sdates_pym3, hdates_pg, sdates_pg, hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons, ): """ Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out """ if discrepancy == 0: this_ds_gs = set_up_ds_with_gs_axis(this_ds) for var in this_ds.data_vars: if this_ds[var].dims != ("time", "mxharvests", "patch") or ( my_vars and var not in my_vars ): continue # Set invalid values to NaN da_yhp = this_ds[var].copy() da_yhp = da_yhp.where(~np.isneginf(da_yhp)) # Remove the nans and reshape to patches*growingseasons da_pyh = da_yhp.transpose("patch", "time", "mxharvests") ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) # Change -infs to nans ar_valid_pg[is_fake] = np.nan # Save as DataArray to new Dataset, stripping _PERHARV from variable name newname = var.replace("_PERHARV", "") if newname in this_ds_gs: raise RuntimeError(f"{newname} already in dataset!") da_pg = xr.DataArray( data=ar_valid_pg, coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], name=newname, attrs=da_yhp.attrs, ) this_ds_gs[newname] = da_pg this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] else: # Print details about example bad patch(es) if min(unique_n_seasons) < n_gs: print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ 0 ][0] print_onepatch_wrong_n_gs( patch_index, this_ds, sdates_ymp, hdates_ymp, sdates_pym, hdates_pym, sdates_pym2, hdates_pym2, sdates_pym3, hdates_pym3, sdates_pg, hdates_pg, sdates_pg2, hdates_pg2, ) if max(unique_n_seasons) > n_gs: print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ 0 ][0] print_onepatch_wrong_n_gs( patch_index, this_ds, sdates_ymp, hdates_ymp, sdates_pym, hdates_pym, sdates_pym2, hdates_pym2, sdates_pym3, hdates_pym3, sdates_pg, hdates_pg, sdates_pg2, hdates_pg2, ) raise RuntimeError( "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " + f"{discrepancy} patch-seasons" ) # Preserve units for var_1 in this_ds_gs: var_0 = var_1 if var_0 not in this_ds: var_0 += "_PERHARV" if var_0 not in this_ds: continue if "units" in this_ds[var_0].attrs: this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] return this_ds_gs def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): """ Convert time*mxharvests axes to growingseason axis """ ( n_patch, n_gs, expected_valid, mxharvests, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym, ) = convert_axis_time2gs_setup(this_ds, verbose) # Find years where patch was inactive inactive_py = np.transpose( np.isnan(this_ds.HDATES).all(dim="mxharvests").values & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values ) # Find seasons that were planted while the patch was inactive with np.errstate(invalid="ignore"): sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was # inactive" with np.errstate(invalid="ignore"): first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) first_season_before_first_year_py[:, 0] = first_season_before_first_year_p sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py sown_prerun_or_inactive_pym = np.concatenate( ( np.expand_dims(sown_prerun_or_inactive_py, axis=2), np.full((n_patch, n_gs + 1, mxharvests - 1), False), ), axis=2, ) where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan if verbose: print( "After 'Ignore harvests from before this output began: discrepancy of " + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" ) # We need to keep some non-seasons---it's possible that "the yearY growing season" never # happened (sowing conditions weren't met), but we still need something there so that we can # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was # intentionally ignored above." sdates_orig_pym, hdates_pym2, sdates_pym2 = handle_years_with_no_sowing( this_ds, mxharvests, hdates_pym, sdates_pym ) # "In years with sowing that are followed by inactive years, check whether the last sowing was # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] # no-harvest is meaningful." hdates_pym3, sdates_pym3, hdates_pg, sdates_pg = handle_years_with_sowing_then_inactive( verbose, n_patch, n_gs, expected_valid, mxharvests, inactive_py, sdates_orig_pym, hdates_pym2, sdates_pym2, ) # "Ignore any harvests that were planted in the final year, because some cells will have # incomplete growing seasons for the final year." ( hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons, ) = ignore_harvests_planted_in_final_year( this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg ) # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out this_ds_gs = create_dataset( this_ds, my_vars, n_gs, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym, hdates_pym2, sdates_pym2, hdates_pym3, sdates_pym3, hdates_pg, sdates_pg, hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons, ) if incl_orig: return this_ds_gs, this_ds return this_ds_gs