""" Helper functions for various crop calendar stuff """ import os import glob import numpy as np import xarray as xr import ctsm.crop_calendars.cropcal_utils as utils from ctsm.crop_calendars.convert_axis_time2gs import convert_axis_time2gs from ctsm.crop_calendars.check_rx_obeyed import check_rx_obeyed from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN from ctsm.crop_calendars.import_ds import import_ds def check_and_trim_years(year_1, year_n, ds_in): """ After importing a file, restrict it to years of interest. """ ### In annual outputs, file with name Y is actually results from year Y-1. ### Note that time values refer to when it was SAVED. So 1981-01-01 is for year 1980. def get_year_from_cftime(cftime_date): # Subtract 1 because the date for annual files is when it was SAVED return cftime_date.year - 1 # Check that all desired years are included if get_year_from_cftime(ds_in.time.values[0]) > year_1: raise RuntimeError( f"Requested year_1 is {year_1} but first year in outputs is " + f"{get_year_from_cftime(ds_in.time.values[0])}" ) if get_year_from_cftime(ds_in.time.values[-1]) < year_1: raise RuntimeError( f"Requested year_n is {year_n} but last year in outputs is " + f"{get_year_from_cftime(ds_in.time.values[-1])}" ) # Remove years outside range of interest ### Include an extra year at the end to finish out final seasons. ds_in = utils.safer_timeslice(ds_in, slice(f"{year_1+1}-01-01", f"{year_n+2}-01-01")) # Make sure you have the expected number of timesteps (including extra year) n_years_expected = year_n - year_1 + 2 if ds_in.dims["time"] != n_years_expected: raise RuntimeError( f"Expected {n_years_expected} timesteps in output but got {ds_in.dims['time']}" ) return ds_in def open_lu_ds(filename, year_1, year_n, existing_ds, ungrid=True): """ Open land-use dataset """ # Open and trim to years of interest this_ds_gridded = xr.open_dataset(filename).sel(time=slice(year_1, year_n)) # Assign actual lon/lat coordinates this_ds_gridded = this_ds_gridded.assign_coords( lon=("lsmlon", existing_ds.lon.values), lat=("lsmlat", existing_ds.lat.values) ) this_ds_gridded = this_ds_gridded.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) if "AREA" in this_ds_gridded: this_ds_gridded["AREA_CFT"] = ( this_ds_gridded.AREA * 1e6 * this_ds_gridded.LANDFRAC_PFT * this_ds_gridded.PCT_CROP / 100 * this_ds_gridded.PCT_CFT / 100 ) this_ds_gridded["AREA_CFT"].attrs = {"units": "m2"} this_ds_gridded["AREA_CFT"].load() else: print("Warning: AREA missing from Dataset, so AREA_CFT will not be created") if not ungrid: return this_ds_gridded # Un-grid query_ilons = [int(x) - 1 for x in existing_ds["patches1d_ixy"].values] query_ilats = [int(x) - 1 for x in existing_ds["patches1d_jxy"].values] query_ivts = [ list(this_ds_gridded.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values ] this_ds = xr.Dataset(attrs=this_ds_gridded.attrs) for var in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: if var not in this_ds_gridded: continue if "time" in this_ds_gridded[var].dims: new_coords = existing_ds["GRAINC_TO_FOOD_ANN"].coords else: new_coords = existing_ds["patches1d_lon"].coords if "cft" in this_ds_gridded[var].dims: this_ds[var] = ( this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), cft=xr.DataArray(query_ivts, dims="patch"), drop=True, ) .assign_coords(new_coords) ) else: this_ds[var] = ( this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), drop=True, ) .assign_coords(new_coords) ) for var in existing_ds: if "patches1d_" in var or "grid1d_" in var: this_ds[var] = existing_ds[var] this_ds["lon"] = this_ds_gridded["lon"] this_ds["lat"] = this_ds_gridded["lat"] # Which crops are irrigated? is_irrigated = np.full_like(this_ds["patches1d_itype_veg"], False) for vegtype_str in np.unique(this_ds["patches1d_itype_veg_str"].values): if "irrigated" not in vegtype_str: continue vegtype_int = utils.ivt_str2int(vegtype_str) is_this_vegtype = np.where(this_ds["patches1d_itype_veg"].values == vegtype_int)[0] is_irrigated[is_this_vegtype] = True this_ds["IRRIGATED"] = xr.DataArray( data=is_irrigated, coords=this_ds["patches1d_itype_veg_str"].coords, attrs={"long_name": "Is patch irrigated?"}, ) # How much area is irrigated? this_ds["IRRIGATED_AREA_CFT"] = this_ds["IRRIGATED"] * this_ds["AREA_CFT"] this_ds["IRRIGATED_AREA_CFT"].attrs = { "long name": "CFT area (irrigated types only)", "units": "m^2", } this_ds["IRRIGATED_AREA_GRID"] = ( this_ds["IRRIGATED_AREA_CFT"] .groupby(this_ds["patches1d_gi"]) .sum() .rename({"patches1d_gi": "gridcell"}) ) this_ds["IRRIGATED_AREA_GRID"].attrs = { "long name": "Irrigated area in gridcell", "units": "m^2", } return this_ds def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_error=False): """ Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV """ var0 = var_list[0] var1 = var_list[1] gdd_lt_hui = this_ds[var0] <= this_ds[var1] if both_nan_ok: gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[var0]) & np.isnan(this_ds[var1])) if np.all(gdd_lt_hui): print(f"✅{msg_txt}{var0} always <= {var1}") else: msg = f"❌{msg_txt}{var0} *not* always <= {var1}" gdd_lt_hui_vals = gdd_lt_hui.values patch_index = np.where(~gdd_lt_hui_vals)[0][0] msg = ( msg + f"\ne.g., patch {patch_index}: {this_ds.patches1d_itype_veg_str.values[patch_index]}," + f" lon {this_ds.patches1d_lon.values[patch_index]} lat " + f"{this_ds.patches1d_lat.values[patch_index]}:" ) msg = msg + f"\n{this_ds[var0].values[patch_index,:]}" msg = msg + f"\n{this_ds[var1].values[patch_index,:]}" if throw_error: print(msg) else: raise RuntimeError(msg) def get_gs_len_da(this_da): """ Get growing season lengths from a DataArray of hdate-sdate """ tmp = this_da.values with np.errstate(invalid="ignore"): tmp_lt_0 = tmp < 0 tmp[tmp_lt_0] = 365 + tmp[tmp_lt_0] this_da.values = tmp this_da.attrs["units"] = "days" return this_da def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): """ Import maximum growing season length """ # Get parameter file pattern = os.path.join(paramfile_dir, f"*{my_clm_ver}_params.{my_clm_subver}.nc") paramfile = glob.glob(pattern) if len(paramfile) != 1: raise RuntimeError(f"Expected to find 1 match of {pattern}; found {len(paramfile)}") paramfile_ds = xr.open_dataset(paramfile[0]) # Import max growing season length (stored in netCDF as nanoseconds!) paramfile_mxmats = paramfile_ds["mxmat"].values / np.timedelta64(1, "D") # Import PFT name list paramfile_pftnames = [ x.decode("UTF-8").replace(" ", "") for x in paramfile_ds["pftname"].values ] # Build dict mxmat_dict = {} for i, pftname in enumerate(paramfile_pftnames): mxmat = paramfile_mxmats[i] if not np.isnan(mxmat): mxmat_dict[pftname] = int(mxmat) else: mxmat_dict[pftname] = np.inf return mxmat_dict def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): """ Import prescribed sowing/harvest dates E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) """ # Get run info: # Max number of growing seasons per year if "mxsowings" in dates_ds: mxsowings = dates_ds.dims["mxsowings"] else: mxsowings = 1 # Which vegetation types were simulated? itype_veg_to_import = np.unique(dates_ds.patches1d_itype_veg) date_varlist = [] for i in itype_veg_to_import: for j in np.arange(mxsowings): this_var = f"{var_prefix}{j+1}_{i}" date_varlist = date_varlist + [this_var] this_ds = import_ds(date_infile, my_vars=date_varlist) did_warn = False for var in this_ds: v_new = var.replace(var_prefix, "gs") this_ds = this_ds.rename({var: v_new}) # Set -1 prescribed GDD values to NaN. Only warn the first time. if ( set_neg1_to_nan and var_prefix == "gdd" and v_new != var and np.any(this_ds[v_new].values < 0) ): if np.any((this_ds[v_new].values < 0) & (this_ds[v_new].values != -1)): raise RuntimeError(f"Unexpected negative value in {var}") if not did_warn: print("Setting -1 rx GDD values to NaN") did_warn = True this_ds[v_new] = this_ds[v_new].where(this_ds[v_new] != -1) return this_ds def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose): """ In import_output(), check that there are no unexpected negative values. """ tiny_neg_ok = 1e-12 this_ds = this_ds_in.copy() for var in this_ds: if not any(x in var for x in varlist_no_negative): continue the_min = np.nanmin(this_ds[var].values) if the_min < 0: if np.abs(the_min) <= tiny_neg_ok: if verbose: print( f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" + f" ({which_file})" ) else: print( f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " + f"({which_file})" ) values = this_ds[var].copy().values with np.errstate(invalid="ignore"): do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) values[np.where(do_setto_0)] = 0 this_ds[var] = xr.DataArray( values, coords=this_ds[var].coords, dims=this_ds[var].dims, attrs=this_ds[var].attrs, ) elif verbose: print(f"No negative value(s) in {var}; min {the_min} ({which_file})") return this_ds def check_no_zeros(this_ds, varlist_no_zero, which_file, verbose): """ In import_output(), check that there are no unexpected zeros. """ for var in this_ds: if not any(x in var for x in varlist_no_zero): continue if np.any(this_ds[var].values == 0): print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") elif verbose: print(f"No zero value(s) in {var} ({which_file})") def import_output( filename, my_vars, year_1=None, year_n=None, my_vegtypes=utils.define_mgdcrop_list(), sdates_rx_ds=None, gdds_rx_ds=None, verbose=False, ): """ Import CLM output """ # Import this_ds = import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) if year_1 and year_n: this_ds = check_and_trim_years(year_1, year_n, this_ds) else: # Assume including all growing seasons except last complete one are "of interest" year_1 = this_ds.time.values[0].year year_n = this_ds.time.values[-1].year - 2 this_ds = check_and_trim_years(year_1, year_n, this_ds) # What vegetation types are included? vegtype_list = [ x for x in this_ds.vegtype_str.values if x in this_ds.patches1d_itype_veg_str.values ] # Check for consistency among sowing/harvest date/year info date_vars = ["SDATES_PERHARV", "SYEARS_PERHARV", "HDATES", "HYEARS"] all_nan = np.full(this_ds[date_vars[0]].shape, True) all_nonpos = np.full(this_ds[date_vars[0]].shape, True) all_pos = np.full(this_ds[date_vars[0]].shape, True) for var in date_vars: all_nan = all_nan & np.isnan(this_ds[var].values) with np.errstate(invalid="ignore"): all_nonpos = all_nonpos & (this_ds[var].values <= 0) all_pos = all_pos & (this_ds[var].values > 0) if np.any(np.bitwise_not(all_nan | all_nonpos | all_pos)): raise RuntimeError("Inconsistent missing/present values on mxharvests axis") # When doing transient runs, it's somehow possible for crops in newly-active patches to be # *already alive*. They even have a sowing date (idop)! This will of course not show up in # SDATES, but it does show up in SDATES_PERHARV. # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going # to invalidate those "seasons." this_ds = handle_zombie_crops(this_ds) # Check for no zero values where there shouldn't be varlist_no_zero = ["DATE", "YEAR"] check_no_zeros(this_ds, varlist_no_zero, "original file", verbose) # Convert time*mxharvests axes to growingseason axis this_ds_gs = convert_axis_time2gs(this_ds, verbose=verbose, incl_orig=False) # These are needed for calculating yield later this_ds_gs["GRAINC_TO_FOOD_PERHARV"] = this_ds["GRAINC_TO_FOOD_PERHARV"] this_ds_gs["GDDHARV_PERHARV"] = this_ds["GDDHARV_PERHARV"] # Get growing season length this_ds["GSLEN_PERHARV"] = get_gs_len_da(this_ds["HDATES"] - this_ds["SDATES_PERHARV"]) this_ds_gs["GSLEN"] = get_gs_len_da(this_ds_gs["HDATES"] - this_ds_gs["SDATES"]) this_ds_gs["GSLEN_PERHARV"] = this_ds["GSLEN_PERHARV"] # Get HUI accumulation as fraction of required this_ds_gs["HUIFRAC"] = this_ds_gs["HUI"] / this_ds_gs["GDDHARV"] this_ds_gs["HUIFRAC_PERHARV"] = this_ds["HUI_PERHARV"] / this_ds["GDDHARV_PERHARV"] for var in ["HUIFRAC", "HUIFRAC_PERHARV"]: this_ds_gs[var].attrs["units"] = "Fraction of required" # Avoid tiny negative values varlist_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose) # Check for no zero values where there shouldn't be varlist_no_zero = ["REASON", "DATE"] check_no_zeros(this_ds_gs, varlist_no_zero, "new file", verbose) # Check that e.g., GDDACCUM <= HUI for var_list in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: if all(v in this_ds_gs for v in var_list): check_v0_le_v1(this_ds_gs, var_list, both_nan_ok=True, throw_error=True) # Check that prescribed calendars were obeyed if sdates_rx_ds: check_rx_obeyed(vegtype_list, sdates_rx_ds, this_ds, "this_ds", "SDATES") if gdds_rx_ds: check_rx_obeyed( vegtype_list, gdds_rx_ds, this_ds, "this_ds", "GDDHARV", gdd_min=DEFAULT_GDD_MIN, ) # Convert time axis to integer year, saving original as 'cftime' this_ds_gs = this_ds_gs.assign_coords( {"cftime": this_ds["time_bounds"].isel({"hist_interval": 0})} ) this_ds_gs = this_ds_gs.assign_coords({"time": [t.year for t in this_ds_gs["cftime"].values]}) # Get number of harvests this_ds_gs["NHARVESTS"] = (this_ds_gs["GDDHARV_PERHARV"] > 0).sum(dim="mxharvests") # Get number of harvests that would be missed if only seeing max 1 per calendar year if np.any(this_ds_gs["NHARVESTS"] > 2): raise RuntimeError("How to get NHARVEST_DISCREP for NHARVESTS > 2?") this_ds_gs["NHARVEST_DISCREP"] = (this_ds_gs["NHARVESTS"] == 2).astype(int) return this_ds_gs def handle_zombie_crops(this_ds): """ When doing transient runs, it's somehow possible for crops in newly-active patches to be *already alive*. They even have a sowing date (idop)! This will of course not show up in SDATES, but it does show up in SDATES_PERHARV. I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going to invalidate those "seasons." """ # In all but the last calendar year, which patches had no sowing? no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? # (Indicates sowing the previous calendar year.) with np.errstate(invalid="ignore"): hsdate1_gt_hdate1_yp = ( this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] ) # Where both, we have the problem. falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp if np.any(falsely_alive_yp): print( f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " + "year before harvest, but no sowings occurred that year." ) falsely_alive_yp = np.concatenate( (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 ) falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) for var in this_ds.data_vars: if this_ds[var].dims != ("time", "mxharvests", "patch"): continue this_ds[var] = this_ds[var].where(~falsely_alive_yhp) return this_ds