""" Convert GGCMI crop calendar files for use in CTSM """ import sys import argparse import logging import os import datetime as dt import numpy as np import xarray as xr import cftime # -- add python/ctsm to path (needed if we want to run process_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) # pylint: disable=wrong-import-position from ctsm import ctsm_logging import ctsm.crop_calendars.cropcal_utils as utils import ctsm.crop_calendars.regrid_ggcmi_shdates as regrid logger = logging.getLogger(__name__) def get_cft(year): """ Given a year, return the cftime.DatetimeNoLeap of Jan. 1 at 00:00. """ return cftime.DatetimeNoLeap(year, 1, 1, 0, 0, 0, 0, has_year_zero=True) def get_dayssince_jan1y1(year1, year): """ Get the number of days since Jan. 1 of year1 """ cft_y1 = get_cft(year1) cft_y = get_cft(year) time_delta = cft_y - cft_y1 time_delta_secs = time_delta.total_seconds() return time_delta_secs / (60 * 60 * 24) def main(): """ main() function for calling process_ggcmi_shdates.py from command line. """ ctsm_logging.setup_logging_pre_config() args = process_ggcmi_shdates_args() process_ggcmi_shdates( args.input_directory, args.output_directory, args.author, args.file_specifier, args.first_year, args.last_year, args.ggcmi_author, args.regrid_resolution, args.regrid_template_file, args.regrid_extension, args.crop_list, ) def process_ggcmi_shdates_args(): """ Set up and parse input arguments for working with GGCMI crop calendar files """ parser = argparse.ArgumentParser( description=( "Converts raw sowing and harvest date files provided by GGCMI into " + "a format that CLM can read, optionally at a target resolution." ) ) # Required parser.add_argument( "-i", "--input-directory", help="Directory containing the raw GGCMI sowing/harvest date files", type=str, required=True, ) parser.add_argument( "-o", "--output-directory", help="Where to save the CLM-compatible sowing and harvest date files", type=str, required=True, ) parser.add_argument( "-a", "--author", help=( "String to be saved in author_thisfile attribute of output files. " + "E.g., 'Author Name (authorname@ucar.edu)'" ), type=str, required=True, ) # Optional parser.add_argument( "--file-specifier", help=( "String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. " + "Will also be saved to output filenames." ), type=str, default="ggcmi_crop_calendar_phase3_v1.01", ) parser.add_argument( "-y1", "--first-year", help=( "First year in output files. Must be present in template file, " + "unless it's the same as the last year." ), type=int, default=2000, ) parser.add_argument( "-yN", "--last-year", help=( "Last year in output files. Must be present in template file, " + "unless it's the same as the first year." ), type=int, default=2000, ) parser.add_argument( "--ggcmi-author", help="Author of original GGCMI files", type=str, default="Jonas Jägermeyr (jonas.jaegermeyr@columbia.edu)", ) ctsm_logging.add_logging_args(parser) # Arguments for regridding parser = regrid.define_arguments(parser) # Get arguments args = parser.parse_args(sys.argv[1:]) ctsm_logging.process_logging_args(args) return args def setup_crop_dict(): """ Associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. Some notes: - As "CLMname: {clm_num, thiscrop_ggcmi}" - CLM names and numbers taken from commit 3dcbc7499a57904750a994672fc36b4221b9def5 - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. - Using GGCMI "pea" for CLM pulses, as suggested by GGCMI phase 3 protocol. - Only using GGCMI "ri1" for rice; ignoring "ri2". """ def set_crop_dict(thisnum, thisname): return {"clm_num": thisnum, "thiscrop_ggcmi": thisname} crop_dict = { "temperate_corn": set_crop_dict(17, "mai_rf"), "irrigated_temperate_corn": set_crop_dict(18, "mai_ir"), "spring_wheat": set_crop_dict(19, "swh_rf"), "irrigated_spring_wheat": set_crop_dict(20, "swh_ir"), "winter_wheat": set_crop_dict(21, "wwh_rf"), "irrigated_winter_wheat": set_crop_dict(22, "wwh_ir"), "temperate_soybean": set_crop_dict(23, "soy_rf"), "irrigated_temperate_soybean": set_crop_dict(24, "soy_ir"), "barley": set_crop_dict(25, "bar_rf"), "irrigated_barley": set_crop_dict(26, "bar_ir"), "winter_barley": set_crop_dict(27, "wwh_rf"), "irrigated_winter_barley": set_crop_dict(28, "wwh_ir"), "rye": set_crop_dict(29, "rye_rf"), "irrigated_rye": set_crop_dict(30, "rye_ir"), "winter_rye": set_crop_dict(31, "wwh_rf"), "irrigated_winter_rye": set_crop_dict(32, "wwh_ir"), "cassava": set_crop_dict(33, "cas_rf"), "irrigated_cassava": set_crop_dict(34, "cas_ir"), "citrus": set_crop_dict(35, None), "irrigated_citrus": set_crop_dict(36, None), "cocoa": set_crop_dict(37, None), "irrigated_cocoa": set_crop_dict(38, None), "coffee": set_crop_dict(39, None), "irrigated_coffee": set_crop_dict(40, None), "cotton": set_crop_dict(41, "cot_rf"), "irrigated_cotton": set_crop_dict(42, "cot_ir"), "datepalm": set_crop_dict(43, None), "irrigated_datepalm": set_crop_dict(44, None), "foddergrass": set_crop_dict(45, None), "irrigated_foddergrass": set_crop_dict(46, None), "grapes": set_crop_dict(47, None), "irrigated_grapes": set_crop_dict(48, None), "groundnuts": set_crop_dict(49, "nut_rf"), "irrigated_groundnuts": set_crop_dict(50, "nut_ir"), "millet": set_crop_dict(51, "mil_rf"), "irrigated_millet": set_crop_dict(52, "mil_ir"), "oilpalm": set_crop_dict(53, None), "irrigated_oilpalm": set_crop_dict(54, None), "potatoes": set_crop_dict(55, "pot_rf"), "irrigated_potatoes": set_crop_dict(56, "pot_ir"), "pulses": set_crop_dict(57, "pea_rf"), "irrigated_pulses": set_crop_dict(58, "pea_ir"), "rapeseed": set_crop_dict(59, "rap_rf"), "irrigated_rapeseed": set_crop_dict(60, "rap_ir"), "rice": set_crop_dict(61, "ric_rf"), "irrigated_rice": set_crop_dict(62, "ric_ir"), "sorghum": set_crop_dict(63, "sor_rf"), "irrigated_sorghum": set_crop_dict(64, "sor_ir"), "sugarbeet": set_crop_dict(65, "sgb_rf"), "irrigated_sugarbeet": set_crop_dict(66, "sgb_ir"), "sugarcane": set_crop_dict(67, "sgc_rf"), "irrigated_sugarcane": set_crop_dict(68, "sgc_ir"), "sunflower": set_crop_dict(69, "sun_rf"), "irrigated_sunflower": set_crop_dict(70, "sun_ir"), "miscanthus": set_crop_dict(71, None), "irrigated_miscanthus": set_crop_dict(72, None), "switchgrass": set_crop_dict(73, None), "irrigated_switchgrass": set_crop_dict(74, None), "tropical_corn": set_crop_dict(75, "mai_rf"), "irrigated_tropical_corn": set_crop_dict(76, "mai_ir"), "tropical_soybean": set_crop_dict(77, "soy_rf"), "irrigated_tropical_soybean": set_crop_dict(78, "soy_ir"), "c3_crop": set_crop_dict(15, None), "c3_irrigated": set_crop_dict(16, None), } return crop_dict def setup_var_dict(): """ Associate CLM variable names with their GGCMI counterparts. - We also save a placeholder for output file paths associated with each variable. - As CLMname: {GGCMIname, output_file} """ def set_var_dict(name_ggcmi, outfile): return {"name_ggcmi": name_ggcmi, "outfile": outfile} variable_dict = { "sdate": set_var_dict("planting_day", ""), "hdate": set_var_dict("maturity_day", ""), } return variable_dict def set_var_attrs(thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue): """ Set output variable attributes """ longname = thisvar_da.attrs["long_name"] longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm) thisvar_da.attrs["long_name"] = longname if thiscrop_ggcmi is None: thisvar_da.attrs["crop_name_clm"] = "none" thisvar_da.attrs["crop_name_ggcmi"] = "none" else: thisvar_da.attrs["crop_name_clm"] = thiscrop_clm thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi thisvar_da.attrs["units"] = "day of year" thisvar_da.encoding["_FillValue"] = new_fillvalue # scale_factor and add_offset are required by I/O library for short data # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html: # unpacked_value = packed_value * scale_factor + add_offset thisvar_da.attrs["scale_factor"] = np.int16(1) thisvar_da.attrs["add_offset"] = np.int16(0) return thisvar_da def fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue): """ Ensure fill value and real data are correct format """ dummyvalue = -1 thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue if thiscrop_ggcmi is None: thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue) else: thisvar_ds.variables[varname_ggcmi].values[ np.isnan(thisvar_ds.variables[varname_ggcmi].values) ] = new_fillvalue thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[ varname_ggcmi ].values.astype("int16") return thisvar_ds def add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm): """ Add time dimension (https://stackoverflow.com/a/62862440) - Repeats original map for every timestep - Probably not necessary to use this method, since I only end up extracting thisvar_ds.values anyway---I could probably use some numpy method instead. """ thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time) thisvar_da_tmp = thisvar_ds[varname_ggcmi] thisvar_da = xr.DataArray( data=thisvar_da_tmp.values.astype("int16"), attrs=thisvar_da_tmp.attrs, coords=thisvar_da_tmp.coords, name=varname_clm, ) return thisvar_da def create_output_files( regrid_resolution, variable_dict, output_directory, file_specifier, first_year, last_year, template_ds, ): """ Create output files, one for each variable """ datetime_string = dt.datetime.now().strftime("%year%m%d_%H%M%S") nninterp_suffix = "nninterp-" + regrid_resolution for var in variable_dict: basename = ( f"{var}s_{file_specifier}_{nninterp_suffix}." + f"{first_year}-{last_year}.{datetime_string}.nc" ) outfile = os.path.join(output_directory, basename) variable_dict[var]["outfile"] = outfile template_ds.to_netcdf( path=variable_dict[var]["outfile"], format="NETCDF3_CLASSIC", ) return nninterp_suffix def strip_dataset(cropcal_ds, varname_ggcmi): """ Remove all variables except one from Dataset """ droplist = [] for i in list(cropcal_ds.keys()): if i != varname_ggcmi: droplist.append(i) thisvar_ds = cropcal_ds.drop(droplist) return thisvar_ds def process_ggcmi_shdates( input_directory, output_directory, author, file_specifier, first_year, last_year, ggcmi_author, regrid_resolution, regrid_template_file, regrid_extension, crop_list, ): """ Convert GGCMI crop calendar files for use in CTSM """ input_directory = os.path.realpath(input_directory) output_directory = os.path.realpath(output_directory) ############################################################ ### Regrid original GGCMI files to target CLM resolution ### ############################################################ regridded_ggcmi_files_dir = os.path.join( output_directory, f"regridded_ggcmi_files-{regrid_resolution}" ) regrid.regrid_ggcmi_shdates( regrid_resolution, regrid_template_file, input_directory, regridded_ggcmi_files_dir, regrid_extension, crop_list, ) # Set up dictionaries used in remapping crops and variables between GGCMI and CLM crop_dict = setup_crop_dict() variable_dict = setup_var_dict() ################################ ### Instantiate output files ### ################################ # Global attributes for output files comment = ( "Day of year is 1-indexed (i.e., Jan. 1 = 1). " + "Filled using cdo -remapnn,$original -setmisstonn" ) out_attrs = { "title": "GGCMI crop calendar for Phase 3, v1.01", "author_thisfile": author, "author_original": ggcmi_author, "comment": comment, "created": dt.datetime.now().replace(microsecond=0).astimezone().isoformat(), } # Create template dataset time_array = np.array( [get_dayssince_jan1y1(first_year, year) for year in np.arange(first_year, last_year + 1)] ) time_coord = xr.IndexVariable( "time", data=time_array, attrs={ "long_name": "time", "units": f"days since {first_year}-01-01", "calendar": "noleap", }, ) template_ds = xr.Dataset(coords={"time": time_coord}, attrs=out_attrs) # Create output files nninterp_suffix = create_output_files( regrid_resolution, variable_dict, output_directory, file_specifier, first_year, last_year, template_ds, ) ######################### ### Process all crops ### ######################### for thiscrop_clm in crop_dict: # Which crop are we on? crop_int = list(crop_dict.keys()).index(thiscrop_clm) + 1 # Get information about this crop this_dict = crop_dict[thiscrop_clm] thiscrop_int = this_dict["clm_num"] thiscrop_ggcmi = this_dict["thiscrop_ggcmi"] # If --regrid-crop-list specified, only process crops from that list if crop_list is not None and thiscrop_ggcmi is not None and thiscrop_ggcmi not in crop_list: continue # If no corresponding GGCMI crop, skip opening dataset. # Will use previous cropcal_ds as a template. if thiscrop_ggcmi is None: if crop_int == 1: raise ValueError(f"First crop ({thiscrop_clm}) must have a GGCMI type") logger.info( "Filling %s with dummy data (%d of %d)...", str(thiscrop_clm), crop_int, len(crop_dict), ) # Otherwise, import crop calendar file else: logger.info( "Importing %s -> %s (%d of %d)...", str(thiscrop_ggcmi), str(thiscrop_clm), crop_int, len(crop_dict), ) file_ggcmi = os.path.join( regridded_ggcmi_files_dir, f"{thiscrop_ggcmi}_{file_specifier}_{nninterp_suffix}.nc4", ) if not os.path.exists(file_ggcmi): logger.warning( "Skipping %s because input file not found: %s", thiscrop_ggcmi, file_ggcmi ) continue cropcal_ds = xr.open_dataset(file_ggcmi) # Flip latitude to match destination cropcal_ds = cropcal_ds.reindex(lat=cropcal_ds.lat[::-1]) # Rearrange longitude to match destination (does nothing if not needed) cropcal_ds = utils.lon_idl2pm(cropcal_ds, fail_silently=True) for thisvar_clm in variable_dict: # Get GGCMI netCDF info varname_ggcmi = variable_dict[thisvar_clm]["name_ggcmi"] logger.info(" Processing %s...", varname_ggcmi) # Get CLM netCDF info varname_clm = thisvar_clm + "1_" + str(thiscrop_int) file_clm = variable_dict[thisvar_clm]["outfile"] if not os.path.exists(file_clm): raise Exception("Output file not found: " + file_clm) # Strip dataset to just this variable strip_dataset(cropcal_ds, varname_ggcmi) # Convert to integer new_fillvalue = -1 thisvar_ds = fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue) # Add time dimension (https://stackoverflow.com/a/62862440) thisvar_da = add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm) thisvar_da = set_var_attrs( thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue ) # Save logger.info(" Saving %s...", varname_ggcmi) thisvar_da.to_netcdf(file_clm, mode="a", format="NETCDF3_CLASSIC") cropcal_ds.close() logger.info("Done!")