""" |------------------------------------------------------------------| |--------------------- Instructions -----------------------------| |------------------------------------------------------------------| Instructions for running using conda python environments: ../../py_env_create conda activate ctsm_py ------------------------------------------------------------------- To see the available options for single point or regional cases: ./subset_data.py --help ------------------------------------------------------------------- This script extracts domain files, surface dataset, and DATM files at either a single point or a region using a global dataset. Currently this script subsets default surface, landuse, and DATM files, which can be seen in the defaults.cfg file. To run a single-point or regional case using this data with the NUOPC driver, you must update the variable(s) `fsurdat` and/or `landuse` in the user_nl_clm namelist file to be the full path to the subset files. This script will automatically create this file using the flag --create-user-mods. To use subset climate data, the namelist file user_nl_datm_streams must also be updated - this script will automatically create this file with --create-user-mods. This flag will also create necessary single-point xml commands in the file shell_commands. To use the created user mods with a case use --user-mods-dir PATH/TO/USER/MODS in the ./create.newcase call. By default, this script only extracts surface dataset. For extracting other files, the appropriate flags should be used. To run this script the following packages are required: - numpy - xarray ------------------------------------------------------------------- To run the script for a single point: ./subset_data.py point To run the script for a region: ./subset_data.py region To remove NPL from your environment on Cheyenne/Casper: deactivate ------------------------------------------------------------------- """ # TODO [NS]: # -[] Automatic downloading of missing files if they are missing # -- Import libraries # -- standard libraries import os import logging import textwrap import configparser from getpass import getuser import argparse from argparse import ArgumentParser # -- import local classes for this script from ctsm.site_and_regional.base_case import DatmFiles from ctsm.site_and_regional.single_point_case import SinglePointCase from ctsm.site_and_regional.regional_case import RegionalCase from ctsm.args_utils import plon_type, plat_type from ctsm.path_utils import path_to_ctsm_root from ctsm.utils import abort # -- import ctsm logging flags from ctsm.ctsm_logging import ( setup_logging_pre_config, add_logging_args, process_logging_args, ) DEFAULTS_CONFIG = "tools/site_and_regional/default_data_2000.cfg" logger = logging.getLogger(__name__) def get_parser(): """ Get the parser object for subset_data.py script. Returns: parser (ArgumentParser): ArgumentParser which includes all the parser information. """ parser = ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) parser.print_usage = parser.print_help subparsers = parser.add_subparsers( help="Two possible ways to run this script, either:", dest="run_type" ) pt_parser = subparsers.add_parser("point", help="Run script for a single point.") rg_parser = subparsers.add_parser("region", help="Run script for a region.") # -- single point parser options pt_parser.add_argument( "--lat", help="Single point latitude. [default: %(default)s]", action="store", dest="plat", required=False, type=plat_type, default=42.5, ) pt_parser.add_argument( "--lon", help="Single point longitude. [default: %(default)s]", action="store", dest="plon", required=False, type=plon_type, default=287.8, ) pt_parser.add_argument( "--site", help="Site name or tag. [default: %(default)s]", action="store", dest="site_name", required=False, type=str, default="", ) pt_parser.add_argument( "--uniform-snowpack", help="Modify surface data to have a uniform snow fraction.", action="store_true", dest="uni_snow", required=False, ) pt_parser.add_argument( "--include-nonveg", help="Do not zero non-vegetation land units in the surface data.", action="store_true", dest="include_nonveg", required=False, ) pt_parser.add_argument( "--cap-saturation", help="Modify surface data to not allow saturation excess.", action="store_true", dest="cap_saturation", required=False, ) pt_parser.add_argument( "--evenly_split_cropland", help="Introduce equal areas of all crops", action="store_true", dest="evenly_split_cropland", required=False, ) pt_parser.add_argument( "--dompft", help="Dominant PFT(s): if we set the grid to 100%% one or multiple PFTs \ [default: %(default)s].", action="store", dest="dom_pft", type=int, default=None, nargs="*", ) pt_parser.add_argument( "--pctpft", help="Percetages of each pft (set by --dompft) on the land unit.", action="store", dest="pct_pft", type=float, default=None, nargs="*", ) pt_parser.add_argument( "--cth", help="canopy top height for pft", action="store", dest="cth", type=float, default=None, nargs="*", ) pt_parser.add_argument( "--cbh", help="canopy bottom height for pft", action="store", dest="cbh", type=float, default=None, nargs="*", ) # -- region-specific parser options rg_parser.add_argument( "--lat1", help="Region southernmost latitude. [default: %(default)s]", action="store", dest="lat1", required=False, type=plat_type, default=-40, ) rg_parser.add_argument( "--lat2", help="Region northernmost latitude. [default: %(default)s]", action="store", dest="lat2", required=False, type=plat_type, default=15, ) rg_parser.add_argument( "--lon1", help="Region westernmost longitude. [default: %(default)s]", action="store", dest="lon1", required=False, type=plon_type, default=275.0, ) rg_parser.add_argument( "--lon2", help="Region easternmost longitude. [default: %(default)s]", action="store", dest="lon2", required=False, type=plon_type, default=330.0, ) rg_parser.add_argument( "--reg", help="Region name or tag. [default: %(default)s]", action="store", dest="reg_name", required=False, type=str, default="", ) rg_parser.add_argument( "--create-mesh", help="Subset a mesh file for a region.", action="store_true", dest="create_mesh", required=False, ) # -- common options between both subparsers for subparser in [pt_parser, rg_parser]: subparser.add_argument( "--create-surface", help="Create surface data file at single point/region.", action="store_true", dest="create_surfdata", required=False, ) subparser.add_argument( "--surf-year", help="Year for surface data file at single point/region \ (and start year for land-use timeseries).", action="store", dest="surf_year", type=int, default=2000, required=False, ) subparser.add_argument( "--create-landuse", help="Create landuse data file at a single point/region.", action="store_true", dest="create_landuse", required=False, ) subparser.add_argument( "--create-datm", help="Create DATM forcing data at a single point/region.", action="store_true", dest="create_datm", required=False, ) subparser.add_argument( "--create-domain", help="Create CLM domain file for a single point/region \ Domain files are not needed for NUOPC cases, \ but are needed to create mesh files that are needed for NUOPC cases.", action="store_true", dest="create_domain", required=False, ) subparser.add_argument( "--create-user-mods", help="Create user mods directories and files for running CTSM with the subset data.", action="store_true", dest="create_user_mods", required=False, ) subparser.add_argument( "--datm-syr", help="Start year for creating DATM forcing at single point/region. [default: %(" "default)s]", action="store", dest="datm_syr", required=False, type=int, default=1901, ) subparser.add_argument( "--datm-eyr", help="End year for creating DATM forcing at single point/region. " "[default: %(default)s]", action="store", dest="datm_eyr", required=False, type=int, default=2014, ) subparser.add_argument( "--crop", help="Create datasets using the extensive list of prognostic crop types.", action="store_true", dest="crop_flag", required=False, ) if subparser == pt_parser: parser_name = "single_point" else: parser_name = "regional" subparser.add_argument( "--outdir", help="Output directory. \n [default: %(default)s]", action="store", dest="out_dir", type=str, default=os.path.join(os.getcwd(), "subset_data_" + parser_name), ) subparser.add_argument( "--user-mods-dir", help="User mods directory.", action="store", dest="user_mods_dir", type=str, default="", ) subparser.add_argument( "--out-surface", help="Output surface dataset name \ (if you want to override the default based on the current date). \n \ (only valid if outputing a surface dataset)", action="store", dest="out_surface", type=str, ) cesmroot = path_to_ctsm_root() defaults_file = os.path.join(cesmroot, DEFAULTS_CONFIG) subparser.add_argument( "--cfg-file", help="Default configure file to use for default filenames.", action="store", dest="config_file", type=str, default=defaults_file, ) subparser.add_argument( "--overwrite", help="Flag to overwrite if the files already exists.", action="store_true", dest="overwrite", ) subparser.add_argument( "--inputdata-dir", help="Top level path to the CESM inputdata directory.", action="store", dest="inputdatadir", type=str, default="defaults.cfg", ) add_logging_args(subparser) # -- print help for both subparsers parser.epilog = textwrap.dedent( f"""\ {pt_parser.format_help()} {rg_parser.format_help()} """ ) return parser def check_args(args): """Check the command line arguments""" # --------------------------------- # # print help and exit when no option is chosen if args.run_type not in ("point", "region"): err_msg = textwrap.dedent( """\ \n ------------------------------------ \n Must supply a positional argument: 'point' or 'region'. """ ) raise argparse.ArgumentError(None, err_msg) if not any([args.create_surfdata, args.create_landuse, args.create_datm, args.create_domain]): err_msg = textwrap.dedent( """\ \n ------------------------------------ \n Must supply one of: \n --create-surface \n --create-landuse \n --create-datm \n --create-domain \n \n """ ) raise argparse.ArgumentError(None, err_msg) if not os.path.exists(args.config_file): err_msg = textwrap.dedent( """\ \n ------------------------------------ \n Entered default config file does not exist" """ ) raise argparse.ArgumentError(None, err_msg) if args.out_surface and not args.create_surfdata: err_msg = textwrap.dedent( """\ \n ------------------------------------ \n out-surface option is given without the --create-surface option" """ ) raise argparse.ArgumentError(None, err_msg) if args.create_landuse and not args.create_surfdata: err_msg = textwrap.dedent( """\ \n ------------------------------------ \n --create-landuse option requires the --create-surface option: """ ) raise argparse.ArgumentError(None, err_msg) if args.surf_year != 2000 and not args.create_surfdata: err_msg = textwrap.dedent( """\ \n ------------------------------------ \n --surf-year option is set to something besides the default of 2000 \n without the --create-surface option" """ ) raise argparse.ArgumentError(None, err_msg) if args.surf_year != 1850 and args.create_landuse: err_msg = textwrap.dedent( """\ \n ------------------------------------ \n --surf-year option is NOT set to 1850 and the --create-landuse option \n is selected which requires it to be 1850 """ ) raise argparse.ArgumentError(None, err_msg) if args.surf_year != 1850 and args.surf_year != 2000: err_msg = textwrap.dedent( """\ \n ------------------------------------ \n --surf-year option can only be set to 1850 or 2000 """ ) raise argparse.ArgumentError(None, err_msg) if args.out_surface and os.path.exists(args.out_surface) and not args.overwrite: err_msg = textwrap.dedent( """\ \n ------------------------------------ \n out-surface filename exists and the overwrite option was not also selected" """ ) raise argparse.ArgumentError(None, err_msg) if args.run_type == "region" and args.create_user_mods: if not args.create_mesh: err_msg = textwrap.dedent( """\ \n ------------------------------------ \nERROR: For regional cases, you can not create user_mods \nwithout creating the mesh file. \nPlease rerun the script adding --create-mesh to subset the mesh file." """ ) raise argparse.ArgumentError(None, err_msg) if args.run_type == "region" and args.create_mesh: if not args.create_domain: err_msg = textwrap.dedent( """\ \n ------------------------------------ \nERROR: For regional cases, you can not create mesh files \nwithout creating the domain file. \nPlease rerun the script adding --create-domain to subset the domain file." """ ) raise argparse.ArgumentError(None, err_msg) def setup_user_mods(user_mods_dir, cesmroot): """ Sets up the user mods files and directories """ if not os.path.isdir(user_mods_dir): os.mkdir(user_mods_dir) nl_clm_base = os.path.join(cesmroot, "cime_config/user_nl_clm") nl_clm = os.path.join(user_mods_dir, "user_nl_clm") with open(nl_clm_base, "r") as basefile, open(nl_clm, "w") as user_file: for line in basefile: user_file.write(line) nl_datm_base = os.path.join( cesmroot, "components/cdeps/datm/cime_config" "/user_nl_datm_streams" ) nl_datm = os.path.join(user_mods_dir, "user_nl_datm_streams") with open(nl_datm_base, "r") as base_file, open(nl_datm, "w") as user_file: for line in base_file: user_file.write(line) def determine_num_pft(crop): """ A simple function to determine the number of pfts. Args: crop (bool): crop flag denoting if we are using crop Raises: Returns: num_pft (int) : number of pfts for surface dataset """ if crop: num_pft = "78" else: num_pft = "16" logger.debug("crop_flag = %s => num_pft = %s", crop.__str__(), num_pft) return num_pft def setup_files(args, defaults, cesmroot): """ Sets up the files and folders needed for this program """ if args.user_mods_dir == "": args.user_mods_dir = os.path.join(args.out_dir, "user_mods") if not os.path.isdir(args.out_dir): os.mkdir(args.out_dir) if args.create_user_mods: setup_user_mods(args.user_mods_dir, cesmroot) if args.inputdatadir == "defaults.cfg": clmforcingindir = defaults.get("main", "clmforcingindir") else: clmforcingindir = args.inputdatadir if not os.path.isdir(clmforcingindir): logger.info("clmforcingindir does not exist: %s", clmforcingindir) abort("inputdata directory does not exist") # DATM data datm_type = "datm_gswp3" dir_output_datm = "datmdata" dir_input_datm = os.path.join(clmforcingindir, defaults.get(datm_type, "dir")) if args.create_datm: if not os.path.isdir(os.path.join(args.out_dir, dir_output_datm)): os.mkdir(os.path.join(args.out_dir, dir_output_datm)) logger.info("dir_input_datm : %s", dir_input_datm) logger.info("dir_output_datm: %s", os.path.join(args.out_dir, dir_output_datm)) # if the crop flag is on - we need to use a different land use and surface data file num_pft = determine_num_pft(args.crop_flag) fsurf_in = defaults.get("surfdat", "surfdat_" + num_pft + "pft") fluse_in = defaults.get("landuse", "landuse_" + num_pft + "pft") if args.out_surface: fsurf_out = args.out_surface else: fsurf_out = None file_dict = { "main_dir": clmforcingindir, "fdomain_in": defaults.get("domain", "file"), "fsurf_dir": os.path.join( clmforcingindir, os.path.join(defaults.get("surfdat", "dir")), ), "mesh_dir": os.path.join( clmforcingindir, os.path.join(defaults.get("surfdat", "mesh_dir")), ), "fluse_dir": os.path.join( clmforcingindir, os.path.join(defaults.get("landuse", "dir")), ), "fsurf_in": fsurf_in, "fsurf_out": fsurf_out, "fluse_in": fluse_in, "mesh_surf": defaults.get("surfdat", "mesh_surf"), "datm_tuple": DatmFiles( dir_input_datm, dir_output_datm, defaults.get(datm_type, "domain"), defaults.get(datm_type, "solardir"), defaults.get(datm_type, "precdir"), defaults.get(datm_type, "tpqwdir"), defaults.get(datm_type, "solartag"), defaults.get(datm_type, "prectag"), defaults.get(datm_type, "tpqwtag"), defaults.get(datm_type, "solarname"), defaults.get(datm_type, "precname"), defaults.get(datm_type, "tpqwname"), ), } return file_dict def subset_point(args, file_dict: dict): """ Subsets surface, domain, land use, and/or DATM files at a single point """ logger.info("----------------------------------------------------------------------------") logger.info("This script extracts a single point from the global CTSM datasets.") num_pft = int(determine_num_pft(args.crop_flag)) # -- Create SinglePoint Object single_point = SinglePointCase( plat=args.plat, plon=args.plon, site_name=args.site_name, create_domain=args.create_domain, create_surfdata=args.create_surfdata, create_landuse=args.create_landuse, create_datm=args.create_datm, create_user_mods=args.create_user_mods, dom_pft=args.dom_pft, evenly_split_cropland=args.evenly_split_cropland, pct_pft=args.pct_pft, num_pft=num_pft, cth=args.cth, cbh=args.cbh, include_nonveg=args.include_nonveg, uni_snow=args.uni_snow, cap_saturation=args.cap_saturation, out_dir=args.out_dir, overwrite=args.overwrite, ) logger.debug(single_point) # -- Create CTSM surface data file if single_point.create_surfdata: single_point.create_surfdata_at_point( file_dict["fsurf_dir"], file_dict["fsurf_in"], args.user_mods_dir, specify_fsurf_out=file_dict["fsurf_out"], ) # -- Create CTSM transient landuse data file if single_point.create_landuse: single_point.create_landuse_at_point( file_dict["fluse_dir"], file_dict["fluse_in"], args.user_mods_dir ) # -- Create single point atmospheric forcing data if single_point.create_datm: # subset DATM domain file single_point.create_datmdomain_at_point(file_dict["datm_tuple"]) # subset the DATM data nl_datm = os.path.join(args.user_mods_dir, "user_nl_datm_streams") single_point.create_datm_at_point( file_dict["datm_tuple"], args.datm_syr, args.datm_eyr, nl_datm ) # -- Write shell commands if single_point.create_user_mods: single_point.write_shell_commands(os.path.join(args.user_mods_dir, "shell_commands")) logger.info("Successfully ran script for single point.") def subset_region(args, file_dict: dict): """ Subsets surface, domain, land use, and/or DATM files for a region """ logger.info("----------------------------------------------------------------------------") logger.info("This script extracts a region from the global CTSM datasets.") # -- Create Region Object region = RegionalCase( lat1=args.lat1, lat2=args.lat2, lon1=args.lon1, lon2=args.lon2, reg_name=args.reg_name, create_domain=args.create_domain, create_surfdata=args.create_surfdata, create_landuse=args.create_landuse, create_datm=args.create_datm, create_user_mods=args.create_user_mods, create_mesh=args.create_mesh, out_dir=args.out_dir, overwrite=args.overwrite, ) logger.debug(region) # -- Create CTSM domain file if region.create_domain: region.create_domain_at_reg(file_dict["main_dir"], file_dict["fdomain_in"]) # -- Create CTSM surface data file if region.create_surfdata: region.create_surfdata_at_reg( file_dict["fsurf_dir"], file_dict["fsurf_in"], args.user_mods_dir, specify_fsurf_out=file_dict["fsurf_out"], ) # if region.create_mesh: # region.create_mesh_at_reg (file_dict["mesh_dir"], file_dict["mesh_surf"]) # -- Create CTSM transient landuse data file if region.create_landuse: region.create_landuse_at_reg( file_dict["fluse_dir"], file_dict["fluse_in"], args.user_mods_dir ) # -- Write shell commands if region.create_user_mods: region.write_shell_commands(os.path.join(args.user_mods_dir, "shell_commands")) print("\nFor running this regional case with the created user_mods : ") print( "./create_newcase --case case --res CLM_USRDAT --compset I2000Clm51BgcCrop", "--run-unsupported --user-mods-dirs ", args.user_mods_dir, "\n\n", ) logger.info("Successfully ran script for a regional case.") def main(): """ Calls functions that subset surface, landuse, domain, and/or DATM files for a region or a single point. """ # --------------------------------- # # add logging flags from ctsm_logging setup_logging_pre_config() parser = get_parser() args = parser.parse_args() check_args(args) # --------------------------------- # # process logging args (i.e. debug and verbose) process_logging_args(args) # --------------------------------- # # parse defaults file cesmroot = path_to_ctsm_root() defaults = configparser.ConfigParser() defaults.read(args.config_file) # --------------------------------- # myname = getuser() pwd = os.getcwd() logger.info("User = %s", myname) logger.info("Current directory = %s", pwd) # --------------------------------- # # create files and folders necessary and return dictionary of file/folder locations file_dict = setup_files(args, defaults, cesmroot) if args.run_type == "point": subset_point(args, file_dict) elif args.run_type == "region": subset_region(args, file_dict)