clm5/python/ctsm/toolchain/gen_mksurfdata_namelist.py
2024-05-09 15:14:01 +08:00

929 lines
32 KiB
Python

"""
gen_mksurfdata_namelist.py generates a namelist for use with the mksurfdata
executable. For detailed instructions, see README.
"""
import os
import sys
import xml.etree.ElementTree as ET
import logging
import argparse
import textwrap
import subprocess
from datetime import datetime
import netCDF4
from ctsm.path_utils import path_to_ctsm_root
from ctsm.ctsm_logging import setup_logging_pre_config, add_logging_args, process_logging_args
logger = logging.getLogger(__name__)
# valid options for SSP/RCP scenarios
# (accepted values for the --ssp-rcp command-line argument; "none" selects
# the plain historical / time-slice datasets rather than a future scenario)
valid_opts = {
    "ssp-rcp": [
        "SSP1-2.6",
        "SSP3-7.0",
        "SSP5-3.4",
        "SSP2-4.5",
        "SSP1-1.9",
        "SSP4-3.4",
        "SSP4-6.0",
        "SSP5-8.5",
        "none",
    ]
}
def get_parser():
    """
    Get parser object for this script.

    Returns an argparse.ArgumentParser configured with all command-line
    options for generating a mksurfdata namelist.  Note that a few
    store_true flags use inverted dest names (e.g. --nosurfdata ->
    surfdata_flag, --nocrop -> crop_flag); main() reads them accordingly.
    """
    # set up logging allowing user control
    setup_logging_pre_config()
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    # Show the full help text (not just the usage line) on argument errors
    parser.print_usage = parser.print_help
    add_logging_args(parser)
    parser.add_argument(
        "--start-year",
        help=textwrap.dedent(
            """\
               Simulation start year.
               [Required]"""
        ),
        action="store",
        dest="start_year",
        required=True,
        type=int,
    )
    parser.add_argument(
        "--end-year",
        help=textwrap.dedent(
            """\
               Simulation end year.
               [Required]"""
        ),
        action="store",
        dest="end_year",
        required=True,
        type=int,
    )
    parser.add_argument(
        "--res",
        help="""
            Model resolution (required)
            To see available supported resolutions, simply invoke this command
            with a --res unknown option. For custom resolutions, provide a grid
            name of your choosing to be used in the name of the fsurdat file.
            """,
        action="store",
        dest="res",
        required=True,
    )
    parser.add_argument(
        "--model-mesh",
        help="""
            model mesh [default: %(default)s]
            Ignore --res and use --model-mesh to be this file
            """,
        action="store",
        dest="force_model_mesh_file",
        required=False,
        default=None,
    )
    parser.add_argument(
        "--namelist",
        help="""
            name of output namelist filename
            if NOT given the name will be the same as the surface
            dataset name with a *.namelist extension rather than *.nc
            """,
        action="store",
        dest="namelist_fname",
        required=False,
        default=None,
    )
    parser.add_argument(
        "--model-mesh-nx",
        help="""
            model mesh [default: %(default)s]
            Required when using --model-mesh: set nx to the grid's number of
            columns; expect nx x ny = elementCount for consistency with the
            model mesh
            """,
        action="store",
        dest="force_model_mesh_nx",
        required=False,
        default=None,
    )
    parser.add_argument(
        "--model-mesh-ny",
        help="""
            model mesh [default: %(default)s]
            Required when using --model-mesh: set ny to the grid's number of
            rows; expect nx x ny = elementCount for consistency with the model
            mesh
            """,
        action="store",
        dest="force_model_mesh_ny",
        required=False,
        default=None,
    )
    parser.add_argument(
        "--glc-nec",
        help="""
            Number of glacier elevation classes to use. [default: %(default)s]
            """,
        action="store",
        dest="glc_nec",
        type=int,
        default=10,
    )
    parser.add_argument(
        "--ssp-rcp",
        help="""
            Shared Socioeconomic Pathway and Representative
            Concentration Pathway Scenario name(s).
            [default: %(default)s]
            """,
        action="store",
        dest="ssp_rcp",
        required=False,
        choices=valid_opts["ssp-rcp"],
        default="none",
    )
    parser.add_argument(
        "--rawdata-dir",
        help="""
            /path/of/root/of/input/data
            on izumi use /fs/cgd/csm/inputdata
            [default: %(default)s]
            """,
        action="store",
        dest="input_path",
        default="/glade/campaign/cesm/cesmdata/inputdata/",
    )
    parser.add_argument(
        "--vic",
        help="""
            Flag for adding the fields required for the VIC model.
            [default: %(default)s]
            """,
        action="store_true",
        dest="vic_flag",
        default=False,
    )
    parser.add_argument(
        "--inlandwet",
        help="""
            Flag for including inland wetlands.
            [default: %(default)s]
            """,
        action="store_true",
        dest="inlandwet",
        default=False,
    )
    parser.add_argument(
        "--glc",
        help="""
            Flag for adding the optional 3D glacier fields for verification of the glacier model.
            [default: %(default)s]
            """,
        action="store_true",
        dest="glc_flag",
        default=False,
    )
    parser.add_argument(
        "--hires_pft",
        help="""
            If you want to use the high-resolution pft dataset rather
            than the default lower resolution dataset.
            (Low resolution is at quarter-degree, high resolution at 3-minute)
            [Note: hires only available for 1850 and 2005.]
            [default: %(default)s]
            """,
        action="store_true",
        dest="hres_pft",
        default=False,
    )
    parser.add_argument(
        "--hires_soitex",
        help="""
            If you want to use the high-resolution soil texture dataset rather
            than the default lower resolution dataset.
            (Low resolution is 5x5min, high resolution 30-second)
            [default: %(default)s]
            """,
        action="store_true",
        dest="hres_soitex",
        default=False,
    )
    # NOTE: inverted dest — surfdata_flag=True means "do NOT output fsurdat"
    parser.add_argument(
        "--nosurfdata",
        help="""
            Do not output a surface dataset
            This is useful if you only want a landuse_timeseries file
            [default: %(default)s]
            """,
        action="store_true",
        dest="surfdata_flag",
        default=False,
    )
    # NOTE: inverted dest — crop_flag=True means "use the 16-pft no-crop list"
    parser.add_argument(
        "--nocrop",
        help="""
            Do not create datasets with the extensive list of prognostic crop types.
            [default: %(default)s]
            """,
        action="store_true",
        dest="crop_flag",
        default=False,
    )
    parser.add_argument(
        "--potveg_flag",
        help="""
            Use Potential Vegetation for pft_years
            [default: %(default)s]
            """,
        action="store_true",
        dest="potveg_flag",
        default=False,
    )
    return parser
def main():
    """
    See docstring at the top.

    Orchestrates the full namelist generation: parse arguments, validate
    years/mesh options, resolve raw input datasets from the tool's xml
    file, build output file names, and write the &mksurfdata_input
    namelist (plus, for transient runs, the landuse timeseries txt file).
    """
    # pylint: disable=too-many-statements
    args = get_parser().parse_args()
    process_logging_args(args)
    start_year = args.start_year
    end_year = args.end_year
    ssp_rcp = args.ssp_rcp
    res = args.res
    force_model_mesh_file = args.force_model_mesh_file
    force_model_mesh_nx = args.force_model_mesh_nx
    force_model_mesh_ny = args.force_model_mesh_ny
    input_path = args.input_path
    # NOTE: these two dests are inverted flags: crop_flag True == --nocrop,
    # surfdata_flag True == --nosurfdata
    nocrop_flag = args.crop_flag
    nosurfdata_flag = args.surfdata_flag
    vic_flag = args.vic_flag
    inlandwet = args.inlandwet
    glc_flag = args.glc_flag
    potveg = args.potveg_flag
    glc_nec = args.glc_nec
    hires_pft, hires_soitex = process_hires_options(args, start_year, end_year)
    # Validate a user-supplied mesh file (element count vs nx*ny) up front
    if force_model_mesh_file is not None:
        open_mesh_file(force_model_mesh_file, force_model_mesh_nx, force_model_mesh_ny)
    # Provenance metadata written into the output namelist
    hostname = os.getenv("HOSTNAME")
    logname = os.getenv("LOGNAME")
    logger.info("hostname is %s", hostname)
    logger.info("logname is %s", logname)
    # Years beyond 2015 require an explicit SSP-RCP scenario
    if ssp_rcp == "none":
        check_ssp_years(start_year, end_year)
    # determine pft_years - needed to parse xml file
    pft_years_ssp, pft_years = determine_pft_years(start_year, end_year, potveg)
    # Create land-use txt file for a transient case.
    # Determine the run type and if a transient run create output landuse txt file
    if end_year > start_year:
        run_type = "transient"
    else:
        run_type = "timeslice"
    logger.info("run_type = %s", run_type)
    # error check on glc_nec
    if (glc_nec <= 0) or (glc_nec >= 100):
        raise argparse.ArgumentTypeError("ERROR: glc_nec must be between 1 and 99.")
    # create attribute list for parsing xml file
    attribute_list = {
        "hires_pft": hires_pft,
        "hires_soitex": hires_soitex,
        "pft_years": pft_years,
        "pft_years_ssp": pft_years_ssp,
        "ssp_rcp": ssp_rcp,
        "res": res,
    }
    # determine input rawdata
    tool_path, must_run_download_input_data, rawdata_files = determine_input_rawdata(
        start_year,
        input_path,
        attribute_list,
    )
    # determine output mesh (fills mksrf_fgrid_mesh* keys in rawdata_files)
    determine_output_mesh(res, force_model_mesh_file, input_path, rawdata_files, tool_path)
    # Determine num_pft: 16 pfts without prognostic crops, 78 with
    if nocrop_flag:
        num_pft = "16"
    else:
        num_pft = "78"
    logger.info("num_pft is %s", num_pft)
    # Write out if surface dataset will be created
    if nosurfdata_flag:
        logger.info("surface dataset will not be created")
    else:
        logger.info("surface dataset will be created")
    (
        landuse_fname,
        fdyndat,
        nlfname,
        fsurdat,
        fsurlog,
        must_run_download_input_data,
    ) = get_file_paths(
        args,
        start_year,
        end_year,
        ssp_rcp,
        res,
        pft_years,
        run_type,
        rawdata_files,
        num_pft,
        must_run_download_input_data,
    )
    git_desc_cmd = f"git -C {tool_path} describe"
    try:
        # The "git -C" option permits a system test to run this tool from
        # elsewhere while running the git command from the tool_path
        gitdescribe = subprocess.check_output(git_desc_cmd, shell=True).strip()
    except subprocess.CalledProcessError as error:
        # In case the "git -C" option is unavailable, as on casper (2022/5/24)
        # Still, this does NOT allow the system test to work on machines
        # without git -C
        logger.info("git -C option unavailable on casper as of 2022/7/2 %s", error)
        gitdescribe = subprocess.check_output("git describe", shell=True).strip()
    gitdescribe = gitdescribe.decode("utf-8")
    # The below override is only used for testing and validation:
    # it takes a long time to generate the mapping files
    # from 1km to the following resolution since the output mesh has so few points
    if res == "10x15":
        mksrf_ftopostats_override = os.path.join(
            input_path, "lnd", "clm2", "rawdata", "surfdata_topo_10x15_c220303.nc"
        )
        logger.info("will override mksrf_ftopostats with = %s", mksrf_ftopostats_override)
    else:
        mksrf_ftopostats_override = ""
    # ----------------------------------------
    # Write output namelist file
    # ----------------------------------------
    with open(nlfname, "w", encoding="utf-8") as nlfile:
        nlfile.write("&mksurfdata_input \n")
        # -------------------
        # raw input data
        # -------------------
        must_run_download_input_data = write_nml_rawinput(
            start_year,
            force_model_mesh_file,
            force_model_mesh_nx,
            force_model_mesh_ny,
            vic_flag,
            rawdata_files,
            landuse_fname,
            mksrf_ftopostats_override,
            nlfile,
            must_run_download_input_data,
        )
        # -------------------
        # output data
        # -------------------
        write_nml_outdata(
            nosurfdata_flag,
            vic_flag,
            inlandwet,
            glc_flag,
            hostname,
            logname,
            num_pft,
            fdyndat,
            fsurdat,
            fsurlog,
            gitdescribe,
            nlfile,
        )
        nlfile.write("/ \n")
    # When inputs are missing, rename the namelist to the fixed name that
    # the download_input_data workflow expects
    if must_run_download_input_data:
        temp_nlfname = "surfdata.namelist"
        os.rename(nlfname, temp_nlfname)
        nlfname = temp_nlfname
    print(f"Successfully created input namelist file {nlfname}")
def process_hires_options(args, start_year, end_year):
    """
    Translate the high-resolution command-line flags into "on"/"off" strings.

    --hires_pft is only supported for the 1850 or 2005 time-slice cases;
    any other start/end year combination is a fatal error.

    Returns (hires_pft, hires_soitex), each "on" or "off".
    """
    hires_pft = "off"
    if args.hres_pft:
        if (start_year, end_year) in ((1850, 1850), (2005, 2005)):
            hires_pft = "on"
        else:
            sys.exit(
                "ERROR: for --hires_pft you must set both start-year "
                "and end-year to 1850 or to 2005"
            )
    hires_soitex = "on" if args.hres_soitex else "off"
    return hires_pft, hires_soitex
def check_ssp_years(start_year, end_year):
    """
    Check years associated with SSP period.

    The historical raw datasets end in 2015, so any year beyond 2015
    requires an explicit --ssp-rcp scenario.  Exits with an error message
    when either year is out of range; returns None otherwise.
    """
    # NOTE: the messages previously referred to a non-existent "--ssp_rcp"
    # option and dumped the whole valid_opts dict; they now name the real
    # --ssp-rcp flag and list only the valid scenario names.
    if int(start_year) > 2015:
        error_msg = (
            "ERROR: if start-year > 2015 must add an --ssp-rcp "
            "argument that is not none: valid opts for ssp-rcp "
            f"are {valid_opts['ssp-rcp']}"
        )
        sys.exit(error_msg)
    elif int(end_year) > 2015:
        error_msg = (
            "ERROR: if end-year > 2015 must add an --ssp-rcp "
            "argument that is not none: valid opts for ssp-rcp "
            f"are {valid_opts['ssp-rcp']}"
        )
        sys.exit(error_msg)
def get_file_paths(
    args,
    start_year,
    end_year,
    ssp_rcp,
    res,
    pft_years,
    run_type,
    rawdata_files,
    num_pft,
    must_run_download_input_data,
):
    """
    Build the output file names used by the rest of the tool.

    For transient runs the landuse timeseries text file is written as a
    side effect (via handle_transient_run).

    Returns (landuse_fname, fdyndat, nlfname, fsurdat, fsurlog,
    must_run_download_input_data).
    """
    landuse_fname = ""
    if run_type == "transient":
        landuse_fname, must_run_download_input_data = handle_transient_run(
            start_year, end_year, ssp_rcp, rawdata_files, num_pft, must_run_download_input_data
        )
        print(f"Successfully created input landuse file {landuse_fname}")

    # Scenario tag embedded in every output file name
    if ssp_rcp != "none":
        ssp_rcp_name = ssp_rcp
    elif pft_years == "PtVg":
        ssp_rcp_name = "PtVeg_nourb"
    else:
        ssp_rcp_name = "hist"

    time_stamp = datetime.today().strftime("%y%m%d")

    # A dynamic landuse dataset only makes sense for multi-year runs
    fdyndat = ""
    if int(end_year) != int(start_year):
        fdyndat = (
            f"landuse.timeseries_{res}_{ssp_rcp_name}"
            f"_{start_year}-{end_year}_{num_pft}pfts_c{time_stamp}.nc"
        )

    prefix = f"surfdata_{res}_{ssp_rcp_name}_{start_year}_{num_pft}pfts_c{time_stamp}."
    nlfname = f"{prefix}namelist" if args.namelist_fname is None else args.namelist_fname
    fsurdat = f"{prefix}nc"
    fsurlog = f"{prefix}log"
    return landuse_fname, fdyndat, nlfname, fsurdat, fsurlog, must_run_download_input_data
def determine_pft_years(start_year, end_year, potveg):
"""
determine pft_years - needed to parse xml file
"""
pft_years_ssp = "-999"
if potveg:
pft_years = "PtVg"
elif int(start_year) == 1850 and int(end_year) == 1850:
pft_years = "1850"
elif int(start_year) == 2000 and int(end_year) == 2000:
pft_years = "2000"
elif int(start_year) == 2005 and int(end_year) == 2005:
pft_years = "2005"
elif int(start_year) >= 850 and int(end_year) <= 1849:
pft_years = "0850-1849"
elif int(start_year) >= 1850 and int(start_year) <= 2100 and int(end_year) <= 2015:
pft_years = "1850-2015"
elif int(start_year) >= 1850 and int(start_year) <= 2100 and int(end_year) <= 2100:
pft_years = "1850-2015"
pft_years_ssp = "2016-2100"
elif int(start_year) >= 2016 and int(start_year) <= 2100 and int(end_year) <= 2100:
pft_years = "-999"
pft_years_ssp = "2016-2100"
else:
error_msg = (
f"ERROR: start_year is {start_year} and end_year is "
f"{end_year}; expected start/end-year options are: "
"- 1850, 2000, 2005 for time-slice options "
"- in the range from 850 to 1849 "
"- in the range from 1850 to 2100 "
"- TODO in the range from 2101 to 2300 "
"- OR user must set the potveg_flag "
)
sys.exit(error_msg)
logger.info("pft_years = %s", pft_years)
return pft_years_ssp, pft_years
def write_nml_outdata(
    nosurfdata_flag,
    vic_flag,
    inlandwet,
    glc_flag,
    hostname,
    logname,
    num_pft,
    fdyndat,
    fsurdat,
    fsurlog,
    gitdescribe,
    nlfile,
):
    """
    Write output namelist file: output data.

    Emits the output dataset names, the output logical switches, and
    provenance metadata (logname, hostname, git describe) to the open
    namelist file handle nlfile.
    """

    def fort_bool(flag):
        # Render a Python bool as a Fortran logical literal, e.g. ".true."
        return f".{str(flag).lower()}."

    # -------------------
    # output data files (fsurdat is blanked out for --nosurfdata)
    # -------------------
    if nosurfdata_flag:
        nlfile.write(" fsurdat = ' ' \n")
    else:
        nlfile.write(f" fsurdat = '{fsurdat}'\n")
    # -------------------
    # remaining file names, output logicals, and provenance
    # -------------------
    nlfile.writelines(
        [
            f" fsurlog = '{fsurlog}' \n",
            f" fdyndat = '{fdyndat}' \n",
            f" numpft = {num_pft} \n",
            f" no_inlandwet = {fort_bool(not inlandwet)} \n",
            f" outnc_3dglc = {fort_bool(glc_flag)} \n",
            f" outnc_vic = {fort_bool(vic_flag)} \n",
            " outnc_large_files = .false. \n",
            " outnc_double = .true. \n",
            f" logname = '{logname}' \n",
            f" hostname = '{hostname}' \n",
            f" gitdescribe = '{gitdescribe}' \n",
        ]
    )
def write_nml_rawinput(
    start_year,
    force_model_mesh_file,
    force_model_mesh_nx,
    force_model_mesh_ny,
    vic_flag,
    rawdata_files,
    landuse_fname,
    mksrf_ftopostats_override,
    nlfile,
    must_run_download_input_data,
):
    """
    Write output namelist file: raw input data.

    Writes the model mesh (either from rawdata_files or the user-forced
    --model-mesh values), then every generic rawdata entry, then the
    year-dependent vegetation/harvest/lake/urban files (pre-2016 uses the
    historical datasets, later years the SSP datasets), and finally the
    optional VIC files and the dynamic landuse file name.

    Returns must_run_download_input_data, flipped to True when any
    referenced raw input file is missing on disk.
    """
    # pylint: disable=too-many-statements
    if force_model_mesh_file is None:
        mksrf_fgrid_mesh_nx = rawdata_files["mksrf_fgrid_mesh_nx"]
        mksrf_fgrid_mesh_ny = rawdata_files["mksrf_fgrid_mesh_ny"]
        mksrf_fgrid_mesh = rawdata_files["mksrf_fgrid_mesh"]
    else:
        # --model-mesh overrides the xml-derived mesh entirely
        mksrf_fgrid_mesh_nx = force_model_mesh_nx
        mksrf_fgrid_mesh_ny = force_model_mesh_ny
        mksrf_fgrid_mesh = force_model_mesh_file
    nlfile.write(f" mksrf_fgrid_mesh = '{mksrf_fgrid_mesh}' \n")
    nlfile.write(f" mksrf_fgrid_mesh_nx = {mksrf_fgrid_mesh_nx} \n")
    nlfile.write(f" mksrf_fgrid_mesh_ny = {mksrf_fgrid_mesh_ny} \n")
    # Write all generic rawdata entries; VIC, vegtyp and grid entries are
    # handled separately below
    for key, value in rawdata_files.items():
        if key == "mksrf_ftopostats" and mksrf_ftopostats_override != "":
            nlfile.write(f" mksrf_ftopostats_override = '{mksrf_ftopostats_override}' \n")
        elif "_fvic" not in key and "mksrf_fvegtyp" not in key and "mksrf_fgrid" not in key:
            # write everything else
            nlfile.write(f" {key} = '{value}' \n")
    # Select historical vs SSP vegetation datasets based on the start year.
    # NOTE(review): mksrf_fhrvtyp deliberately reuses the mksrf_fvegtyp
    # file/mesh here (harvest data appears to live in the same dataset) —
    # confirm against the raw-data layout.
    if start_year <= 2015:
        mksrf_fvegtyp = rawdata_files["mksrf_fvegtyp"]
        mksrf_fvegtyp_mesh = rawdata_files["mksrf_fvegtyp_mesh"]
        mksrf_fhrvtyp = rawdata_files["mksrf_fvegtyp"]
        mksrf_fhrvtyp_mesh = rawdata_files["mksrf_fvegtyp_mesh"]
        mksrf_fpctlak = rawdata_files["mksrf_fvegtyp_lake"]
        mksrf_furban = rawdata_files["mksrf_fvegtyp_urban"]
    else:
        mksrf_fvegtyp = rawdata_files["mksrf_fvegtyp_ssp"]
        mksrf_fvegtyp_mesh = rawdata_files["mksrf_fvegtyp_ssp_mesh"]
        mksrf_fhrvtyp = rawdata_files["mksrf_fvegtyp_ssp"]
        mksrf_fhrvtyp_mesh = rawdata_files["mksrf_fvegtyp_ssp_mesh"]
        mksrf_fpctlak = rawdata_files["mksrf_fvegtyp_ssp_lake"]
        mksrf_furban = rawdata_files["mksrf_fvegtyp_ssp_urban"]
    # Substitute the %y year placeholder with the start year
    if "%y" in mksrf_fvegtyp:
        mksrf_fvegtyp = mksrf_fvegtyp.replace("%y", str(start_year))
    if "%y" in mksrf_fhrvtyp:
        mksrf_fhrvtyp = mksrf_fhrvtyp.replace("%y", str(start_year))
    if "%y" in mksrf_fpctlak:
        mksrf_fpctlak = mksrf_fpctlak.replace("%y", str(start_year))
    if "%y" in mksrf_furban:
        mksrf_furban = mksrf_furban.replace("%y", str(start_year))
    # Warn (do not fail) on missing inputs; the caller renames the namelist
    # for the download workflow when must_run_download_input_data is True
    if not os.path.isfile(mksrf_fvegtyp):
        print("WARNING: input mksrf_fvegtyp file " f"{mksrf_fvegtyp} does not exist")
        print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
        must_run_download_input_data = True
    if not os.path.isfile(mksrf_fhrvtyp):
        print("WARNING: input mksrf_fhrvtyp file " f"{mksrf_fhrvtyp} does not exist")
        print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
        must_run_download_input_data = True
    if not os.path.isfile(mksrf_fpctlak):
        print("WARNING: input mksrf_fpctlak file " f"{mksrf_fpctlak} does not exist")
        print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
        must_run_download_input_data = True
    if not os.path.isfile(mksrf_furban):
        print("WARNING: input mksrf_furban file " f"{mksrf_furban} does not exist")
        print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
        must_run_download_input_data = True
    nlfile.write(f" mksrf_fvegtyp = '{mksrf_fvegtyp}' \n")
    nlfile.write(f" mksrf_fvegtyp_mesh = '{mksrf_fvegtyp_mesh}' \n")
    nlfile.write(f" mksrf_fhrvtyp = '{mksrf_fhrvtyp}' \n")
    nlfile.write(f" mksrf_fhrvtyp_mesh = '{mksrf_fhrvtyp_mesh}' \n")
    nlfile.write(f" mksrf_fpctlak = '{mksrf_fpctlak}' \n")
    nlfile.write(f" mksrf_furban = '{mksrf_furban}' \n")
    # VIC fields are optional; only written when requested with --vic
    if vic_flag:
        mksrf_fvic = rawdata_files["mksrf_fvic"]
        nlfile.write(f" mksrf_fvic = '{mksrf_fvic}' \n")
        mksrf_fvic_mesh = rawdata_files["mksrf_fvic_mesh"]
        nlfile.write(f" mksrf_fvic_mesh = '{mksrf_fvic_mesh}' \n")
    nlfile.write(f" mksrf_fdynuse = '{landuse_fname} ' \n")
    return must_run_download_input_data
def handle_transient_run(
start_year, end_year, ssp_rcp, rawdata_files, num_pft, must_run_download_input_data
):
"""
Settings and printout for when run_type is "transient"
"""
if ssp_rcp == "none":
landuse_fname = f"landuse_timeseries_hist_{start_year}-{end_year}_{num_pft}pfts.txt"
else:
landuse_fname = f"landuse_timeseries_{ssp_rcp}_{start_year}-{end_year}_{num_pft}pfts.txt"
with open(landuse_fname, "w", encoding="utf-8") as landuse_file:
for year in range(start_year, end_year + 1):
year_str = str(year)
if year <= 2015:
file1 = rawdata_files["mksrf_fvegtyp"]
file2 = rawdata_files["mksrf_fvegtyp_urban"]
file3 = rawdata_files["mksrf_fvegtyp_lake"]
else:
file1 = rawdata_files["mksrf_fvegtyp_ssp"]
file2 = rawdata_files["mksrf_fvegtyp_ssp_urban"]
file3 = rawdata_files["mksrf_fvegtyp_ssp_lake"]
landuse_input_fname = file1.replace("%y", year_str)
landuse_input_fnam2 = file2.replace("%y", year_str)
landuse_input_fnam3 = file3.replace("%y", year_str)
if not os.path.isfile(landuse_input_fname):
print("WARNING: landunit_input_fname: " f"{landuse_input_fname} does not exist")
print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
must_run_download_input_data = True
if not os.path.isfile(landuse_input_fnam2):
print("WARNING: landunit_input_fnam2: " f"{landuse_input_fnam2} does not exist")
print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
must_run_download_input_data = True
if not os.path.isfile(landuse_input_fnam3):
print("WARNING: landunit_input_fnam3: " f"{landuse_input_fnam3} does not exist")
print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
must_run_download_input_data = True
# -- Each line is written twice in the original perl code:
landuse_line = f"{landuse_input_fname:<196}{year_str}\n"
landuse_lin2 = f"{landuse_input_fnam2:<196}{year_str}\n"
landuse_lin3 = f"{landuse_input_fnam3:<196}{year_str}\n"
landuse_file.write(landuse_line)
landuse_file.write(landuse_line)
landuse_file.write(landuse_lin2)
landuse_file.write(landuse_lin3)
logger.debug("year : %s", year_str)
logger.debug(landuse_line)
return landuse_fname, must_run_download_input_data
def determine_output_mesh(res, force_model_mesh_file, input_path, rawdata_files, tool_path):
    """
    Determine the output mesh for the requested resolution.

    Looks up res in ccs_config/component_grids_nuopc.xml (relative to
    tool_path) and records the mesh file path and grid size into
    rawdata_files under the keys mksrf_fgrid_mesh, mksrf_fgrid_mesh_nx
    and mksrf_fgrid_mesh_ny.  Exits with an error when res is unknown or
    has no mesh file, unless --model-mesh (force_model_mesh_file)
    overrides the lookup.
    """
    xml_path = os.path.join(tool_path, "../../ccs_config/component_grids_nuopc.xml")
    tree2 = ET.parse(xml_path)
    root = tree2.getroot()
    model_mesh = ""
    for child1 in root:  # this is domain tag
        for _, value in child1.attrib.items():
            if value == res:
                for child2 in child1:
                    if child2.tag == "mesh":
                        model_mesh = child2.text
                        # Remove the exact "$DIN_LOC_ROOT/" placeholder prefix.
                        # BUGFIX: the previous str.strip("$DIN_LOC_ROOT/") call
                        # treated its argument as a *character set* and could
                        # eat extra leading/trailing path characters (e.g. a
                        # first path segment starting with "L" or "D").
                        placeholder = "$DIN_LOC_ROOT/"
                        mesh_rel = model_mesh
                        if mesh_rel.startswith(placeholder):
                            mesh_rel = mesh_rel[len(placeholder):]
                        rawdata_files["mksrf_fgrid_mesh"] = os.path.join(input_path, mesh_rel)
                    if child2.tag == "nx":
                        rawdata_files["mksrf_fgrid_mesh_nx"] = child2.text
                    if child2.tag == "ny":
                        rawdata_files["mksrf_fgrid_mesh_ny"] = child2.text
    if not model_mesh and force_model_mesh_file is None:
        valid_grids = []
        for child1 in root:  # this is domain tag
            for _, value in child1.attrib.items():
                valid_grids.append(value)
        if res in valid_grids:
            error_msg = (
                "ERROR: You have requested a valid grid for which "
                "../../ccs_config/component_grids_nuopc.xml does not include a mesh "
                "file. For a regular regional or 1x1 grid, you may generate the "
                "fsurdat file using the subset_data tool instead. Alternatively "
                "and definitely for curvilinear grids, you may generate "
                "a mesh file using the workflow currently (2022/7) described in "
                "https://github.com/ESCOMP/CTSM/issues/1773#issuecomment-1163432584"
                "TODO Reminder to ultimately place these workflow instructions in "
                "the User's Guide."
            )
            sys.exit(error_msg)
        else:
            error_msg = f"ERROR: invalid input res {res}; " f"valid grid values are {valid_grids}"
            sys.exit(error_msg)
def determine_input_rawdata(start_year, input_path, attribute_list):
    """
    Determine input rawdata by parsing the tool's gen_mksurfdata_namelist.xml.

    For every group in the xml file, select the <entry> whose attributes
    best match attribute_list (res, pft_years, ssp_rcp, hires flags, ...)
    and record its data/mesh/lake/urban/lookup file paths, joined onto
    input_path, into a dictionary keyed by namelist variable name.

    Returns:
        tool_path: path to the mksurfdata_esmf tool directory
        must_run_download_input_data: True when any referenced file is
            missing on disk (user should run ./download_input_data)
        rawdata_files: dict mapping namelist keys to file paths
    """
    # pylint: disable=too-many-statements
    # create dictionary for raw data files names
    rawdata_files = {}
    must_run_download_input_data = False
    tool_path = os.path.join(path_to_ctsm_root(), "tools", "mksurfdata_esmf")
    xml_path = os.path.join(tool_path, "gen_mksurfdata_namelist.xml")
    tree1 = ET.parse(xml_path)
    root = tree1.getroot()
    logger.info("root.tag: %s", root.tag)
    logger.info("root.attrib: %s", root.attrib)
    for child1 in root:
        # Pick the entry with the most matching attributes; an entry with a
        # conflicting attribute value is disqualified (num_match = -1).
        max_match_num = -1
        max_match_child = None
        for child2 in child1:
            if child2.tag == "entry":
                num_match = 0
                for attrib in attribute_list:
                    # Get the value of the attrib for the entry
                    childval = child2.get(attrib, default=None)
                    if childval == attribute_list[attrib]:
                        num_match += 1
                    elif childval is not None:
                        num_match = -1
                        break
                if num_match > max_match_num:
                    max_match_num = num_match
                    max_match_child = child2
        if max_match_child is None:
            # TODO slevis: Are these if-statements backwards?
            # For years greater than 2015 - mksrf_fvegtyp_ssp must have a match
            # NOTE(review): as written, pre-2016 starts tolerate unmatched
            # *_ssp groups and later starts tolerate unmatched mksrf_fvegtyp*
            # groups; any other unmatched group is fatal — confirm intent.
            if start_year <= 2015:
                if "mksrf_fvegtyp_ssp" not in child1.tag:
                    error_msg = f"ERROR: {child1.tag} has no matches"
                    sys.exit(error_msg)
                else:
                    continue
            else:
                # For years less than 2015 - mksrf_fvegtyp must have a match
                if "mksrf_fvegtyp" not in child1.tag:
                    error_msg = f"ERROR: {child1.tag} has no matches"
                    sys.exit(error_msg)
                else:
                    continue
        # Record every file referenced by the selected entry; only data and
        # mesh files are existence-checked here (lake/urban/lookup may
        # contain %y placeholders resolved later)
        for item in max_match_child:
            if item.tag == "data_filename":
                rawdata_files[child1.tag] = os.path.join(input_path, item.text)
                # %y placeholders are expanded per-year later, so only
                # literal paths can be checked for existence now
                if "%y" not in rawdata_files[child1.tag]:
                    if not os.path.isfile(rawdata_files[child1.tag]):
                        print(
                            "WARNING: input data file "
                            f"{rawdata_files[child1.tag]} for {child1.tag} "
                            "does not exist"
                        )
                        print(
                            "WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES"
                        )
                        must_run_download_input_data = True
            if item.tag == "mesh_filename":
                new_key = f"{child1.tag}_mesh"
                rawdata_files[new_key] = os.path.join(input_path, item.text)
                if not os.path.isfile(rawdata_files[new_key]):
                    print("WARNING: input mesh file " f"{rawdata_files[new_key]} does not exist")
                    print("WARNING: run ./download_input_data to try TO " "OBTAIN MISSING FILES")
                    must_run_download_input_data = True
            if item.tag == "lake_filename":
                new_key = f"{child1.tag}_lake"
                rawdata_files[new_key] = os.path.join(input_path, item.text)
            if item.tag == "urban_filename":
                new_key = f"{child1.tag}_urban"
                rawdata_files[new_key] = os.path.join(input_path, item.text)
            if item.tag == "lookup_filename":
                new_key = f"{child1.tag}_lookup"
                rawdata_files[new_key] = os.path.join(input_path, item.text)
    return tool_path, must_run_download_input_data, rawdata_files
def open_mesh_file(force_model_mesh_file, force_model_mesh_nx, force_model_mesh_ny):
    """
    Open mesh_file to read element_count and, if available, orig_grid_dims,
    and validate the user-entered --model-mesh-nx/-ny values against them.

    Exits with an error message when nx/ny are missing, disagree with the
    mesh file's origGridDims, or their product differs from elementCount.
    """
    # pylint: disable=no-name-in-module,no-member
    # The above "pylint: disable" is because pylint complains that netCDF4
    # has no member Dataset, even though it does.
    mesh_file = netCDF4.Dataset(force_model_mesh_file, "r")
    try:
        element_count = mesh_file.dimensions["elementCount"].size
        if "origGridDims" in mesh_file.variables:
            orig_grid_dims = mesh_file.variables["origGridDims"][:]
        else:
            orig_grid_dims = None
    finally:
        # BUGFIX: close the dataset on every path; previously it was leaked
        # on the mismatch-error path and when origGridDims was absent.
        mesh_file.close()
    # BUGFIX: check for missing nx/ny before any int() conversion; previously
    # a mesh with origGridDims but no user nx/ny crashed with a TypeError
    # instead of this clean error message.
    if force_model_mesh_nx is None or force_model_mesh_ny is None:
        error_msg = (
            "ERROR: You set --model-mesh so you MUST ALSO "
            "SET --model-mesh-nx AND --model-mesh-ny"
        )
        sys.exit(error_msg)
    if orig_grid_dims is not None and not (
        int(force_model_mesh_nx) == orig_grid_dims[0]
        and int(force_model_mesh_ny) == orig_grid_dims[1]
    ):
        error_msg = (
            "ERROR: Found variable origGridDims in "
            f"{force_model_mesh_file} with values "
            f"{orig_grid_dims[:]} that do not agree with the "
            "user-entered mesh_nx and mesh_ny values of "
            f"{[force_model_mesh_nx, force_model_mesh_ny]}."
        )
        sys.exit(error_msg)
    # using force_model_mesh_nx and force_model_mesh_ny either validated
    # against the mesh file (above) or taken on trust from the user
    if element_count != int(force_model_mesh_nx) * int(force_model_mesh_ny):
        error_msg = (
            "ERROR: The product of "
            "--model-mesh-nx x --model-mesh-ny must equal "
            "exactly elementCount in --model-mesh"
        )
        sys.exit(error_msg)