244 lines
8.0 KiB
Python
244 lines
8.0 KiB
Python
#! /usr/bin/env python3
|
|
|
|
"""
|
|
|------------------------------------------------------------------|
|
|
|--------------------- Instructions -----------------------------|
|
|
|------------------------------------------------------------------|
|
|
This is a wrapper script for running CTSM simulation for one or more
|
|
neon sites.
|
|
|
|
This script is only for neon site and we will develop a more general
|
|
code later.
|
|
|
|
This script first creates and builds a generic base case.
|
|
Next, it will clone the base_case for different neon sites and run
|
|
types to reduce the need to build ctsm every time.
|
|
|
|
This script will do the following:
|
|
1) Create a generic base case for cloning.
|
|
2) Make the case for the specific neon site(s).
|
|
3) Make changes to the case, for:
|
|
a. AD spinup
|
|
b. post-AD spinup
|
|
c. transient
|
|
#---------------
|
|
d. SASU or Matrix spinup
|
|
4) Build and submit the case.
|
|
|
|
-------------------------------------------------------------------
|
|
Instructions for running using conda python environments:
|
|
|
|
../../py_env_create
|
|
conda activate ctsm_py
|
|
|
|
-------------------------------------------------------------------
|
|
To see the available options:
|
|
./run_neon.py --help
|
|
-------------------------------------------------------------------
|
|
"""
|
|
# TODO (NS)
|
|
# - [ ]
|
|
# - [ ] Case dependency and the ability to check case status
|
|
# - [ ] If Case dependency works we don't need finidat given explicitly for post-ad and transient.
|
|
|
|
# - [ ] checkout_externals instead of using env variable
|
|
# - [ ] wget the fields available and run for those available
|
|
|
|
# - [ ] Matrix spin-up if (SASU) Eric merged it in
|
|
# - [ ] Make sure both AD and SASU are not on at the same time
|
|
|
|
# - [ ] Make sure CIME and other dependencies are checked out.
|
|
|
|
|
|
# Import libraries
|
|
import glob
|
|
import logging
|
|
import os
|
|
import sys
|
|
import pandas as pd
|
|
|
|
# Get the ctsm util tools and then the cime tools.
|
|
_CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python"))
|
|
sys.path.insert(1, _CTSM_PYTHON)
|
|
|
|
# pylint: disable=wrong-import-position
|
|
from ctsm.path_utils import path_to_ctsm_root
|
|
from ctsm.download_utils import download_file
|
|
from ctsm.site_and_regional.neon_arg_parse import get_parser
|
|
from ctsm.site_and_regional.neon_site import NeonSite
|
|
|
|
# pylint: disable=import-error, wildcard-import, wrong-import-order
|
|
from standard_script_setup import *
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def check_neon_listing(valid_neon_sites):
    """
    Fetch the NEON listing file and return the sites found in it.

    Args:
        valid_neon_sites (list of str): supported NEON site names; passed
            through unchanged to parse_neon_listing.

    Returns:
        The list produced by parse_neon_listing for the downloaded file.
    """
    listing_url = "https://storage.neonscience.org/neon-ncar/listing.csv"
    local_listing = "listing.csv"

    # The listing is stored under a relative name, i.e. relative to the
    # current working directory, and then handed to the parser.
    download_file(listing_url, local_listing)
    return parse_neon_listing(local_listing, valid_neon_sites)
|
|
|
|
|
|
def parse_neon_listing(listing_file, valid_neon_sites):
    """
    Parse the neon listing file and find the neon sites, together with
    the date range, for which data is available.

    Args:
        listing_file (str): downloaded listing file (CSV); it must have an
            "object" column holding the object paths.
        valid_neon_sites (list of str): names of the supported neon sites.

    Returns:
        available_list :
            list of NeonSite objects, one for every valid site that has
            at least one YYYY-MM.nc file in the listing file.
    """
    available_list = []

    listing_df = pd.read_csv(listing_file)
    objects = listing_df["object"]

    # -- finidat files for transient runs.
    # na=False keeps missing "object" values from poisoning the boolean mask.
    finidat_objects = objects[objects.str.contains("lnd/ctsm", na=False)]

    # -- keep only atm/cdeps entries and split each path into its components.
    # Components are addressed by position below:
    #   7 = data version, 8 = site name, 9 = file name.
    path_parts = objects[objects.str.contains("atm/cdeps/", na=False)].str.split(
        "/", expand=True
    )

    # -- group by site name
    for site_name, site_df in path_parts.groupby(8):
        # -- check if it is a valid neon site
        # NOTE(review): this is a substring test (site_name contained in a
        # valid name), kept as in the original; confirm whether an exact
        # match is what is actually wanted.
        if not any(site_name in valid_site for valid_site in valid_neon_sites):
            continue

        # -- keep only files ending with YYYY-MM.nc (literal dot, anchored at the end)
        is_monthly_file = site_df[9].str.contains(r"\d\d\d\d-\d\d\.nc$", na=False)
        site_df = site_df[is_monthly_file]
        if site_df.empty:
            # Nothing to derive dates from; skip instead of failing on .iloc below.
            logger.warning("No YYYY-MM.nc files listed for neon site %s; skipping.", site_name)
            continue

        # -- the version of the last listed file is taken as the latest one
        latest_version = site_df[7].iloc[-1]

        # Exact comparison: a substring/regex match would let e.g. "v1" also select "v10".
        site_df = site_df[site_df[7] == latest_version]

        # -- remove .nc from the file names; work on a separate Series rather
        # than assigning into the filtered frame (avoids SettingWithCopyWarning)
        file_stems = site_df[9].str.replace(".nc", "", regex=False)

        # -- "<prefix>YYYY-MM" -> column 0: "<prefix>YYYY", column 1: "MM"
        date_parts = file_stems.str.split("-", expand=True)

        # ignore any prefix in file name and just get year
        years = date_parts[0].str.slice(-4)
        months = date_parts[1]

        # -- first and last listed files give the start and end of the record
        start_year = years.iloc[0]
        end_year = years.iloc[-1]
        start_month = months.iloc[0]
        end_month = months.iloc[-1]

        logger.debug("Valid neon site %s found!", site_name)
        logger.debug("File version %s", latest_version)
        logger.debug("start_year=%s", start_year)
        logger.debug("end_year=%s", end_year)
        logger.debug("start_month=%s", start_month)
        logger.debug("end_month=%s", end_month)

        # -- if several finidat entries mention this site, the last one listed wins
        finidat = None
        for line in finidat_objects:
            if site_name in line:
                finidat = line.split(",")[0].split("/")[-1]

        neon_site = NeonSite(site_name, start_year, end_year, start_month, end_month, finidat)
        logger.debug(neon_site)
        available_list.append(neon_site)

    return available_list
|
|
|
|
|
|
def main(description):
    """
    Determine valid neon sites. Make an output directory if it does not exist.
    Loop through requested sites and run CTSM at that site.

    Args:
        description (str): description text handed to the argument parser
            (get_parser) together with sys.argv.
    """
    cesmroot = path_to_ctsm_root()

    # Get the list of supported neon sites from usermods
    # The [!Fd]* portion means that we won't retrieve cases that start with:
    # F (FATES) or d (default). We should be aware of adding cases that start with these.
    usermods_pattern = os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", "[!Fd]*")
    # basename instead of splitting on "/" so the path separator is not hard-coded
    valid_neon_sites = sorted(os.path.basename(path) for path in glob.glob(usermods_pattern))

    (
        site_list,
        output_root,
        run_type,
        experiment,
        prism,
        overwrite,
        run_length,
        base_case_root,
        run_from_postad,
        setup_only,
        no_batch,
        rerun,
        user_version,
    ) = get_parser(sys.argv, description, valid_neon_sites)

    if output_root:
        logger.debug("output_root : %s", output_root)
        # exist_ok avoids the check-then-create race of a separate os.path.exists test
        os.makedirs(output_root, exist_ok=True)

    # -- check neon listing file for available data:
    available_list = check_neon_listing(valid_neon_sites)

    # =================================
    # -- all neon sites can be cloned from one generic case
    # -- so no need to define a base_case for every site.

    res = "CLM_USRDAT"
    compset = "IHist1PtClm60Bgc" if run_type == "transient" else "I1PtClm60Bgc"

    # -- Looping over neon sites
    for neon_site in available_list:
        # Only the sites requested on the command line are run.
        if neon_site.name not in site_list:
            continue

        if run_from_postad:
            neon_site.finidat = None

        # The base case is built once, by the first requested site that needs
        # it, and then reused for every following site.
        if not base_case_root:
            user_mods_dirs = None
            base_case_root = neon_site.build_base_case(
                cesmroot, output_root, res, compset, user_mods_dirs, overwrite, setup_only
            )

        logger.info("-----------------------------------")
        logger.info("Running CTSM for neon site : %s", neon_site.name)

        neon_site.run_case(
            base_case_root,
            run_type,
            prism,
            run_length,
            user_version,
            overwrite=overwrite,
            setup_only=setup_only,
            no_batch=no_batch,
            rerun=rerun,
            experiment=experiment,
        )
|