2024-05-09 15:14:01 +08:00

244 lines
8.0 KiB
Python

#! /usr/bin/env python3
"""
|------------------------------------------------------------------|
|--------------------- Instructions -----------------------------|
|------------------------------------------------------------------|
This is a wrapper script for running CTSM simulation for one or more
neon sites.
This script is only for neon site and we will develop a more general
code later.
This script first creates and builds a generic base case.
Next, it will clone the base_case for different neon sites and run
types to reduce the need to build ctsm every time.
This script will do the following:
1) Create a generic base case for cloning.
2) Make the case for the specific neon site(s).
3) Make changes to the case, for:
a. AD spinup
b. post-AD spinup
c. transient
#---------------
d. SASU or Matrix spinup
4) Build and submit the case.
-------------------------------------------------------------------
Instructions for running using conda python environments:
../../py_env_create
conda activate ctsm_py
-------------------------------------------------------------------
To see the available options:
./run_neon.py --help
-------------------------------------------------------------------
"""
# TODO (NS)
# - [ ]
# - [ ] Case dependency and the ability to check case status
# - [ ] If Case dependency works we don't need finidat given explicitly for post-ad and transient.
# - [ ] checkout_externals instead of using env variable
# - [ ] wget the fields available and run for those available
# - [ ] Matrix spin-up if (SASU) Eric merged it in
# - [ ] Make sure both AD and SASU are not on at the same time
# - [ ] Make sure CIME and other dependencies are checked out.
# Import libraries
import glob
import logging
import os
import sys
import pandas as pd
# Get the ctsm util tools and then the cime tools.
_CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python"))
sys.path.insert(1, _CTSM_PYTHON)
# pylint: disable=wrong-import-position
from ctsm.path_utils import path_to_ctsm_root
from ctsm.download_utils import download_file
from ctsm.site_and_regional.neon_arg_parse import get_parser
from ctsm.site_and_regional.neon_site import NeonSite
# pylint: disable=import-error, wildcard-import, wrong-import-order
from standard_script_setup import *
logger = logging.getLogger(__name__)
def check_neon_listing(valid_neon_sites):
    """
    Download the neon listing file and parse it.

    Args:
        valid_neon_sites: supported neon sites, handed on unchanged to
            parse_neon_listing.

    Returns:
        The result of parse_neon_listing for the downloaded listing file.
    """
    neon_listing_url = "https://storage.neonscience.org/neon-ncar/listing.csv"
    local_listing = "listing.csv"

    # The listing is written under a relative file name and parsed from there.
    download_file(neon_listing_url, local_listing)
    return parse_neon_listing(local_listing, valid_neon_sites)
def parse_neon_listing(listing_file, valid_neon_sites):
    """
    A function to parse neon listing file
    and find neon sites with the dates
    where data is available.

    Only entries whose "object" contains "atm/cdeps/" are considered. The
    object string is split on "/" and, by position, part 7 is used as the
    data version, part 8 as the site name and part 9 as the file name.
    For each valid site the monthly files (ending with YYYY-MM.nc) of the
    latest version give the start/end year and month.

    Args:
        listing_file (str): downloaded listing file (csv with an
            "object" column)
        valid_neon_sites (list): supported neon sites; a site found in the
            listing is accepted when its name occurs in one of the entries.

    Returns:
        available_list :
            list of neon_site objects that is found
            on the downloaded listing file.
    """
    # -- files ending with YYYY-MM.nc; the groups capture year and month
    monthly_file_pattern = r"(\d{4})-(\d{2})\.nc$"

    available_list = []

    objects = pd.read_csv(listing_file)["object"]

    # check for finidat files for transient run
    # (na=False: rows with a missing object are simply not selected)
    finidat_objects = objects[objects.str.contains("lnd/ctsm", regex=False, na=False)]

    # -- filter lines with atm/cdeps
    cdeps_objects = objects[objects.str.contains("atm/cdeps/", regex=False, na=False)]

    # -- split the object str to extract site name
    parts_df = cdeps_objects.str.split("/", expand=True)
    if 9 not in parts_df.columns:
        # Nothing in the listing is deep enough to hold version/site/file parts.
        logger.debug("No atm/cdeps data files found in %s", listing_file)
        return available_list

    # -- groupby site name
    for site_name, site_df in parts_df.groupby(8):
        # -- check if it is a valid neon site
        if not any(site_name in x for x in valid_neon_sites):
            continue

        # -- keep only files ending with YYYY-MM.nc and pull out year (column 0)
        # -- and month (column 1) in one step; other rows become NaN.
        dates_df = site_df[9].str.extract(monthly_file_pattern)
        is_monthly = dates_df[0].notna()
        if not is_monthly.any():
            logger.debug("No monthly data files found for neon site %s", site_name)
            continue

        # -- the version of the last listed monthly file is taken as the latest
        # -- NOTE(review): relies on the listing being ordered by version - confirm
        latest_version = site_df.loc[is_monthly, 7].iloc[-1]

        # -- exact comparison, so that e.g. "v1" does not also select "v10"
        dates_df = dates_df[is_monthly & (site_df[7] == latest_version)]

        # -- figure out start_year and end_year (first and last listed file)
        start_year = dates_df[0].iloc[0]
        end_year = dates_df[0].iloc[-1]

        # -- figure out start_month and end_month
        start_month = dates_df[1].iloc[0]
        end_month = dates_df[1].iloc[-1]

        logger.debug("Valid neon site %s found!", site_name)
        logger.debug("File version %s", latest_version)
        logger.debug("start_year=%s", start_year)
        logger.debug("end_year=%s", end_year)
        logger.debug("start_month=%s", start_month)
        logger.debug("end_month=%s", end_month)

        # -- the last finidat entry mentioning the site wins; None if there is none
        finidat = None
        for line in finidat_objects:
            if site_name in line:
                finidat = line.split(",")[0].split("/")[-1]

        neon_site = NeonSite(site_name, start_year, end_year, start_month, end_month, finidat)
        logger.debug(neon_site)
        available_list.append(neon_site)

    return available_list
def main(description):
    """
    Determine valid neon sites. Make an output directory if it does not exist.
    Loop through requested sites and run CTSM at that site.

    Args:
        description (str): passed to get_parser together with sys.argv and
            the list of valid neon sites.
    """
    cesmroot = path_to_ctsm_root()

    # Get the list of supported neon sites from usermods
    # The [!Fd]* portion means that we won't retrieve cases that start with:
    # F (FATES) or d (default). We should be aware of adding cases that start with these.
    neon_usermods = glob.glob(
        os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", "[!Fd]*")
    )
    # basename instead of splitting on "/" so the result does not depend on
    # the path separator.
    valid_neon_sites = sorted(os.path.basename(path) for path in neon_usermods)

    (
        site_list,
        output_root,
        run_type,
        experiment,
        prism,
        overwrite,
        run_length,
        base_case_root,
        run_from_postad,
        setup_only,
        no_batch,
        rerun,
        user_version,
    ) = get_parser(sys.argv, description, valid_neon_sites)

    if output_root:
        logger.debug("output_root : %s", output_root)
        # exist_ok avoids the race between checking for and creating the directory
        os.makedirs(output_root, exist_ok=True)

    # -- check neon listing file for available data:
    available_list = check_neon_listing(valid_neon_sites)

    # =================================
    # -- all neon sites can be cloned from one generic case
    # -- so no need to define a base_case for every site.
    res = "CLM_USRDAT"
    compset = "IHist1PtClm60Bgc" if run_type == "transient" else "I1PtClm60Bgc"

    # -- Looping over neon sites
    for neon_site in available_list:
        if neon_site.name not in site_list:
            continue

        if run_from_postad:
            neon_site.finidat = None

        # The base case is built once (for the first requested site when none
        # was given) and then reused for all following sites.
        if not base_case_root:
            user_mods_dirs = None
            base_case_root = neon_site.build_base_case(
                cesmroot, output_root, res, compset, user_mods_dirs, overwrite, setup_only
            )

        logger.info("-----------------------------------")
        logger.info("Running CTSM for neon site : %s", neon_site.name)

        neon_site.run_case(
            base_case_root,
            run_type,
            prism,
            run_length,
            user_version,
            overwrite=overwrite,
            setup_only=setup_only,
            no_batch=no_batch,
            rerun=rerun,
            experiment=experiment,
        )