#! /usr/bin/env python3

"""
|------------------------------------------------------------------|
|---------------------  Instructions  -----------------------------|
|------------------------------------------------------------------|
This is a wrapper script for running CTSM simulation for one or more
neon sites.

This script only supports neon sites; a more general script will be
developed later.

This script first creates and builds a generic base case.
Next, it will clone the base_case for different neon sites and run
types to reduce the need to build ctsm every time.

This script will do the following:
    1) Create a generic base case for cloning.
    2) Make the case for the specific neon site(s).
    3) Make changes to the case, for:
        a. AD spinup
        b. post-AD spinup
        c. transient
        #---------------
        d. SASU or Matrix spinup
    4) Build and submit the case.

-------------------------------------------------------------------
Instructions for running using conda python environments:

../../py_env_create
conda activate ctsm_py

-------------------------------------------------------------------
To see the available options:
    ./run_neon.py --help
-------------------------------------------------------------------
"""
# TODO (NS)
# - [ ]
# - [ ] Case dependency and the ability to check case status
# - [ ] If Case dependency works we don't need finidat given explicitly for post-ad and transient.

# - [ ] checkout_externals instead of using env variable
# - [ ] wget the fields available and run for those available

# - [ ] Matrix spin-up if (SASU) Eric merged it in
# - [ ] Make sure both AD and SASU are not on at the same time

# - [ ] Make sure CIME and other dependencies are checked out.


# Import libraries
import glob
import logging
import os
import sys
import pandas as pd

# Get the ctsm util tools and then the cime tools.
_CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python"))
sys.path.insert(1, _CTSM_PYTHON)

# pylint: disable=wrong-import-position
from ctsm.path_utils import path_to_ctsm_root
from ctsm.download_utils import download_file
from ctsm.site_and_regional.neon_arg_parse import get_parser
from ctsm.site_and_regional.neon_site import NeonSite

# pylint: disable=import-error, wildcard-import, wrong-import-order
from standard_script_setup import *

logger = logging.getLogger(__name__)


def check_neon_listing(valid_neon_sites):
    """
    Fetch the neon listing file and parse it.

    The listing is downloaded to "listing.csv" (a relative path, so it
    lands in the current working directory) and then handed to
    parse_neon_listing together with the valid site names.

    Args:
        valid_neon_sites : collection of neon site names that is
            forwarded unchanged to parse_neon_listing.

    Returns:
        The list returned by parse_neon_listing.
    """
    local_copy = "listing.csv"
    download_file("https://storage.neonscience.org/neon-ncar/listing.csv", local_copy)
    return parse_neon_listing(local_copy, valid_neon_sites)


def parse_neon_listing(listing_file, valid_neon_sites):
    """
    A function to parse neon listing file
    and find neon sites with the dates
    where data is available.

    Args:
        listing_file (str): downloaded listing file; it is read as csv and
            must have an "object" column holding "/"-separated paths.
        valid_neon_sites (list of str): supported neon sites. Entries are
            compared by their last path component, so both plain site
            names and directory paths ending in the site name work.

    Returns:
        available_list :
            list of neon_site objects that is found
            on the downloaded listing file.
            Sites without any file named like *YYYY-MM.nc are skipped.
    """

    available_list = []

    listing_df = pd.read_csv(listing_file)
    objects = listing_df["object"]

    # check for finidat files for transient run
    finidatlist = listing_df[objects.str.contains("lnd/ctsm", regex=False, na=False)]

    # -- filter lines with atm/cdeps
    cdeps_objects = objects[objects.str.contains("atm/cdeps/", regex=False, na=False)]

    # -- split the object str to extract site name
    # The code below reads column 7 as the data version, column 8 as the
    # site name and column 9 as the file name.
    parts = cdeps_objects.str.split("/", expand=True)
    if parts.shape[1] < 10:
        # Nothing deep enough to carry a site and a file name (this also
        # covers a listing without any atm/cdeps entries).
        logger.debug("No atm/cdeps data files found in %s", listing_file)
        return available_list

    # Exact-name lookup: substring matching would accept a listing entry
    # such as "ABB" just because "ABBY" is a supported site.
    valid_names = {os.path.basename(os.path.normpath(site)) for site in valid_neon_sites}

    # -- file names must end with YYYY-MM.nc; capture year and month.
    date_pattern = r"(\d{4})-(\d{2})\.nc$"

    # -- groupby site name
    for site_name, site_df in parts.groupby(8):
        # -- check if it is a valid neon site
        if site_name not in valid_names:
            continue

        # -- keep only files ending with YYYY-MM.nc
        dates = site_df[9].str.extract(date_pattern)
        is_dated = dates[0].notna()
        if not is_dated.any():
            logger.warning(
                "No atm/cdeps files ending in YYYY-MM.nc for neon site %s; skipping.",
                site_name,
            )
            continue
        site_df = site_df[is_dated]
        dates = dates[is_dated]

        # -- the version of the last listed file is taken as the latest one
        latest_version = site_df[7].iloc[-1]

        # Exact comparison: a substring/regex match on "v1" would also
        # keep rows that belong to "v10".
        dates = dates[site_df[7] == latest_version]

        # -- figure out start_year and end_year
        start_year = dates[0].iloc[0]
        end_year = dates[0].iloc[-1]

        # -- figure out start_month and end_month
        start_month = dates[1].iloc[0]
        end_month = dates[1].iloc[-1]

        logger.debug("Valid neon site %s found!", site_name)
        logger.debug("File version %s", latest_version)
        logger.debug("start_year=%s", start_year)
        logger.debug("end_year=%s", end_year)
        logger.debug("start_month=%s", start_month)
        logger.debug("end_month=%s", end_month)

        # -- the last lnd/ctsm entry mentioning the site name wins
        finidat = None
        for line in finidatlist["object"]:
            if site_name in line:
                finidat = line.split(",")[0].split("/")[-1]

        neon_site = NeonSite(site_name, start_year, end_year, start_month, end_month, finidat)
        logger.debug(neon_site)
        available_list.append(neon_site)

    return available_list


def main(description):
    """
    Determine valid neon sites. Make an output directory if it does not exist.
    Loop through requested sites and run CTSM at that site.

    Args:
        description (str): text passed to get_parser along with sys.argv.
    """
    cesmroot = path_to_ctsm_root()
    # Get the list of supported neon sites from usermods
    # The [!Fd]* portion means that we won't retrieve cases that start with:
    # F (FATES) or d (default). We should be aware of adding cases that start with these.
    site_dirs = glob.glob(os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", "[!Fd]*"))
    valid_neon_sites = sorted(os.path.basename(site_dir) for site_dir in site_dirs)

    (
        site_list,
        output_root,
        run_type,
        experiment,
        prism,
        overwrite,
        run_length,
        base_case_root,
        run_from_postad,
        setup_only,
        no_batch,
        rerun,
        user_version,
    ) = get_parser(sys.argv, description, valid_neon_sites)

    if output_root:
        logger.debug("output_root : %s", output_root)
        # exist_ok avoids the race between checking for the directory and
        # creating it.
        os.makedirs(output_root, exist_ok=True)

    # -- check neon listing file for available data:
    available_list = check_neon_listing(valid_neon_sites)

    # =================================
    # -- all neon sites can be cloned from one generic case
    # -- so no need to define a base_case for every site.

    res = "CLM_USRDAT"
    compset = "IHist1PtClm60Bgc" if run_type == "transient" else "I1PtClm60Bgc"

    # --  Looping over neon sites

    for neon_site in available_list:
        # -- only run the sites that were requested
        if neon_site.name not in site_list:
            continue

        if run_from_postad:
            # -- discard the finidat picked up from the listing file
            neon_site.finidat = None

        if not base_case_root:
            # -- built once by the first requested site and then reused
            # -- for every following site in this loop
            user_mods_dirs = None
            base_case_root = neon_site.build_base_case(
                cesmroot, output_root, res, compset, user_mods_dirs, overwrite, setup_only
            )

        logger.info("-----------------------------------")
        logger.info("Running CTSM for neon site : %s", neon_site.name)

        neon_site.run_case(
            base_case_root,
            run_type,
            prism,
            run_length,
            user_version,
            overwrite=overwrite,
            setup_only=setup_only,
            no_batch=no_batch,
            rerun=rerun,
            experiment=experiment,
        )