#GUI_WeatherPrep.py created by Jay Heppler (Davey Tree) with assist from Li Zhang and Ted Endreny (SUNY ESF)
#Code provides a GUI to orchestrate key steps in i-Tree Tools HydroPlus weather retrieval and processing:
#   0. Code designed to work with legacy NOAA NCEI ISD (Integrated Surface Dataset) and active GHCNh (Global Historical Climatology Network hourly) data
#   1. Find NOAA NCEI weather stations based on country, state, county, place and proximity search of database latitude and longitude
#   2. Use station associated 5-digit WBAN or 6-digit WMO or USAF to generate GHCN ID or find ISD
#   2a. GHCN ID for sites with WBAN codes: #<COUNTRY CODE>W000<WBAN>, For Naples, Italy with WBAN = 34113, ITW00034113
#   2b. GHCN ID for sites without WBAN codes but with WMO or USAF codes: #<COUNTRY CODE>0000<WMO>, For Cagliari, Italy with WMO 16560, IT000016560
#   3. Retrieve NOAA NCEI weather data from GHCNh via HTTPS or from ISD via FTP
#   4. Convert NOAA NCEI weather data from GHCNh psv to csv or from ISD gunzip to txt with ishapp2.exe
#   5. Process NOAA NCEI weather data to data formatted for HydroPlus tools

# initUI(...) – builds the tabs, year fields, data source combo, etc.
# process_data(...) – should only read values from those widgets and start the Worker.
# Worker(params) – takes a dict of parameters and runs the pipelines.

import os
import sys
import re
import gzip
import ftplib
import subprocess
import shutil
import pandas as pd
import urllib.request  
import warnings
import numpy as np
import xarray as xr
import cdsapi
import zipfile
import calendar
from typing import Optional
from datetime import datetime
from pathlib import Path
from shutil import copy2
from lxml import etree as ET
from datetime import datetime
from PyQt5 import QtWidgets, QtCore, QtGui
from PyQt5.QtCore import QObject, pyqtSignal, QThread, Qt
from PyQt5.QtGui import QPixmap, QIcon, QPainter, QColor, QFont
from PyQt5.QtWidgets import QSplashScreen, QApplication, QTabWidget, QFileDialog
from subprocess import STARTUPINFO, STARTF_USESHOWWINDOW, SW_HIDE

# Suppress pandas DtypeWarning messages for the whole process.
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

# Matches positional XPath predicates such as "[2]"; removing them gives an
# index-free path, which is used to match nested XML elements.
_INDEX_RE = re.compile(r"\[\d+\]")

# --- Determine base directory depending on how we're running ---
if getattr(sys, "frozen", False):
    # Running as a PyInstaller-built exe: bundled files live under sys._MEIPASS
    SCRIPT_DIR = Path(sys._MEIPASS)
else:
    # Running as a normal .py script: use the folder containing this file
    SCRIPT_DIR = Path(__file__).resolve().parent

# Folder that holds this GUI script's resources (icons, logo, ...)
RESOURCES_DIR = SCRIPT_DIR / "resources"

# "Root" folder one level above SCRIPT_DIR (for other tools, etc.)
ROOT_DIR = SCRIPT_DIR.parent

# NOTE: import-time side effect. The process working directory becomes
# ROOT_DIR, so relative paths used later resolve against it.
os.chdir(ROOT_DIR)

# ===== ERA5 helper utilities (adapted from p_ERA5_nc_to_UrbanTC_csv.V3.py) =====
def era5_get_lat_lon_names(ds):
    """Return the (latitude, longitude) coordinate names used by ``ds``.

    Well-known names are tried first, looking in both ``ds.coords`` and
    ``ds.dims``. If either name is still unknown, every coordinate is
    inspected for a ``standard_name`` attribute equal to "latitude" or
    "longitude"; the last matching coordinate wins.

    Args:
        ds: Dataset-like object exposing ``coords`` and ``dims``
            (presumably an xarray Dataset; confirm against callers).

    Returns:
        Tuple ``(lat_name, lon_name)``.

    Raises:
        ValueError: If either name cannot be determined.
    """
    def _first_present(candidates):
        # First candidate that is a coordinate or a dimension, else None.
        for candidate in candidates:
            if candidate in ds.coords or candidate in ds.dims:
                return candidate
        return None

    lat_dd = _first_present(("latitude", "lat_dd", "y"))
    lon_dd = _first_present(("longitude", "lon_dd", "x"))

    if lat_dd is None or lon_dd is None:
        # Fall back to the standard_name attribute of each coordinate.
        for coord_name, coord in ds.coords.items():
            standard_name = getattr(coord, "standard_name", "")
            if standard_name == "latitude":
                lat_dd = coord_name
            if standard_name == "longitude":
                lon_dd = coord_name

    if lat_dd is None or lon_dd is None:
        raise ValueError("Could not detect lat_dd/lon_dd names for ERA5.")
    return lat_dd, lon_dd

def era5_sel_point(ds, lat_name, lon_name, lat_pt, lon_pt):
    """Select the grid cell of ``ds`` nearest to (``lat_pt``, ``lon_pt``).

    When the largest longitude in ``ds`` exceeds 180 the grid is treated as
    0..360 and the requested longitude is wrapped with ``% 360.0`` before
    selecting; otherwise the longitude is used unchanged.

    Returns:
        Whatever ``ds.sel(..., method="nearest")`` returns.
    """
    grid_is_0_to_360 = float(ds[lon_name].max()) > 180.0
    if grid_is_0_to_360:
        target_lon = lon_pt % 360.0
    else:
        target_lon = lon_pt
    selector = {lat_name: lat_pt, lon_name: target_lon}
    return ds.sel(selector, method="nearest")

def era5_ensure_time(ds):
    """Return ``ds`` with its time axis named "time" where possible.

    Resolution order:
      1. ``None`` is passed through.
      2. A dataset that already has a "time" coordinate is returned as is.
      3. "valid_time" (as coordinate or dimension) is renamed to "time".
      4. The first dimension whose name contains "time" (case-insensitive)
         is renamed to "time".
      5. Otherwise the dataset is returned unchanged.
    """
    if ds is None:
        return None

    if "time" in ds.coords:
        return ds

    if "valid_time" in ds.coords or "valid_time" in ds.dims:
        return ds.rename({"valid_time": "time"})

    # Generic fallback: any dimension that looks like a time axis.
    time_like_dim = next((dim for dim in ds.dims if "time" in dim.lower()), None)
    if time_like_dim is None:
        return ds
    return ds.rename({time_like_dim: "time"})

def era5_drop_singleton_dims(ds):
    """Squeeze length-1 dimensions (except "time") out of ``ds``.

    After squeezing, if an "expver" dimension with more than one entry is
    still present, only its last entry is kept. ``None`` is passed through.
    """
    if ds is None:
        return ds

    # Index 0 along every singleton dimension other than "time".
    singleton_index = {
        dim: 0
        for dim, length in ds.sizes.items()
        if length == 1 and dim != "time"
    }
    if singleton_index:
        ds = ds.isel(singleton_index, drop=True)

    has_multiple_expver = "expver" in ds.dims and ds.sizes["expver"] > 1
    if has_multiple_expver:
        ds = ds.isel(expver=-1, drop=True)
    return ds

def era5_find_variable(ds, name: str, debug_label: str):
    """Return ``ds[name]``, or raise a descriptive ``KeyError``.

    Args:
        ds: Dataset-like object supporting ``in``, indexing and ``data_vars``.
        name: Variable name to fetch.
        debug_label: Text included in the error message to identify the
            dataset being searched.

    Raises:
        KeyError: If ``name`` is not in ``ds``; the message lists the
            available data variables.
    """
    if name in ds:
        return ds[name]

    available = list(ds.data_vars)
    raise KeyError(
        f"ERA5 variable '{name}' not found for {debug_label}. "
        f"Available: {available}"
    )

def era5_find_var_robust(ds, prefer_exact, longname_contains=None, stdname_contains=None, debug_label=""):
    """Locate a variable in ``ds`` by name, then by descriptive attributes.

    Search order:
      1. The first entry of ``prefer_exact`` that is present in ``ds``.
      2. The first data variable whose ``long_name`` contains every term of
         ``longname_contains`` (case-insensitive).
      3. The first data variable whose ``standard_name`` contains every term
         of ``stdname_contains`` (case-insensitive).

    ``debug_label`` is accepted but not used by this function.

    Returns:
        ``(variable, name)`` for the first match, or ``(None, None)`` when
        ``ds`` is ``None`` or nothing matches.
    """
    if ds is None:
        return None, None

    # 1) exact variable names, in order of preference
    for candidate in prefer_exact:
        if candidate in ds:
            return ds[candidate], candidate

    # 2) long_name, then 3) standard_name substring matching
    attribute_searches = (
        ("long_name", longname_contains),
        ("standard_name", stdname_contains),
    )
    for attr_name, required_terms in attribute_searches:
        if not required_terms:
            continue
        for var_name in ds.data_vars:
            text = (getattr(ds[var_name], attr_name, "") or "").lower()
            if all(term.lower() in text for term in required_terms):
                return ds[var_name], var_name

    return None, None


def format_lat_lon(lat_dd: float, lon_dd: float) -> str:
    """Format a coordinate pair as a hemisphere-prefixed label.

    The absolute latitude is rendered with format ``06.3f`` and the absolute
    longitude with ``07.3f``, so latitude has two integer digits and
    longitude three.

    Example: ``format_lat_lon(40.123, -75.456)`` -> ``"N40.123_W075.456"``.
    """
    if lat_dd >= 0:
        lat_hemisphere = "N"
    else:
        lat_hemisphere = "S"

    if lon_dd >= 0:
        lon_hemisphere = "E"
    else:
        lon_hemisphere = "W"

    lat_text = format(abs(lat_dd), "06.3f")
    lon_text = format(abs(lon_dd), "07.3f")
    return f"{lat_hemisphere}{lat_text}_{lon_hemisphere}{lon_text}"

#def to manage copying comments between XML files
def _indexless_xpath(tree, elem):
    """Return ``tree.getpath(elem)`` with every ``[n]`` index removed."""
    indexed_path = tree.getpath(elem)
    return _INDEX_RE.sub("", indexed_path)

def _element_keys(elem, tree):
    """Return the lookup keys used to match ``elem`` across XML trees.

    If the element has an ``id`` attribute (checked first) or a ``name``
    attribute, a single ``"<tag>|<attr>=<value>"`` key is returned.
    Otherwise the keys are the exact absolute XPath and, when it differs,
    the same path with positional indexes removed.
    """
    # Prefer a stable id/name when the element has one.
    for attr in ("id", "name"):
        value = elem.get(attr)
        if value is not None:
            return (f"{elem.tag}|{attr}={value}",)

    # Fallback: exact and relaxed absolute XPaths.
    exact_path = tree.getpath(elem)
    relaxed_path = _indexless_xpath(tree, elem)
    if exact_path == relaxed_path:
        return (exact_path,)
    return (exact_path, relaxed_path)

def _collect_adjacent_comments(parent, child):
    """Collect the comment runs that directly surround ``child``.

    Walks outward from ``child`` among the children of ``parent`` and
    gathers the stripped text of consecutive comment nodes on each side,
    stopping at the first non-comment sibling. Comments whose text is empty
    after stripping are skipped but do not end the run.

    Returns:
        ``(before, after)`` lists in document order. Both are empty when
        ``child`` is not a child of ``parent``.
    """
    siblings = list(parent)
    try:
        child_pos = siblings.index(child)
    except ValueError:
        return [], []

    def _gather(start, step):
        # Texts of the contiguous comment run beginning at `start`.
        texts = []
        pos = start
        while 0 <= pos < len(siblings) and isinstance(siblings[pos], ET._Comment):
            text = (siblings[pos].text or "").strip()
            if text:
                texts.append(text)
            pos += step
        return texts

    before = _gather(child_pos - 1, -1)
    before.reverse()  # gathered nearest-first; restore document order
    after = _gather(child_pos + 1, 1)
    return before, after

def _build_comment_index(src_tree):
    """Map element keys to the comments adjacent to each element.

    Every element of ``src_tree`` that has a parent and at least one
    adjacent comment is registered under each of its ``_element_keys``.
    The first element to claim a key keeps it.

    Returns:
        Dict of ``key -> {"before": [...], "after": [...]}``.
    """
    index = {}
    for elem in src_tree.iter():
        # Comments and processing instructions have a non-string tag.
        if not isinstance(elem.tag, str):
            continue
        parent = elem.getparent()
        if parent is None:
            continue

        before, after = _collect_adjacent_comments(parent, elem)
        if before or after:
            for key in _element_keys(elem, src_tree):
                index.setdefault(key, {"before": before, "after": after})
    return index

def _indent_for(node):
    """Return a newline followed by two spaces per ancestor of ``node``."""
    indent = "\n"
    ancestor = node.getparent()
    while ancestor is not None:
        indent += "  "
        ancestor = ancestor.getparent()
    return indent

def _ensure_comments(parent, child, before_comments, after_comments, style="inline_singleline"):
    """Insert missing comments around ``child`` inside ``parent``.

    Comments whose text already appears in the comment run adjacent to
    ``child`` (on the corresponding side) are not inserted again.

    Args:
        parent: Element that contains ``child``.
        child: Element the comments are attached to.
        before_comments: Comment texts to place immediately before ``child``.
        after_comments: Comment texts to place immediately after ``child``.
        style: Layout of the "after" comments:
            "inline"            - comment glued directly after the child,
            "inline_singleline" - one space, then the comment on the same line,
            anything else       - "block": comment on its own line.
    """
    # Comments already sitting next to the child; used to avoid duplicates.
    existing_before, existing_after = _collect_adjacent_comments(parent, child)

    # ---------- BEFORE ----------
    # New comments go at the child's current position, pushing it right.
    insert_pos = list(parent).index(child)
    # Reuse the child's whitespace-only tail as separator, else compute an indent.
    leading_ws = child.tail if (child.tail is not None and child.tail.strip() == "") else _indent_for(child)
    for txt in before_comments:
        if txt in existing_before:
            continue
        c = ET.Comment(txt)
        c.tail = leading_ws
        parent.insert(insert_pos, c)
        insert_pos += 1
        existing_before.append(txt)

    # ---------- AFTER ----------
    # Re-read the children: the child's index moved if comments were inserted above.
    kids = list(parent)
    insert_pos = kids.index(child) + 1

    if style == "inline":
        # glue comment directly after </child>, then keep next element on its own line
        for txt in after_comments:
            if txt in existing_after:
                continue
            # The whitespace that followed the child moves behind the comment.
            original_tail = child.tail or _indent_for(child)
            child.tail = ""     # no whitespace between </child> and <!-- ... -->
            c = ET.Comment(txt)
            c.tail = original_tail
            parent.insert(insert_pos, c)
            insert_pos += 1
            existing_after.append(txt)

    elif style == "inline_singleline":
        # </child> <!-- comment -->\n  <next ...>
        # ensure next element gets newline+indent
        next_line = child.tail if (child.tail and "\n" in child.tail) else _indent_for(child)
        for txt in after_comments:
            if txt in existing_after:
                continue
            # Put a space before the comment so it reads nicely on same line
            child.tail = " "    # single space before comment on the same line
            c = ET.Comment(txt)
            c.tail = next_line  # newline+indent for the next element
            parent.insert(insert_pos, c)
            insert_pos += 1
            existing_after.append(txt)

    else:  # "block"
        # </child>\n  <!-- comment -->\n  <next ...>
        # Make sure the child is followed by a line break before the comment.
        if not child.tail or "\n" not in child.tail:
            child.tail = _indent_for(child)
        for txt in after_comments:
            if txt in existing_after:
                continue
            c = ET.Comment(txt)
            c.tail = child.tail
            parent.insert(insert_pos, c)
            insert_pos += 1
            existing_after.append(txt)

def _remove_all_comments_preserve_ws(tree):
    """Remove every comment under the root of ``tree``, keeping whitespace.

    Each comment's tail text is appended to the previous sibling's tail, or
    to the parent's text when the comment is the first child, before the
    comment is removed. Comments without a parent are left alone.
    """
    comments = [node for node in tree.getroot().iter() if isinstance(node, ET._Comment)]
    for comment in comments:
        parent = comment.getparent()
        if parent is None:
            continue

        trailing_text = comment.tail or ""
        siblings = list(parent)
        position = siblings.index(comment)
        if position == 0:
            # First child: the whitespace belongs to the parent's text.
            parent.text = (parent.text or "") + trailing_text
        else:
            previous = siblings[position - 1]
            previous.tail = (previous.tail or "") + trailing_text
        parent.remove(comment)

def copy_comments_from_template(template_path, dst_tree, clear_existing=False):
    """
    Copy adjacent comments from the template XML file onto dst_tree.

    Elements are matched by the keys from _element_keys (id/name, exact
    xpath, relaxed xpath); the first key found in the template index wins.

    Args:
        template_path: Path of the XML file to read comments from.
        dst_tree: Parsed tree that receives the comments (modified in place).
        clear_existing: When true, all comments in dst_tree are removed first.

    Returns:
        Number of template comments associated with the matched elements.
        Comments that were already present are included in this count.
    """
    template_parser = ET.XMLParser(remove_blank_text=False, remove_comments=False)
    template_tree = ET.parse(template_path, template_parser)

    if clear_existing:
        _remove_all_comments_preserve_ws(dst_tree)

    comment_index = _build_comment_index(template_tree)

    copied = 0
    for elem in dst_tree.iter():
        # Skip comments/PIs and the parentless root.
        if not isinstance(elem.tag, str):
            continue
        parent = elem.getparent()
        if parent is None:
            continue

        entry = next(
            (comment_index[key] for key in _element_keys(elem, dst_tree) if key in comment_index),
            None,
        )
        if entry is None:
            continue

        before, after = entry["before"], entry["after"]
        if before or after:
            _ensure_comments(parent, elem, before, after)
            copied += len(before) + len(after)
    return copied

import os
import numpy as np
import pandas as pd
from datetime import timedelta

def _infer_gmt_offset_from_lon(lon_dd: float) -> int:
    """
    Estimate the standard-time GMT offset (whole hours, no DST) from longitude.

    The longitude is divided into 15-degree zones and rounded with np.round.
    lon_dd: East-positive (e.g., -77 for US East gives -5).
    """
    zones_from_greenwich = lon_dd / 15.0
    return int(np.round(zones_from_greenwich))

def _solar_terms_local_stdtime(dt_local: pd.Timestamp, lat_dd: float, lon_dd: float, gmt_offset: int):
    """
    Return (hour_angle_rad, declination_rad) for one local-standard-time stamp.

    Uses local standard time (no DST), an equation-of-time correction and a
    longitude correction relative to the time zone's standard meridian.

    Args:
        dt_local: Timestamp in local standard time.
        lat_dd: Latitude in decimal degrees (accepted for interface
            compatibility; not used in the calculation).
        lon_dd: Longitude in decimal degrees, East-positive.
        gmt_offset: Hours offset from GMT (negative west of Greenwich).

    Fix relative to the previous version: with East-positive longitudes the
    longitude correction is 4 * (lon - standard_meridian) minutes. The old
    code used 4 * (standard_meridian - lon), which is the form for
    West-positive longitudes and shifted solar time the wrong way.
    """
    # Day of year (1..365/366)
    jday = int(dt_local.dayofyear)

    # Day angle [rad]
    day_angle = np.deg2rad(360.0 * (jday - 1) / 365.0)
    cos1, sin1 = np.cos(day_angle), np.sin(day_angle)
    cos2, sin2 = np.cos(2 * day_angle), np.sin(2 * day_angle)
    cos3, sin3 = np.cos(3 * day_angle), np.sin(3 * day_angle)

    # Equation of time [minutes]
    eq_time_min = (
        0.000075
        + 0.001868 * cos1
        - 0.032077 * sin1
        - 0.014615 * cos2
        - 0.040849 * sin2
    ) * 229.18

    # Declination [rad]
    decl = (
        0.006918
        - 0.399912 * cos1
        + 0.070257 * sin1
        - 0.006758 * cos2
        + 0.000907 * sin2
        - 0.002697 * cos3
        + 0.00148 * sin3
    )

    # Standard meridian of the time zone [deg, East-positive]
    std_meridian_deg = 15.0 * gmt_offset

    # Local clock time as decimal hours; the +1 hour shift is an existing
    # convention of this tool and is kept unchanged.
    hour_decimal = (
        dt_local.hour + dt_local.minute / 60.0 + dt_local.second / 3600.0
    ) + 1.0

    # Time correction [minutes]: 4 min per degree east of the standard
    # meridian (solar time runs ahead there), plus the equation of time.
    t_corr_min = 4.0 * (lon_dd - std_meridian_deg) + eq_time_min
    true_solar_time_hr = hour_decimal + (t_corr_min / 60.0)

    # Hour angle: zero at solar noon, 15 degrees per hour
    hour_angle_deg = 15.0 * true_solar_time_hr - 180.0
    hour_angle_rad = np.deg2rad(hour_angle_deg)

    return hour_angle_rad, decl

def write_weather_and_radiation_csvs_from_era5_df(
    df_era5: pd.DataFrame,
    out_dir: str,
    lat_dd: float,
    lon_dd: float,
    gmt_offset_hours: Optional[int] = None
):
    """
    Write Weather.csv and Radiation.csv into out_dir from an ERA5 dataframe.

    Args:
        df_era5: Input table; it is copied, not modified. Must contain a
            'time' column (assumed to be UTC) and the columns
            Tair_2m_K, Tdew_2m_K, Wspd_10m_mps, AtmPres_Pa, Ppt_1hr_m,
            Rsw_down_Wpm2, Rsw_up_Wpm2, Rsw_dir_Wpm2, Rsw_dif,
            Rlw_down_Wpm2, Rlw_up_Wpm2.
        out_dir: Output folder; created if missing.
        lat_dd: Latitude in decimal degrees.
        lon_dd: Longitude in decimal degrees (East-positive).
        gmt_offset_hours: Standard-time offset from GMT; inferred from
            lon_dd when None.

    Returns:
        (weather_path, rad_path, gmt_offset_hours)

    Raises:
        ValueError: If df_era5 has no 'time' column.
    """
    key_columns = ("YYYYMMDD", "HH:MM:SS")

    def _write_fixed_precision(table, file_name):
        # Render every non-key column with 8 decimals, then save the CSV.
        value_columns = [name for name in table.columns if name not in key_columns]
        for name in value_columns:
            table[name] = table[name].astype(float).map("{:.8f}".format)
        destination = os.path.join(out_dir, file_name)
        table.to_csv(destination, index=False)
        return destination

    os.makedirs(out_dir, exist_ok=True)

    frame = df_era5.copy()
    if "time" not in frame.columns:
        raise ValueError("ERA5 dataframe must contain a 'time' column.")
    frame["time"] = pd.to_datetime(frame["time"])

    # Standard-time GMT offset: caller-supplied or inferred from longitude.
    if gmt_offset_hours is None:
        gmt_offset_hours = _infer_gmt_offset_from_lon(lon_dd)

    # UTC -> local standard time (no DST), plus the date/time label columns.
    frame["time_local"] = frame["time"] + pd.to_timedelta(gmt_offset_hours, unit="h")
    frame["YYYYMMDD"] = frame["time_local"].dt.strftime("%Y%m%d")
    frame["HHMMSS"] = frame["time_local"].dt.strftime("%H:%M:%S")

    # ---- Unit conversions ----
    air_temp_c = frame["Tair_2m_K"] - 273.15
    dew_temp_c = frame["Tdew_2m_K"] - 273.15
    pressure_kpa = frame["AtmPres_Pa"] / 1000.0

    # Ppt_1hr_m is written unchanged as the precipitation rate in m/h.
    precip_rate_mph = frame["Ppt_1hr_m"]

    # Net radiation: shortwave plus longwave, downwelling positive.
    net_radiation = (
        frame["Rsw_down_Wpm2"] - frame["Rsw_up_Wpm2"]
        + frame["Rlw_down_Wpm2"] - frame["Rlw_up_Wpm2"]
    )

    # ---- Solar geometry (latitude constant; hour angle/declination per row) ----
    lat_rad = np.deg2rad(lat_dd)
    solar_terms = [
        _solar_terms_local_stdtime(stamp, lat_dd, lon_dd, gmt_offset_hours)
        for stamp in frame["time_local"]
    ]
    hour_angles = np.array([terms[0] for terms in solar_terms], dtype=float)
    declinations = np.array([terms[1] for terms in solar_terms], dtype=float)

    # ---------------- Weather.csv ----------------
    weather = pd.DataFrame({
        "YYYYMMDD": frame["YYYYMMDD"],
        "HH:MM:SS": frame["HHMMSS"],
        "Temperature_Air_C(C)": air_temp_c,
        "Temperature_DewPoint_C(C)": dew_temp_c,
        "Radiation_Net_Wpm2(W/m^2)": net_radiation,
        "Wind_Speed_mps(m/s)": frame["Wspd_10m_mps"],
        "Pressure_Atmosphere_kPa(kPa)": pressure_kpa,
        "Precipitation_Rate_mph(m/h)": precip_rate_mph,
        "Snow_DepthOnGround_m(m)": 0.0,   # placeholder
    })
    weather_path = _write_fixed_precision(weather, "Weather.csv")

    # ---------------- Radiation.csv ----------------
    radiation = pd.DataFrame({
        "YYYYMMDD": frame["YYYYMMDD"],
        "HH:MM:SS": frame["HHMMSS"],
        "Radiation_Shortwave_Direct_Wpm2(W/m^2)": frame["Rsw_dir_Wpm2"],
        "Radiation_Shortwave_Diffuse_Wpm2(W/m^2)": frame["Rsw_dif"],
        "Radiation_Longwave_Downwelling_Wpm2(W/m^2)": frame["Rlw_down_Wpm2"],
        "Radiation_Longwave_Upwelling_Wpm2(W/m^2)": frame["Rlw_up_Wpm2"],
        "Latitude_rad(Radian)": lat_rad,
        "HourAngle_rad(Radian)": hour_angles,
        "DeclinationAngle_rad(Radian)": declinations,
    })
    rad_path = _write_fixed_precision(radiation, "Radiation.csv")

    return weather_path, rad_path, gmt_offset_hours


#~~~~~~~~~~~~~~~~~~ACCESSORIES TO MAIN APP~~~~~~~~~~~~~~
#LOADING SCREEN
class SplashScreen(QSplashScreen):
    """Loading screen showing the i-Tree logo on a white, padded canvas."""

    def __init__(self):
        # QPixmap and QIcon are given the logo location as a string path.
        logo_file = str(RESOURCES_DIR / "iTreeLogo.png")
        logo = QPixmap(logo_file)

        # White canvas larger than the logo: 60 px wider and 90 px taller,
        # leaving room around the logo for the status text.
        canvas_width = logo.width() + 60
        canvas_height = logo.height() + 90
        canvas = QPixmap(canvas_width, canvas_height)
        canvas.fill(QColor(Qt.white))

        # Paint the logo 30 px in from the top-left corner of the canvas.
        painter = QPainter(canvas)
        painter.drawPixmap(30, 30, logo)
        painter.end()

        super().__init__(canvas)
        self.setWindowIcon(QIcon(logo_file))

        # Bold status message, bottom-centred in black.
        self.setFont(QFont('Arial', 12, QFont.Bold))
        message_alignment = int(QtCore.Qt.AlignBottom | QtCore.Qt.AlignCenter)
        self.showMessage(
            "i-Tree Research Suite - WeatherPrep\nLoading...",
            message_alignment,
            QtCore.Qt.black,
        )

#Advanced XML; Secondary Widget
class XMLConfigWindow(QtWidgets.QWidget):
    """Secondary window holding the advanced XML configuration inputs.

    The widgets are laid out in a two-column grid (label, editor). Current
    values are read back as a dict with get_configuration_values().
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        # Placeholders for the editors; initUI() replaces them with real widgets.
        self.windSensorHeight_spinbox = None
        self.treeHeight_spinbox = None
        self.vegetation_type_dropdown = None
        self.evergreen_spinbox = None
        self.maxLAI_spinbox = None
        self.initUI()

    def open_file_dialog(self):
        """Let the user pick a precipitation file and show its path in the line edit."""
        options = QFileDialog.Options()
        fileName, _ = QFileDialog.getOpenFileName(self, "Select Precipitation File", "",
                                                  "All Files (*);;Text Files (*.txt)", options=options)
        if fileName:
            # Replace forward slashes with backslashes in the file path
            corrected_file_path = fileName.replace('/', '\\')
            self.precipitation_file_line_edit.setText(corrected_file_path)

    #initUI for Advanced XML Configuration, not HydroPlus Weather Processor UI 
    def initUI(self):
        """Create the window's widgets and arrange them in a grid layout."""
        self.setWindowTitle('Advanced XML Configuration')
        #icon_path expects a string path
        icon_path = str(RESOURCES_DIR / "iTree_transparent.ico")
        self.setWindowIcon(QIcon(icon_path))
        layout = QtWidgets.QGridLayout()

        # Model selector combo box
        # NOTE(review): 'Bulding' looks like a typo for 'Building', but it is a
        # runtime value returned by get_configuration_values(); check every
        # consumer before renaming it.
        self.model_type = QtWidgets.QComboBox()
        self.model_type.addItems(['Hydro', 'Bulding'])
        layout.addWidget(QtWidgets.QLabel('Model:'), 0, 0)  # Row 0, Column 0
        layout.addWidget(self.model_type, 0, 1)  # Row 0, Column 1

        # Maximum LAI: 0-10 in steps of 0.1, default 5
        layout.addWidget(QtWidgets.QLabel('Maximum LAI:'), 1, 0)  # Row 1, Column 0
        self.maxLAI_spinbox = QtWidgets.QDoubleSpinBox()
        self.maxLAI_spinbox.setRange(0, 10)
        self.maxLAI_spinbox.setDecimals(1)
        self.maxLAI_spinbox.setSingleStep(0.1)
        self.maxLAI_spinbox.setValue(5)
        layout.addWidget(self.maxLAI_spinbox, 1, 1)  # Row 1, Column 1

        # Evergreen (%): 0-100, default 5
        layout.addWidget(QtWidgets.QLabel('Evergreen (%):'), 2, 0)  # Row 2, Column 0
        self.evergreen_spinbox = QtWidgets.QSpinBox()
        self.evergreen_spinbox.setRange(0, 100)
        self.evergreen_spinbox.setValue(5)
        layout.addWidget(self.evergreen_spinbox, 2, 1)  # Row 2, Column 1

        # Vegetation Type: defaults to 'Tree'
        layout.addWidget(QtWidgets.QLabel('Vegetation Type:'), 3, 0)  # Row 3, Column 0
        self.vegetation_type_dropdown = QtWidgets.QComboBox()
        self.vegetation_type_dropdown.addItems(['Tree', 'Shrub', 'Grass'])
        self.vegetation_type_dropdown.setCurrentIndex(self.vegetation_type_dropdown.findText('Tree'))
        layout.addWidget(self.vegetation_type_dropdown, 3, 1)  # Row 3, Column 1

        # Tree Height (m): 0-50, default 12
        layout.addWidget(QtWidgets.QLabel('Tree Height (m):'), 4, 0)  # Row 4, Column 0
        self.treeHeight_spinbox = QtWidgets.QSpinBox()
        self.treeHeight_spinbox.setRange(0, 50)
        self.treeHeight_spinbox.setValue(12)
        layout.addWidget(self.treeHeight_spinbox, 4, 1)  # Row 4, Column 1

        # Wind Sensor Height (m): 0-50, default 10
        layout.addWidget(QtWidgets.QLabel('Sensor Height (m):'), 5, 0)  # Row 5, Column 0
        self.windSensorHeight_spinbox = QtWidgets.QSpinBox()
        self.windSensorHeight_spinbox.setRange(0, 50)
        self.windSensorHeight_spinbox.setValue(10)
        layout.addWidget(self.windSensorHeight_spinbox, 5, 1)  # Row 5, Column 1

        # Precipitation File Selector: line edit plus a '...' browse button
        layout.addWidget(QtWidgets.QLabel('Precipitation File:'), 6, 0)  # Row 6, Column 0
        file_selector_layout = QtWidgets.QHBoxLayout()
        self.precipitation_file_line_edit = QtWidgets.QLineEdit()
        file_selector_button = QtWidgets.QPushButton('...')
        file_selector_button.clicked.connect(self.open_file_dialog)
        file_selector_layout.addWidget(self.precipitation_file_line_edit)
        file_selector_layout.addWidget(file_selector_button)
        layout.addLayout(file_selector_layout, 6, 1)  # Row 6, Column 1

        self.setLayout(layout)


    def get_configuration_values(self):
        """Return the current widget values as a dict keyed by setting name."""
        return {
            'model': self.model_type.currentText(),
            'max_lai': self.maxLAI_spinbox.value(),
            'evergreen_percent': self.evergreen_spinbox.value(),
            'vegetation_type': self.vegetation_type_dropdown.currentText(),
            'tree_height': self.treeHeight_spinbox.value(),
            'wind_sensor_height': self.windSensorHeight_spinbox.value(),
            'precip_file': self.precipitation_file_line_edit.text()
        }

#CUSTOM SPINBOX FOR SUB-HOURLY INTERVAL
class CustomSpinBox(QtWidgets.QSpinBox):
    """Spin box whose arrows step through a fixed list of interval values.

    The widget range is 1-60, so a value outside ``valid_values`` can still
    be typed in. Stepping from such a value moves to the neighbouring valid
    value instead of raising ``ValueError`` as the previous code did.
    """

    def __init__(self, parent=None):
        super(CustomSpinBox, self).__init__(parent)
        self.setRange(1, 60)  # Setting a wider range initially
        # Allowed values, in ascending order.
        self.valid_values = [1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30]
        self.setValue(2)

    def stepBy(self, steps):
        """Move ``steps`` positions through ``valid_values``, clamped at both ends."""
        if not steps:
            return

        values = self.valid_values
        current = self.value()
        if current in values:
            target = values.index(current) + steps
        else:
            # Number of valid values below the current one, i.e. where the
            # current value would be inserted into the sorted list.
            insert_at = sum(1 for value in values if value < current)
            # The first step up lands on values[insert_at]; the first step
            # down lands on values[insert_at - 1].
            target = insert_at + steps - 1 if steps > 0 else insert_at + steps

        target = max(0, min(target, len(values) - 1))
        self.setValue(values[target])

# ~~~~~~~~~~~~~WORKER CLASS FOR GUI MANAGEMENT ~~~~~~~~~~~~~~~~~~~
class Worker(QObject):
    update_console = pyqtSignal(str)
    finished = pyqtSignal()

    def __init__(self, params, parent=None):
        """Store the run parameters and reset the worker's status flags.

        Args:
            params: Parameters for this run, kept as ``self.params``.
            parent: Optional parent object passed to the base class.
        """
        super().__init__(parent)
        # Status flags start cleared.
        self.flag_failure = False
        self.stop_requested = False
        # Parameters supplied by the caller.
        self.params = params
        
    # cc is your 2- or 3-letter country code (e.g., "US", "AS", "CH", "ROM", "Y3")
    # wban and coop are strings from your metadata; usaf is the station's USAF ID.
    def build_station_id(self, cc: str, usaf: str, wban: str, coop: str = "") -> str:
        """
        Build a GHCN(-Hourly) station ID from station identifiers.

        Args:
            cc: Country code (e.g. 'US', 'ASN', 'CH'). Surrounding whitespace
                is ignored and the code is upper-cased.
            usaf: USAF ID (string); its first 5 characters are used as the WMO.
            wban: WBAN ID (string); '99999' or empty means "no WBAN".
            coop: COOP ID (for US stations), optional.

        Rules:
          US:
            - if WBAN present      -> USW000<WBAN>
            - else if COOP present -> USC00<COOP>
            - else                 -> USC00<WMO>
          Non-US, WBAN present:
            - 3-letter CC -> <CC>000<WBAN>   (no 'W')
            - otherwise   -> <CC>W000<WBAN>
          Non-US, no WBAN:
            - 3-letter CC -> <CC>000<WMO>
            - otherwise   -> <CC>0000<WMO>

        WBAN and COOP values are left-padded with zeros to 5 characters.

        Raises:
            ValueError: If no usable identifier is available for the station.
        """
        # Normalise all identifiers the same way. Stripping cc matters: a
        # padded code such as "US " would otherwise miss the US branch and
        # be treated as a 3-letter country code.
        cc = cc.strip().upper()
        usaf = (usaf or "").strip()
        wban = (wban or "").strip()
        coop = (coop or "").strip()

        has_wban = bool(wban) and wban != "99999"

        # ---- US SPECIAL CASE ----
        if cc == "US":
            if has_wban:
                return f"USW000{wban.zfill(5)}"
            if coop:
                return f"USC00{coop.zfill(5)}"
            if usaf:
                return f"USC00{usaf[:5]}"
            raise ValueError("US station missing WBAN, COOP, and USAF identifiers.")

        # ---- NON-US STATIONS ----
        three_letter_cc = len(cc) == 3

        # Case A: WBAN present, e.g. ROM00012345 or ASW00012345
        if has_wban:
            filler = "000" if three_letter_cc else "W000"
            return f"{cc}{filler}{wban.zfill(5)}"

        # Case B: no WBAN -> WMO taken from the USAF ID,
        # e.g. AS000012345 or ASN00012345
        if not usaf:
            raise ValueError(f"Non-US station with no WBAN and no USAF: cc={cc}")

        zeros = "000" if three_letter_cc else "0000"
        return f"{cc}{zeros}{usaf[:5]}"

    def download_era5_point(
        self,
        era5_dir: Path,
        lat_dd: float,
        lon_dd: float,
        start_year: int,
        end_year: int,
        start_dt=None,
        end_dt=None,
    ) -> bool:
        """Download ERA5 single-level data around one point, one ZIP per month.

        A 0.25-degree box centred on the point is requested from the CDS
        dataset "reanalysis-era5-single-levels" for every month that overlaps
        [start_dt, end_dt]. Months whose ZIP already exists in ``era5_dir``
        are skipped. Progress and errors are reported via ``update_console``;
        on any error ``flag_failure`` is set.

        Args:
            era5_dir: Folder for the downloaded ZIP files (created if missing).
            lat_dd: Latitude of the point, decimal degrees.
            lon_dd: Longitude of the point, decimal degrees.
            start_year: First year; used only when ``start_dt`` is None
                (the period then starts on 1 January 00:00).
            end_year: Last year; used only when ``end_dt`` is None
                (the period then ends on 31 December 23:00).
            start_dt: Optional datetime of the first requested hour.
            end_dt: Optional datetime of the last requested hour.

        Returns:
            True when at least one ZIP file is present in ``era5_dir``
            afterwards (even if part of the download failed), else False.

        NOTE(review): the cached file name does not encode the requested
        days, so a partial-month ZIP from an earlier run is reused as is.
        """
        era5_dir = Path(era5_dir)
        era5_dir.mkdir(parents=True, exist_ok=True)

        # Half-width of the request box, clamped to valid lat/lon limits.
        half_box = 0.125
        north = min(lat_dd + half_box, 90.0)
        south = max(lat_dd - half_box, -90.0)
        west = max(lon_dd - half_box, -180.0)
        east = min(lon_dd + half_box, 180.0)

        era5_variables = [
            # Temperature and pressure
            "2m_temperature",
            "2m_dewpoint_temperature",
            "surface_pressure",

            # Wind
            "10m_u_component_of_wind",
            "10m_v_component_of_wind",

            # Precipitation and rain
            "total_precipitation",

            # Mean rates
            "mean_surface_direct_short_wave_radiation_flux",
            "mean_surface_downward_long_wave_radiation_flux",
            "mean_surface_downward_short_wave_radiation_flux",
            "mean_surface_net_long_wave_radiation_flux",
            "mean_surface_net_short_wave_radiation_flux",
        ]

        times = [f"{h:02d}:00" for h in range(24)]

        try:
            # Fall back to whole calendar years when no datetimes are given;
            # previously a missing start_dt/end_dt always failed the download.
            if start_dt is None:
                start_dt = datetime(int(start_year), 1, 1, 0, 0)
            if end_dt is None:
                end_dt = datetime(int(end_year), 12, 31, 23, 0)

            # Built inside the try so that missing or invalid CDS credentials
            # are reported on the console instead of crashing the worker.
            client = cdsapi.Client()

            for year in range(start_dt.year, end_dt.year + 1):
                for month in range(1, 13):
                    # First/last possible instant in this month
                    last_day_of_month = calendar.monthrange(year, month)[1]
                    month_start = datetime(year, month, 1, 0, 0)
                    month_end = datetime(year, month, last_day_of_month, 23, 0)

                    # Skip months completely outside [start_dt, end_dt]
                    if month_end < start_dt or month_start > end_dt:
                        continue

                    # Trim the first and last month to the requested days.
                    in_first_month = (start_dt.year, start_dt.month) == (year, month)
                    in_last_month = (end_dt.year, end_dt.month) == (year, month)
                    day_start = start_dt.day if in_first_month else 1
                    day_end = end_dt.day if in_last_month else last_day_of_month
                    days_this_month = [f"{d:02d}" for d in range(day_start, day_end + 1)]

                    month_str = f"{month:02d}"
                    target_file = era5_dir / (
                        f"era5_singlelevels_{year}{month_str}_{lat_dd:.2f}_{lon_dd:.2f}.zip"
                    )

                    if target_file.exists():
                        self.update_console.emit(
                            f"[ERA5] ZIP file already exists, skipping {target_file.name}"
                        )
                        continue

                    self.update_console.emit(
                        f"[ERA5] Requesting ERA5 for {year}-{month_str}"
                        f" days {day_start:02d}-{day_end:02d} "
                        f"(Latitude_dd={lat_dd:.4f}, Longitude_dd={lon_dd:.4f})..."
                    )

                    era5_server_request = {
                        "product_type": "reanalysis",
                        "data_format": "netcdf",
                        "variable": era5_variables,
                        "year": [str(year)],
                        "month": [month_str],
                        "day": days_this_month,
                        "time": times,
                        "area": [north, west, south, east],
                        "grid": [0.25, 0.25],
                    }

                    # Download under a temporary name and rename on success,
                    # so an interrupted transfer never leaves a truncated
                    # *.zip that a later run would skip as already present.
                    partial_file = target_file.with_name(target_file.name + ".part")
                    client.retrieve(
                        "reanalysis-era5-single-levels",
                        era5_server_request,
                        str(partial_file),
                    )
                    partial_file.replace(target_file)

                    self.update_console.emit(
                        f"[ERA5] Download finished: {target_file.name}"
                    )

        except Exception as e:
            # Best effort: report the problem, then continue with whatever
            # ZIP files are already on disk.
            self.update_console.emit(f"[ERA5] Download failed: {e}")
            self.flag_failure = True

        # consider ZIPs as "we have something"
        zip_files = list(era5_dir.glob("*.zip"))
        if not zip_files:
            self.update_console.emit(
                f"[ERA5] No ERA5 ZIP files found in {era5_dir}. "
                f"Check CDS credentials, requested period, and dataset licence."
            )
            return False

        self.update_console.emit(
            f"[ERA5] Debug: found {len(zip_files)} ERA5 ZIP file(s) in {era5_dir}"
        )
        return True

    # extract_era5_nc_point_series(...):    # 1. Ensure directory exists    # 2. Extract .nc files from zip (if needed)    # 3. Load all .nc files    # 4. Extract nearest grid point    # 5. Assemble into a pandas DataFrame → df_era5    # 6. RETURN df_era5
    def extract_era5_nc_point_series(
        self,
        era5_dir: Path,
        lat_pt: float,
        lon_pt: float,
        start_dt=None,
        end_dt=None,
    ) -> pd.DataFrame:
        """
        Read ERA5 data in era5_dir and return a point time series.

        Steps:
          - If .nc files already exist in era5_dir, skip ZIP extraction.
          - Otherwise, extract every .nc member of each ERA5 .zip container,
            renaming it to "<zip stem>_<member name>" inside era5_dir.
          - Open all .nc files with the netcdf4 engine, merge by coordinates,
            select the grid point for (lat_pt, lon_pt), derive wind speed,
            wind direction, upward radiation and diffuse shortwave, and
            return the result as a DataFrame.

        Parameters
        ----------
        era5_dir : Path
            Directory holding ERA5 .nc and/or .zip files.
        lat_pt, lon_pt : float
            Point handed to era5_sel_point for grid-point selection.
        start_dt, end_dt : optional
            Inclusive bounds applied to the "time" column; the filter is only
            applied when both are given.

        Returns
        -------
        pd.DataFrame
            One row per time step with whichever of these columns could be
            built: Tair_2m_K, Tdew_2m_K, Wspd_10m_mps, Wdir_10m_deg,
            AtmPres_Pa, Ppt_1hr_m, Rsw_down_Wpm2, Rsw_up_Wpm2, Rsw_dir_Wpm2,
            Rsw_dif, Rlw_down_Wpm2, Rlw_up_Wpm2.

        Raises
        ------
        RuntimeError
            If a ZIP cannot be extracted, the NetCDF files cannot be opened,
            or none of the expected variables is present.
        FileNotFoundError
            If no .nc file exists after the extraction attempt.
        """
        era5_dir = Path(era5_dir)

        # 0) If .nc already exist, skip ZIP handling entirely
        nc_files = sorted(era5_dir.glob("*.nc"))
        if nc_files:
            self.update_console.emit(
                f"[ERA5] Found {len(nc_files)} existing NetCDF file(s); "
                f"skipping ZIP extraction."
            )
        else:
            # 1) Extract any .zip -> .nc, but only if we don't already have .nc
            zip_files = sorted(era5_dir.glob("*.zip"))
            if zip_files:
                self.update_console.emit(
                    f"[ERA5] Found {len(zip_files)} ERA5 ZIP file(s); extracting NetCDF contents..."
                )
                for zpath in zip_files:
                    try:
                        with zipfile.ZipFile(str(zpath), "r") as zf:
                            for member in zf.namelist():
                                if not member.lower().endswith(".nc"):
                                    continue

                                # Prefix with the ZIP stem so members that share a
                                # name across ZIPs do not overwrite each other.
                                new_name = era5_dir / f"{zpath.stem}_{Path(member).name}"

                                # if already here -> skip extraction
                                if new_name.exists():
                                    self.update_console.emit(
                                        f"[ERA5] Already extracted: {new_name.name}, skipping."
                                    )
                                    continue

                                extracted_path = Path(zf.extract(member, era5_dir))
                                if extracted_path != new_name:
                                    extracted_path.rename(new_name)

                                self.update_console.emit(
                                    f"[ERA5] Extracted {member} -> {new_name.name}"
                                )
                    except Exception as e:
                        # Chain the cause so the original traceback is preserved.
                        raise RuntimeError(
                            f"Failed to extract ERA5 ZIP file {zpath.name}: {e}"
                        ) from e

            # refresh nc_files after extraction attempt
            nc_files = sorted(era5_dir.glob("*.nc"))

        # 2) Now we require that .nc exist
        self.update_console.emit(f"[ERA5] Found {len(nc_files)} NetCDF file(s).")

        if not nc_files:
            raise FileNotFoundError(f"No ERA5 NetCDF files found in {era5_dir}")

        # 3) Open and merge all files into a single Dataset
        try:
            ds_all = xr.open_mfdataset(
                [str(p) for p in nc_files],
                combine="by_coords",
                engine="netcdf4",
            )
        except Exception as e:
            raise RuntimeError(
                f"Failed to open ERA5 NetCDF files with netcdf4 engine: {e}"
            ) from e

        # Load all data into memory so we don't have dask-backed arrays
        ds_all = ds_all.load()

        ds_all = era5_ensure_time(ds_all)

        # 4) Determine Latitude/Longitude names and select nearest grid point
        lat_name, lon_name = era5_get_lat_lon_names(ds_all)
        ds_point = era5_sel_point(ds_all, lat_name, lon_name, lat_pt, lon_pt)
        ds_point = era5_drop_singleton_dims(ds_point)

        # ---- Instantaneous ERA5 Data variables ----
        # Temperature air at 2 m
        Tair_2m_K = era5_find_variable(ds_point, "t2m", "Tair_2m_K")
        # Temperature dew at 2 m
        Tdew_2m_K = era5_find_variable(ds_point, "d2m", "Tdew_2m_K")
        # Atmospheric pressure air at surface
        AtmPres_Pa = era5_find_variable(ds_point, "sp", "AtmPres_Pa")
        # Wind speed U component at 10 m
        Wspd_10m_Eastward_mps = era5_find_variable(ds_point, "u10", "Wspd_10m_Eastward_mps")
        # Wind speed V component at 10 m
        Wspd_10m_Northward_mps = era5_find_variable(ds_point, "v10", "Wspd_10m_Northward_mps")

        # ---- Accumulated ERA5 Data variables ----
        # Precipitation total
        Ppt_1hr_m = era5_find_variable(ds_point, "tp", "Ppt_1hr_m")

        # ---- Mean or Average ERA5 Data variables ----
        # Radiation shortwave downwelling surface
        Rsw_down_Wpm2 = era5_find_variable(ds_point, "avg_sdswrf", "Rsw_down_Wpm2")
        # Radiation shortwave direct surface
        Rsw_dir_Wpm2 = era5_find_variable(ds_point, "avg_sdirswrf", "Rsw_dir_Wpm2")
        # Radiation shortwave net surface
        Rsw_net_Wpm2 = era5_find_variable(ds_point, "avg_snswrf", "Rsw_net_Wpm2")
        # Radiation longwave downwelling surface
        Rlw_down_Wpm2 = era5_find_variable(ds_point, "avg_sdlwrf", "Rlw_down_Wpm2")
        # Radiation longwave net surface
        Rlw_net_Wpm2 = era5_find_variable(ds_point, "avg_snlwrf", "Rlw_net_Wpm2")

        have_wind = (
            Wspd_10m_Eastward_mps is not None
            and Wspd_10m_Northward_mps is not None
        )

        # Wind speed is the magnitude of the (u, v) vector
        Wspd_10m_mps = None
        if have_wind:
            Wspd_10m_mps = xr.apply_ufunc(
                np.hypot, Wspd_10m_Eastward_mps, Wspd_10m_Northward_mps
            )

        # Wind direction (degrees FROM which the wind blows)
        # using ERA5 U (eastward) and V (northward) components.
        #
        # ERA5:
        #   u > 0 -> blowing toward EAST
        #   v > 0 -> blowing toward NORTH
        #
        # Meteorological wind direction:
        #   0   = FROM North
        #   90  = FROM East
        #   180 = FROM South
        #   270 = FROM West
        Wdir_10m_deg = None
        if have_wind:
            # arctan2(u, v) (note the argument order) is the compass bearing,
            # clockwise from North, *toward* which the wind is blowing.
            # arctan2(v, u) would instead be the math angle counter-clockwise
            # from East and would report a northerly wind as 90 instead of 0.
            theta_to_deg = np.degrees(np.arctan2(Wspd_10m_Eastward_mps,
                                                 Wspd_10m_Northward_mps))

            # rotate by 180 degrees to convert to direction wind is *from*
            Wdir_10m_deg = (theta_to_deg + 180.0) % 360.0

        # Derive upward components
        Rsw_up_Wpm2 = None
        if (Rsw_down_Wpm2 is not None) and (Rsw_net_Wpm2 is not None):
            # net = down - up -> up = down - net
            Rsw_up_Wpm2 = Rsw_down_Wpm2 - Rsw_net_Wpm2

        Rlw_up_Wpm2 = None
        if (Rlw_down_Wpm2 is not None) and (Rlw_net_Wpm2 is not None):
            # net = down - up -> up = down - net
            Rlw_up_Wpm2 = Rlw_down_Wpm2 - Rlw_net_Wpm2

        Rsw_dif = None
        if (Rsw_dir_Wpm2 is not None) and (Rsw_down_Wpm2 is not None):
            # total = direct + diffuse -> diffuse = total (down) - direct
            Rsw_dif = Rsw_down_Wpm2 - Rsw_dir_Wpm2

        pieces = {
            "Tair_2m_K":        Tair_2m_K,
            "Tdew_2m_K":        Tdew_2m_K,
            "Wspd_10m_mps":     Wspd_10m_mps,
            "Wdir_10m_deg":     Wdir_10m_deg,
            "AtmPres_Pa":       AtmPres_Pa,
            "Ppt_1hr_m":        Ppt_1hr_m,
            "Rsw_down_Wpm2":    Rsw_down_Wpm2,
            "Rsw_up_Wpm2":      Rsw_up_Wpm2,
            "Rsw_dir_Wpm2":     Rsw_dir_Wpm2,
            "Rsw_dif":          Rsw_dif,
            "Rlw_down_Wpm2":    Rlw_down_Wpm2,
            "Rlw_up_Wpm2":      Rlw_up_Wpm2,
        }

        # Keep only the variables that were found or could be derived
        valid = {k: v for k, v in pieces.items() if v is not None}
        if not valid:
            raise RuntimeError("No required ERA5 variables found; check your NetCDF fields.")

        ds_out = xr.Dataset(valid)
        ds_out = era5_drop_singleton_dims(ds_out)

        # Fixed column order for the output table
        cols = [
            "Tair_2m_K", "Tdew_2m_K", "Wspd_10m_mps", "Wdir_10m_deg",
            "AtmPres_Pa", "Ppt_1hr_m", "Rsw_down_Wpm2", "Rsw_up_Wpm2",
            "Rsw_dir_Wpm2", "Rsw_dif", "Rlw_down_Wpm2", "Rlw_up_Wpm2",
        ]

        cols = [c for c in cols if c in ds_out]

        df = ds_out[cols].to_dataframe().reset_index()

        if "time" in df.columns:
            df["time"] = pd.to_datetime(df["time"])
            df = df.sort_values("time")

        # >>> time filtering based on start_dt / end_dt <<<
        if start_dt is not None and end_dt is not None:
            mask = (df["time"] >= start_dt) & (df["time"] <= end_dt)
            df = df.loc[mask].reset_index(drop=True)

        return df

    # ---------- Build GHCN-H station ID by looking in ghcnh-station-attributes.csv ----------
    def build_GHCNh_id(
        self,
        usaf_wban: str,
        country_name: str,
        state: str = "",
        county: str = "",
        place: str = "",
        lat_dd: float = None,
        lon_dd: float = None,
        coop: str = ""
    ):
        """
        Find the GHCN-Hourly station ID by looking it up in
        resources/ghcnh-station-attributes.csv.

        Matching priority:
          1) WBAN (if not 99999)
          2) USAF -> WMO (WMO ~ first 5 digits of USAF)
          3) nearest (LAT_DEC, LON_DEC) within the same country, if possible
             (squared difference in decimal degrees, not a geodesic distance)

        The search is restricted to rows whose CTRY_NAME equals country_name
        (case-insensitive) when at least one such row exists; otherwise the
        whole table is searched.

        Parameters
        ----------
        usaf_wban : str
            "<USAF>-<WBAN>" identifier; a value without "-" is treated as a
            USAF code with no WBAN.
        country_name : str
            Country used to narrow the search.
        state, county, place : str
            Only used in the error message when no station is found.
        lat_dd, lon_dd : float, optional
            Coordinates for the nearest-station fallback.
        coop : str
            Accepted for interface compatibility; not used for matching.

        Returns
        -------
        station_id : str
            GHCNh station ID (e.g., ASN00061242)
        usaf_used : str
            NCDC value from the attributes file, or the USAF part of
            usaf_wban when that value is missing.
        wban_used : str
            WBAN value from the attributes file, or the WBAN part of
            usaf_wban when that value is missing.
        end_year_db : int or None
            Last year with data according to END_DT in attributes file;
            None when END_DT is missing or not an integer.

        Raises
        ------
        ValueError
            If no station can be matched.
        """
        # Safe string helper: missing values (None / NaN / NA) become "" rather
        # than the truthy strings "None" / "nan", so fallbacks below can fire.
        def s(val):
            if val is None:
                return ""
            try:
                if pd.isna(val):
                    return ""
            except (TypeError, ValueError):
                # non-scalar values have no single missing/not-missing answer
                pass
            return str(val).strip()

        # partition() tolerates an identifier without a "-<WBAN>" suffix
        usaf, _, wban = usaf_wban.partition("-")
        usaf = usaf.strip()
        wban = wban.strip()
        coop = (coop or "").strip()

        # Load ghcnh-station-attributes.csv once per Worker
        if not hasattr(self, "df_ghcnh_attrs"):
            attrs_path = RESOURCES_DIR / "ghcnh-station-attributes.csv"
            self.update_console.emit(f"Loading station attributes from {attrs_path}")
            self.df_ghcnh_attrs = pd.read_csv(
                attrs_path,
                dtype={
                    "GHCNH": str,
                    "NCDC": str,
                    "BEG_DT": str,
                    "END_DT": str,
                    "COOP": str,
                    "WBAN": str,
                    "WMO": str,
                    "STATION_NAME": str,
                    "CC": str,
                    "CTRY_NAME": str,
                    "ST": str,
                    "COUNTY": str,
                    "CD": str,
                    "UTC": str,
                    "LAT_DEC": "float64",
                    "LON_DEC": "float64",
                    "EL_GR_M": "float64",
                }
            )

        df = self.df_ghcnh_attrs

        def pick_latest_end(sub):
            # Among several candidate rows prefer the one with the latest END_DT
            sub = sub.copy()
            sub["END_DT_int"] = pd.to_numeric(sub["END_DT"], errors="coerce").fillna(0).astype(int)
            sub = sub.sort_values("END_DT_int", ascending=False)
            return sub.iloc[0]

        # --- narrow by country if possible ---
        df_search = df
        if "CTRY_NAME" in df.columns and country_name:
            mask = df["CTRY_NAME"].str.strip().str.lower() == country_name.strip().lower()
            df_country = df[mask]
            if not df_country.empty:
                df_search = df_country

        chosen = None

        # 1) WBAN match (if not 99999)
        if wban and wban != "99999" and "WBAN" in df_search.columns:
            wb = wban.zfill(5)
            m = df_search["WBAN"].fillna("").astype(str).str.zfill(5) == wb
            sub = df_search[m]
            if not sub.empty:
                chosen = pick_latest_end(sub)

        # 2) USAF -> WMO (USAF ~ WMO*10 -> first 5 digits)
        if chosen is None and usaf and "WMO" in df_search.columns:
            wmo = usaf[:5]
            m = df_search["WMO"].fillna("").astype(str).str.zfill(5) == wmo
            sub = df_search[m]
            if not sub.empty:
                chosen = pick_latest_end(sub)

        # 3) nearest lat_dd/lon_dd within same country (if lat_dd/lon_dd given)
        if (
            chosen is None
            and lat_dd is not None and lon_dd is not None
            and "LAT_DEC" in df_search.columns and "LON_DEC" in df_search.columns
        ):
            sub = df_search.dropna(subset=["LAT_DEC", "LON_DEC"]).copy()
            if not sub.empty:
                dlat = sub["LAT_DEC"].astype(float) - float(lat_dd)
                dlon = sub["LON_DEC"].astype(float) - float(lon_dd)
                sub["dist2"] = dlat * dlat + dlon * dlon
                chosen = sub.loc[sub["dist2"].idxmin()]

        # If still nothing, give a clear error
        if chosen is None:
            raise ValueError(
                "Could not locate a GHCN-Hourly station in ghcnh-station-attributes.csv for:\n"
                f"  usaf_wban = {usaf_wban}\n"
                f"  place = '{place}', county = '{county}', state = '{state}', country = '{country_name}'\n"
                "Please check /resources/ghcnh-station-attributes.csv or select another station."
            )

        station_id = str(chosen["GHCNH"]).strip()

        # Use the attributes file's WBAN/USAF if present (fallback to passed values).
        # Empty CSV cells are read as NaN, so they must go through s() before the
        # emptiness test; str(NaN) == "nan" would otherwise defeat the fallback.
        wban_attr = s(chosen.get("WBAN", ""))
        usaf_attr = s(chosen.get("NCDC", ""))  # NCDC is usually the USAF identifier in that file
        if not usaf_attr:
            usaf_attr = usaf
        if not wban_attr:
            wban_attr = wban

        # Parse END_DT (YYYYMMDD) to get last data year
        end_dt_raw = s(chosen.get("END_DT", "0"))
        try:
            end_year_db = int(end_dt_raw) // 10000
        except ValueError:
            end_year_db = None

        self.update_console.emit(
            f"Matched GHCN-H station {station_id} from ghcnh-station-attributes.csv\n"
            f"  Name: {s(chosen.get('STATION_NAME'))}\n"
            f"  Country: {s(chosen.get('CTRY_NAME'))}, ST: {s(chosen.get('ST'))}, "
            f"County: {s(chosen.get('COUNTY'))}\n"
            f"  Lat/Lon: {s(chosen.get('LAT_DEC'))}, {s(chosen.get('LON_DEC'))}\n"
            f"  Period of record: {s(chosen.get('BEG_DT'))}–{s(chosen.get('END_DT'))}"
        )

        return station_id, usaf_attr, wban_attr, end_year_db
        
    # ================= ERA5 PIPELINE =======================
    def run_era5_pipeline(
        self,
        start_year: int,
        end_year: int,
        model: str,
        max_lai: float,
        evergreen_percent: float,
        vegetation_type: str,
        tree_height: float,
        wind_sensor_height: float,
        precip_file_path: str,
        country: str,
        state: str,
        county: str,
        place: str,
        lat_dd: float,
        lon_dd: float,
        output_root: str,
        start_month: Optional[int] = None,
        start_day:   Optional[int] = None,
        end_month:   Optional[int] = None,
        end_day:     Optional[int] = None,
    ):
        """
        Download ERA5 NetCDF, convert it to a point time series, and write CSVs.

        The work happens in <output_root>/ERA5_<lat/lon label>_<start>_<end>:
        existing .nc/.zip files there are reused instead of downloading again,
        the point series is saved as <label>.csv, and Weather.csv /
        Radiation.csv are produced by
        write_weather_and_radiation_csvs_from_era5_df.

        Parameters
        ----------
        start_year, end_year : int
            First and last year of the requested period.
        lat_dd, lon_dd : float
            Point location; both are required.
        output_root : str
            Directory under which the ERA5 folder is created.
        start_month, start_day, end_month, end_day : int, optional
            Default to Jan 1 and Dec 31; days are clipped to the month length.
        model, max_lai, evergreen_percent, vegetation_type, tree_height,
        wind_sensor_height, precip_file_path, country, state, county, place
            Accepted for signature compatibility; not read by this method.

        Returns
        -------
        (path, ok) : tuple
            ("", False) when latitude/longitude is missing; otherwise the ERA5
            folder path and a success flag.
        """
        self.flag_failure = False

        # Resolve month/day defaults to "full span" if None
        if start_month is None:
            start_month = 1
        if start_day is None:
            start_day = 1
        if end_month is None:
            end_month = 12
        if end_day is None:
            end_day = 31

        # Clip days to valid month lengths (avoid Feb 30 etc.)
        start_day = min(start_day, calendar.monthrange(start_year, start_month)[1])
        end_day   = min(end_day,   calendar.monthrange(end_year,   end_month)[1])

        start_dt = datetime(start_year, start_month, start_day, 0, 0)
        end_dt   = datetime(end_year,   end_month,   end_day,   23, 0)

        if lat_dd is None or lon_dd is None:
            self.update_console.emit(
                "ERA5 mode requires latitude/longitude. "
                "Please either select a place from the dropdown OR enter Latitude/Longitude manually."
            )
            return "", False

        station_label = (
            f"ERA5_"
            f"{format_lat_lon(lat_dd, lon_dd)}_"
            f"{start_dt:%Y%m%d}_{end_dt:%Y%m%d}"
        )
        # e.g. ERA5_N040.123_W075.456_20000101_20051231

        era5_root = os.path.join(output_root, station_label)
        os.makedirs(era5_root, exist_ok=True)
        era5_dir = Path(era5_root)

        # --- reuse existing ERA5 output if present ---
        existing_nc  = sorted(era5_dir.glob("*.nc"))
        existing_zip = sorted(era5_dir.glob("*.zip"))

        if existing_nc or existing_zip:
            self.update_console.emit(
                f"[ERA5] Found existing ERA5 file(s) in {era5_dir} "
                f"({len(existing_nc)} .nc, {len(existing_zip)} .zip); "
                f"skipping download and proceeding to processing."
            )
            ok_dl = True
        else:
            self.update_console.emit(
                f"Downloading ERA5 data for Latitude_dd={lat_dd:.4f}, Longitude_dd={lon_dd:.4f}, "
                f"years {start_year}-{end_year}..."
            )
            ok_dl = self.download_era5_point(
                era5_dir=era5_dir,
                lat_dd=lat_dd,
                lon_dd=lon_dd,
                start_year=start_year,
                end_year=end_year,
                start_dt=start_dt,
                end_dt=end_dt,
            )

        if not ok_dl or self.flag_failure:
            self.update_console.emit("ERA5 download failed; aborting ERA5 pipeline.")
            return era5_root, False

        # Convert ERA5 NetCDF -> point time series DataFrame
        try:
            self.update_console.emit(
                "Extracting ERA5 NetCDF variables to point time series..."
            )
            df_era5 = self.extract_era5_nc_point_series(
                era5_dir=era5_dir,
                lat_pt=lat_dd,
                lon_pt=lon_dd,
                start_dt=start_dt,
                end_dt=end_dt,
            )
        except Exception as e:
            self.update_console.emit(f"Error while processing ERA5 NetCDF: {e}")
            return era5_root, False

        era5_csv = era5_dir / f"{station_label}.csv"
        try:
            df_era5.to_csv(era5_csv, index=False)
        except OSError as e:
            # e.g. the CSV is locked by another program or the disk is full;
            # report it like the other steps instead of raising.
            self.update_console.emit(
                f"[ERA5] Failed to save ERA5 point CSV {era5_csv}: {e}"
            )
            return era5_root, False
        self.update_console.emit(f"Saved ERA5 point CSV: {era5_csv}")

        self.update_console.emit("Going to Weather.csv and Radiation.csv writer.")

        try:
            # Optional: show what time span we're writing (helps catch UTC/local confusion)
            if "time" in df_era5.columns and len(df_era5) > 0:
                t0 = pd.to_datetime(df_era5["time"]).min()
                t1 = pd.to_datetime(df_era5["time"]).max()
                self.update_console.emit(f"[ERA5] UTC span in df: {t0} .. {t1}")

            weather_path, rad_path, gmt_used = write_weather_and_radiation_csvs_from_era5_df(
                df_era5=df_era5,
                out_dir=str(era5_dir),
                lat_dd=lat_dd,
                lon_dd=lon_dd,
                gmt_offset_hours=None,
            )

            self.update_console.emit(
                f"Wrote Weather.csv and Radiation.csv to {era5_dir} (GMT offset {gmt_used})"
            )

        except Exception as e:
            # Imported here so this handler cannot itself fail with NameError
            # if the module does not import traceback at file level.
            import traceback

            self.update_console.emit(f"[ERA5] Failed while writing Weather/Radiation CSV: {e}")
            self.update_console.emit(traceback.format_exc())
            return era5_root, False

        return era5_root, True


    #~~~GHCNh data processing ~~~~~~~~~~~~~~~~~~

    #download_GHCNh_data will download GHCNh psv file from NOAA NCEI url_by_year_base
    def download_GHCNh_data(self, GHCNh, start_year, end_year, station_end_year=None):
        """
        Download GHCN-Hourly pipe-separated files via HTTPS for a station
        across a range of years.

        Files are written to the current working directory as
        GHCNh_<GHCNh>_<year>.psv. The outcome is reported through
        self.flag_failure (False on success, True on failure); nothing is
        returned.

        If station_end_year is provided (from ghcnh-station-attributes.csv) and
        the requested end_year exceeds it, we warn and only request up to that
        last recorded year. If that leaves no year to request (start_year is
        after the effective end year), the call is flagged as failed so that
        an empty download is not mistaken for success.

        If a yearly file is not found for a requested year, this method:
          1) explains that station data for that year was not found,
          2) attempts to download the station 'period of record' (POR) file
             so the user can inspect the last recorded year,
          3) if that also fails, tells the user to check availability in a browser at:
             https://www.ncei.noaa.gov/oa/global-historical-climatology-network/index.html#hourly/access/
        and then stops without trying later years.
        """
        url_by_year_base = (
            "https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/access/by-year"
        )
        url_by_station_base = (
            "https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/access/by-station/"
        )
        local_name_by_station = f"GHCNh_{GHCNh}_por.psv"

        self.flag_failure = False

        effective_end_year = end_year
        if station_end_year is not None and end_year > station_end_year:
            self.update_console.emit(
                f"NOTE: Requested end year {end_year} exceeds the station's last recorded "
                f"year {station_end_year} in ghcnh-station-attributes.csv.\n"
                f"      WeatherPrep will only attempt download through {station_end_year}."
            )
            effective_end_year = station_end_year

        # An empty year range would otherwise fall through the loop below and
        # look like a successful download with no files.
        if start_year > effective_end_year:
            self.update_console.emit(
                f"  No years to download for station {GHCNh}: requested start year "
                f"{start_year} is after the last available year {effective_end_year}."
            )
            self.flag_failure = True
            return

        for year in range(start_year, effective_end_year + 1):
            url_by_year = f"{url_by_year_base}/{year}/psv/GHCNh_{GHCNh}_{year}.psv"
            local_name_by_year = f"GHCNh_{GHCNh}_{year}.psv"

            try:
                self.update_console.emit(f"  Downloading {url_by_year}")
                with urllib.request.urlopen(url_by_year) as resp, open(local_name_by_year, "wb") as out:
                    # Stream to disk instead of holding the whole file in memory
                    shutil.copyfileobj(resp, out)

            except Exception as e:
                # Yearly file for this station/year could not be retrieved
                self.update_console.emit(
                    f"  No GHCN-Hourly file found for station {GHCNh} in {year} "
                    f"(error: {e})."
                )
                self.update_console.emit(
                    "  Station data for this year was not found. "
                    "Attempting to download the station 'period of record' file "
                    "so you can inspect the last recorded year."
                )

                station_url = f"{url_by_station_base}{local_name_by_station}"

                try:
                    self.update_console.emit(f"  Downloading station POR file {station_url}")
                    with urllib.request.urlopen(station_url) as resp, open(local_name_by_station, "wb") as out:
                        shutil.copyfileobj(resp, out)

                    self.update_console.emit(
                        f"  Station POR file saved as {local_name_by_station}.\n"
                        "  Please open this file to see the last year with recorded data.\n"
                        "  If the period of record does not include your requested years, "
                        "the station may not have GHCN-Hourly data for that period."
                    )

                except Exception as e2:
                    self.update_console.emit(
                        f"  Unable to download station POR file {station_url} (error: {e2}).\n"
                        "  Please check station availability using a web browser at:\n"
                        "    https://www.ncei.noaa.gov/oa/global-historical-climatology-network/index.html#hourly/access/\n"
                        "  Use the 'hourly/access' tools there to search for your station "
                        "and confirm the years with available data."
                    )

                # Mark failure and stop trying additional years once we hit a missing year
                self.flag_failure = True
                break


    #GHCNh_psv_to_weatherprep_csv will convert the GHCNh from psv to csv for use in WeatherPrep
    def GHCNh_psv_to_weatherprep_csv(self, output_folder, GHCNh, usaf, wban, start_year, end_year):
        """
        Convert yearly GHCN-Hourly .psv files into WeatherPrep CSV files.

        For every year in start_year..end_year the file GHCNh_<GHCNh>_<year>.psv
        is read from the current working directory; a year whose file is
        missing is reported and skipped. Each CSV holds these columns, in
        this order (a column absent from the .psv is written empty):

            YR--MODAHRMN (YYYYMMDDHHMM, no units in name)
            temperature_C
            dew_point_temperature_C
            station_level_pressure_hPa
            sea_level_pressure_hPa
            altimeter_hPa
            wind_direction_deg
            wind_speed_mps
            wind_gust_mps
            precipitation_mm
            precipitation_6_hour_mm
            precipitation_24_hour_mm
            snow_depth_mm
            sky_cover_baseht_1_m
            sky_cover_1_code  (raw alphanumeric)
            sky_cover_baseht_2_m
            sky_cover_2_code
            sky_cover_baseht_3_m
            sky_cover_3_code

        Sky-cover codes are copied through as raw strings; no okta mapping is
        applied here. For reference, from the README FILE FOR THE GLOBAL
        HISTORICAL CLIMATOLOGY NETWORK hourly (GHCNh) Version 1.0.0:
            Since up to 3 cloud layers can be reported, the full state of the
            sky can best be determined by the last layer's value. If three
            layers are reported and the third layer uses BKN then the total
            state of sky is BKN ("mostly cloudy"). OVC is "cloudy" or overcast
            and FEW or SCT is "partly cloudy". Where there are more than 3
            cloud layers, the highest layers will not be reported.
            Values in oktas:
            CLR:00 None, SKC or CLR
            FEW:01 One okta - 1/10 or less but not zero
            FEW:02 Two oktas - 2/10 - 3/10, or FEW
            SCT:03 Three oktas - 4/10
            SCT:04 Four oktas - 5/10, or SCT
            BKN:05 Five oktas - 6/10
            BKN:06 Six oktas - 7/10 - 8/10
            BKN:07 Seven oktas - 9/10 or more but not 10/10, or BKN
            OVC:08 Eight oktas - 10/10, or OVC
            VV:09 Sky obscured, or cloud amount cannot be estimated
            X:10 Partial obscuration

        The usaf and wban arguments are not read by this method.

        Output: f'{GHCNh}-{year}.csv' inside output_folder.

        Raises ValueError when a .psv has none of the recognised datetime
        columns.
        """
        # Column names that may carry the observation timestamp, in priority order
        datetime_candidates = ('date_time', 'DATE', 'date', 'time')

        # (output column, GHCN-Hourly source column), listed in output order
        column_map = (
            ('temperature_C', 'temperature'),
            ('dew_point_temperature_C', 'dew_point_temperature'),
            ('station_level_pressure_hPa', 'station_level_pressure'),
            ('sea_level_pressure_hPa', 'sea_level_pressure'),
            ('altimeter_hPa', 'altimeter'),
            ('wind_direction_deg', 'wind_direction'),
            ('wind_speed_mps', 'wind_speed'),
            ('wind_gust_mps', 'wind_gust'),
            ('precipitation_mm', 'precipitation'),
            ('precipitation_6_hour_mm', 'precipitation_6_hour'),
            ('precipitation_24_hour_mm', 'precipitation_24_hour'),
            ('snow_depth_mm', 'snow_depth'),
            ('sky_cover_baseht_1_m', 'sky_cover_baseht_1'),
            ('sky_cover_1_code', 'sky_cover_1'),
            ('sky_cover_baseht_2_m', 'sky_cover_baseht_2'),
            ('sky_cover_2_code', 'sky_cover_2'),
            ('sky_cover_baseht_3_m', 'sky_cover_baseht_3'),
            ('sky_cover_3_code', 'sky_cover_3'),
        )

        for year in range(start_year, end_year + 1):
            psv_name = f"GHCNh_{GHCNh}_{year}.psv"
            if not os.path.exists(psv_name):
                self.update_console.emit(f"Missing file {psv_name}, skipping year {year}.")
                continue

            raw = pd.read_csv(psv_name, sep='|', low_memory=False)

            # First candidate present in the file wins
            dt_col = next((name for name in datetime_candidates if name in raw.columns), None)
            if dt_col is None:
                raise ValueError(
                    f"No datetime column found in GHCN-Hourly file {psv_name}; "
                    "please adjust column name detection."
                )

            stamps = pd.to_datetime(raw[dt_col])

            out = pd.DataFrame()
            # The timestamp column goes first so it fixes the row index that
            # scalar fill values below are broadcast against.
            out['YR--MODAHRMN'] = stamps.dt.strftime('%Y%m%d%H%M')

            for target, source in column_map:
                out[target] = raw[source] if source in raw.columns else pd.NA

            output_file = os.path.join(output_folder, f'{GHCNh}-{year}.csv')
            out.to_csv(output_file, index=False)
            self.update_console.emit(f"  Wrote WeatherPrep CSV: {output_file}")

    #prepare_multi_year_folder_GHCNh will prepare one folder to contain multiple years
    def prepare_multi_year_folder_GHCNh(self, wpc_path, output_folder, station_id, start_year, end_year):
        """
        Build <output_folder>/<station_id>/<YY>-<YY> and fill it for a multi-year run.

        station_id is the GHCNh-style ID (e.g., ITW00034113). The file at
        wpc_path is copied into the folder. When start_year differs from
        end_year, the per-year CSVs <station_id>-<year>.csv found in
        output_folder are concatenated into
        <station_id>-<start_year>-<end_year>.csv, keeping the header line of
        the first year only. The per-year CSVs are then deleted from
        output_folder.

        Returns the path of the created folder.

        NOTE(review): when start_year == end_year no merged CSV is written,
        yet the per-year CSV is still deleted below - confirm callers never
        use this method for a single year.
        """
        years = range(start_year, end_year + 1)

        def yearly_csv(year):
            # Location of the temporary per-year CSV in the Outputs folder
            return os.path.join(output_folder, f'{station_id}-{year}.csv')

        # Folder label uses the two-digit years, e.g. "20-21"
        span_label = f'{str(start_year)[2:4]}-{str(end_year)[2:4]}'
        folder_name = os.path.join(output_folder, f'{station_id}', span_label)
        os.makedirs(folder_name, exist_ok=True)
        copy2(wpc_path, folder_name)

        if start_year != end_year:
            merged_path = os.path.join(folder_name, f'{station_id}-{start_year}-{end_year}.csv')
            with open(merged_path, 'w', newline='') as merged:
                for year in years:
                    with open(yearly_csv(year), 'r') as source:
                        rows = source.readlines()
                    # Keep the header for the first year, drop it afterwards
                    first_row = 0 if year == start_year else 1
                    merged.writelines(rows[first_row:])

        # Cleanup: remove the per-year temp CSVs in Outputs
        for year in years:
            leftover = yearly_csv(year)
            if os.path.exists(leftover):
                os.remove(leftover)

        return folder_name
        
    #prepare_single_year_folder_GHCNh will prepare one folder to contain each year
    def prepare_single_year_folder_GHCNh(self, wpc_path, output_folder, station_id, year):
        """
        Create <output_folder>/<station_id>/<year> and populate it for a one-year GHCNh run.

        The WeatherPrepConfig template (wpc_path) is always copied in. When the temporary
        file <output_folder>/<station_id>-<year>.csv exists it is copied into the folder
        as well and then removed from output_folder.

        Returns:
            Path of the per-year folder.
        """
        year_dir = os.path.join(output_folder, station_id, str(year))
        os.makedirs(year_dir, exist_ok=True)

        # Config template goes in first
        copy2(wpc_path, year_dir)

        temp_csv = os.path.join(output_folder, f'{station_id}-{year}.csv')
        if not os.path.exists(temp_csv):
            # Nothing to move for this year
            return year_dir

        # Copy the weather CSV next to the config, then drop the temporary original
        copy2(temp_csv, year_dir)
        os.remove(temp_csv)
        return year_dir

    #update_multi_year_config_file_GHCNh will prepare one WeatherPrepConfig.xml to contain multiple years
    def update_multi_year_config_file_GHCNh(self, weatherprep_path, folder_name, station_id, start_year, end_year,
                           country, state, county, place,
                           model, max_lai, evergreen_percent, vegetation_type, tree_height, wind_sensor_height, precip_file_path):
        """
        Fill in WeatherPrepConfig.xml for a multi-year GHCNh run, then run WeatherPrep on the folder.

        The config in folder_name is parsed with lxml, its elements are set from the
        arguments, its comments are refreshed from the template
        RESOURCES_DIR/WeatherPrepConfig.xml via copy_comments_from_template, and the file
        is written back. WeatherPrep (weatherprep_path) is then launched with folder_name
        as its only argument; on Windows the console window is hidden.

        Args:
            weatherprep_path: Path of the WeatherPrep executable.
            folder_name: Run folder containing WeatherPrepConfig.xml.
            station_id: Station ID used in the weather file name
                <station_id>-<start_year>-<end_year>.csv.
            start_year, end_year: Written to StartYear / EndYear.
            country, state, county, place: Written to Nation / State / County / Place.
            model, max_lai, evergreen_percent, vegetation_type, tree_height,
            wind_sensor_height: Written to Model, MaximumLAI, EvergreenPercent,
                VegetationType, Height_Tree_m and Height_WindSensor_m.
            precip_file_path: Written to PrecipitationDataCsv; None clears the element.

        Returns:
            The subprocess.CompletedProcess of the WeatherPrep run (stdout/stderr captured as text).
        """
        # Construct the file path for the config file
        config_file = os.path.join(folder_name, 'WeatherPrepConfig.xml')
        print(f"Updating config file: {config_file}")

        # Parse the XML file with lxml to keep comments
        parser = ET.XMLParser(remove_blank_text=True, remove_comments=False)
        tree = ET.parse(config_file, parser)
        root = tree.getroot()

        # Safely set the text of the first element with this tag; name the tag when it is missing
        def update_text(tag, text):
            element = root.find(f'.//{tag}')
            if element is not None:
                element.text = str(text)
            else:
                print(f"Warning: Attempted to update a non-existent element <{tag}> with {text}")

        weather_file = os.path.join(folder_name, f'{station_id}-{start_year}-{end_year}.csv')
        settings = [
            # Year-related information
            ('StartYear', start_year),
            ('EndYear', end_year),
            # Model and path information
            ('Model', model),
            ('SurfaceWeatherDataFile', weather_file),
            # Location information
            ('Nation', country),
            ('State', state),
            ('County', county),
            ('Place', place),
            # Vegetation information
            ('MaximumLAI', max_lai),
            ('EvergreenPercent', evergreen_percent),
            ('VegetationType', vegetation_type),
            ('Height_Tree_m', tree_height),
            ('Height_WindSensor_m', wind_sensor_height),
        ]
        for tag, value in settings:
            update_text(tag, value)

        # Precipitation file path: use the supplied path, otherwise blank out the element
        if precip_file_path is not None:
            update_text('PrecipitationDataCsv', precip_file_path)
        else:
            print("No precipitation file path provided; PrecipitationDataCsv is cleared.")
            update_text('PrecipitationDataCsv', '')

        # Copy comments from the template WeatherPrepConfig.xml;
        # clear_existing=True wipes any pre-existing comments in the target before copying.
        wpc_path = RESOURCES_DIR / "WeatherPrepConfig.xml"
        copy_comments_from_template(wpc_path, tree, clear_existing=True)

        # Save the updated XML file with pretty print
        tree.write(config_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')
        print("Config file has been updated.")

        run_kwargs = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE, 'text': True}
        # Hide command prompt window on Windows OS
        if sys.platform.startswith('win'):
            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = SW_HIDE
            run_kwargs['startupinfo'] = startupinfo
        result = subprocess.run([weatherprep_path, folder_name], **run_kwargs)

        print(f"Subprocess finished with status: {result.returncode}")
        return result

    #update_single_year_config_file_GHCNh will prepare one WeatherPrepConfig.xml to contain single years
    def update_single_year_config_file_GHCNh(self, weatherprep_path, folder_name, station_id, year,
                           country, state, county, place,
                           model, max_lai, evergreen_percent, vegetation_type, tree_height, wind_sensor_height,
                           precip_file_path):
        """
        Fill in WeatherPrepConfig.xml for a single-year GHCNh run, then run WeatherPrep on the folder.

        The config in folder_name is parsed with lxml, its elements are set from the
        arguments (StartYear and EndYear both receive year), its comments are refreshed
        from the template RESOURCES_DIR/WeatherPrepConfig.xml via
        copy_comments_from_template, and the file is written back. WeatherPrep
        (weatherprep_path) is then launched with folder_name as its only argument; on
        Windows the console window is hidden.

        Args:
            weatherprep_path: Path of the WeatherPrep executable.
            folder_name: Run folder containing WeatherPrepConfig.xml.
            station_id: Station ID used in the weather file name <station_id>-<year>.csv.
            year: Written to both StartYear and EndYear.
            country, state, county, place: Written to Nation / State / County / Place.
            model, max_lai, evergreen_percent, vegetation_type, tree_height,
            wind_sensor_height: Written to Model, MaximumLAI, EvergreenPercent,
                VegetationType, Height_Tree_m and Height_WindSensor_m.
            precip_file_path: Written to PrecipitationDataCsv; None clears the element.

        Returns:
            The subprocess.CompletedProcess of the WeatherPrep run (stdout/stderr captured as text).
        """
        # Construct the file path for the config file
        config_file = os.path.join(folder_name, 'WeatherPrepConfig.xml')
        print(f"Updating config file: {config_file}")

        # Parse the XML file with lxml to keep comments
        parser = ET.XMLParser(remove_blank_text=True, remove_comments=False)
        tree = ET.parse(config_file, parser)
        root = tree.getroot()

        # Safely set the text of the first element with this tag; name the tag when it is missing
        def update_text(tag, text):
            element = root.find(f'.//{tag}')
            if element is not None:
                element.text = str(text)
            else:
                print(f"Warning: Attempted to update a non-existent element <{tag}> with {text}")

        weather_file = os.path.join(folder_name, f'{station_id}-{year}.csv')
        settings = [
            # Year-related information
            ('StartYear', year),
            ('EndYear', year),
            # Model and path information
            ('Model', model),
            ('SurfaceWeatherDataFile', weather_file),
            # Location information
            ('Nation', country),
            ('State', state),
            ('County', county),
            ('Place', place),
            # Vegetation information
            ('MaximumLAI', max_lai),
            ('EvergreenPercent', evergreen_percent),
            ('VegetationType', vegetation_type),
            ('Height_Tree_m', tree_height),
            ('Height_WindSensor_m', wind_sensor_height),
        ]
        for tag, value in settings:
            update_text(tag, value)

        # Precipitation file path: use the supplied path, otherwise blank out the element
        if precip_file_path is not None:
            update_text('PrecipitationDataCsv', precip_file_path)
        else:
            print("No precipitation file path provided; PrecipitationDataCsv is cleared.")
            update_text('PrecipitationDataCsv', '')

        # Copy comments from the template WeatherPrepConfig.xml;
        # clear_existing=True wipes any pre-existing comments in the target before copying.
        wpc_path = RESOURCES_DIR / "WeatherPrepConfig.xml"
        copy_comments_from_template(wpc_path, tree, clear_existing=True)

        # Save the updated XML file with pretty print
        tree.write(config_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')
        print("Config file has been updated.")

        run_kwargs = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE, 'text': True}
        # Hide command prompt window on Windows OS
        if sys.platform.startswith('win'):
            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = SW_HIDE
            run_kwargs['startupinfo'] = startupinfo
        result = subprocess.run([weatherprep_path, folder_name], **run_kwargs)

        print(f"Subprocess finished with status: {result.returncode}")
        return result

    #delete_downloaded_files_GHCNh will delete the GHCNh psv file
    def delete_downloaded_files_GHCNh(self, GHCNh, year):
        """Remove GHCNh_<GHCNh>_<year>.psv and GHCNh_<GHCNh>_<year>.csv from the current working directory when present."""
        for extension in ('psv', 'csv'):
            leftover = f"GHCNh_{GHCNh}_{year}.{extension}"
            if os.path.exists(leftover):
                os.remove(leftover)
            
    #subhourly_processing will process hourly to sub-hourly for GHCNh or ISD
    def subhourly_processing(self, start_ymdh, end_ymdh, subhour_interval, folder_path):
        """
        Resample the hourly Evaporation/Radiation/Weather CSVs in folder_path to a sub-hourly step.

        For each of Evaporation.csv, Radiation.csv and Weather.csv the rows between
        start_ymdh and one day past end_ymdh are kept, resampled to subhour_interval
        minutes (mean per bin), gap-filled by linear interpolation, and written under
        the same file name to <folder_path>/Subhourly with the original
        'YYYYMMDD' / 'HH:MM:SS' columns leading.

        Args:
            start_ymdh: Start of the window, parsed with format %Y%m%d%H.
            end_ymdh: End of the window, parsed with format %Y%m%d%H; rows up to one day
                after this timestamp are retained.
            subhour_interval: Output time step in minutes.
            folder_path: Folder holding the three hourly CSV files.
        """
        # Make new folder
        output_folder_path = os.path.join(folder_path, 'Subhourly')
        os.makedirs(output_folder_path, exist_ok=True)

        date_col, time_col = 'YYYYMMDD', 'HH:MM:SS'

        # Convert start_ymdh and end_ymdh to timestamps
        start_date = pd.to_datetime(start_ymdh, format='%Y%m%d%H')
        end_date = pd.to_datetime(end_ymdh, format='%Y%m%d%H')
        window_end = end_date + pd.Timedelta(days=1)

        # 'min' is the minute alias; the former 'T' alias is deprecated since pandas 2.2
        interval_str = f'{subhour_interval}min'

        for meteo in ('Evaporation', 'Radiation', 'Weather'):
            file_path = os.path.join(folder_path, f"{meteo}.csv")

            # Read the date/time columns as text and combine them explicitly; combining
            # columns through read_csv(parse_dates={...}) is deprecated since pandas 2.2
            df_sh = pd.read_csv(file_path, dtype={date_col: str, time_col: str})
            value_columns = [name for name in df_sh.columns if name not in (date_col, time_col)]
            df_sh['Datetime'] = pd.to_datetime(
                df_sh[date_col].str.strip() + ' ' + df_sh[time_col].str.strip(),
                format='%Y%m%d %H:%M:%S')

            # Filter data based on user-provided date range
            in_window = (df_sh['Datetime'] >= start_date) & (df_sh['Datetime'] <= window_end)
            df_sh = df_sh.loc[in_window, ['Datetime'] + value_columns].set_index('Datetime')

            # Resample to the requested interval, then fill the new time steps linearly
            df_sh_resampled = df_sh.resample(interval_str).mean().interpolate(method='linear')

            # Restore 'Datetime' as a column and split it back into the two text columns
            df_sh_resampled = df_sh_resampled.reset_index()
            df_sh_resampled[date_col] = df_sh_resampled['Datetime'].dt.strftime('%Y%m%d')
            df_sh_resampled[time_col] = df_sh_resampled['Datetime'].dt.strftime('%H:%M:%S')

            # Original column order, with 'Datetime' dropped
            df_sh_resampled = df_sh_resampled[[date_col, time_col] + value_columns]

            output_file_path = os.path.join(output_folder_path, f"{meteo}.csv")
            df_sh_resampled.to_csv(output_file_path, index=False, header=True)
            

#~~~ISD data processing ~~~~~~~~~~~~~~~~~~
    #download_noaa_data_ISD will download NOAA NCEI ISD 
    def download_noaa_data_ISD(self, usaf_wban, start_year, end_year):
        """
        Download the yearly NOAA NCEI ISD archives for one station over anonymous FTP.

        For every year in [start_year, end_year] the file
        /pub/data/noaa/<year>/<usaf_wban>-<year>.gz is fetched from ftp.ncei.noaa.gov
        into the current working directory. self.flag_failure is set to False when all
        years were retrieved and to True on any error; errors are reported through
        self.update_console and are not re-raised.

        Args:
            usaf_wban: Station identifier used as the file-name prefix.
            start_year: First year to download (int).
            end_year: Last year to download (int, inclusive).
        """
        try:
            # The context manager closes the FTP connection even when a transfer fails
            with ftplib.FTP('ftp.ncei.noaa.gov') as ftp:
                ftp.login()
                for year in range(start_year, end_year + 1):
                    # Navigate to the directory of this year
                    ftp.cwd(f'/pub/data/noaa/{year}')
                    file_name = f'{usaf_wban}-{year}.gz'
                    try:
                        # Open and write to local directory
                        with open(file_name, 'wb') as f:
                            ftp.retrbinary(f'RETR {file_name}', f.write)
                    except Exception:
                        # Do not leave an empty or truncated archive behind
                        if os.path.exists(file_name):
                            os.remove(file_name)
                        raise
            self.flag_failure = False
        except ftplib.error_perm as e:
            message = f"FTP Connection Error: {e}"
            if "530" in str(e):
                # 530 = not logged in; treated here as a server-side outage hint
                message += "\nServer-side connection may be down. Please try again later."
            self.update_console.emit(message)
            self.flag_failure = True
        except Exception as e:
            # Handle other exceptions
            self.update_console.emit(f"Error during download: {e}")
            self.flag_failure = True

    #unzip_gz_file_ISD unzips downloaded ISD; the gz contains a ISD file without any extension
    def unzip_gz_file_ISD(self, usaf_wban, start_year, end_year):
        """
        Decompress <usaf_wban>-<year>.gz for every year in [start_year, end_year].

        Archives are read from the current working directory and each one is written
        next to it under the same name with '.gz' removed (the extension-less file
        that is later fed to ishapp).
        """
        for year in range(start_year, end_year + 1):
            archive_name = f'{usaf_wban}-{year}.gz'
            extracted_name = archive_name.replace('.gz', '')
            with gzip.open(archive_name, 'rb') as compressed, open(extracted_name, 'wb') as extracted:
                shutil.copyfileobj(compressed, extracted)

    #run_ishapp2_ISD converts the ISD to ISH format and _ish is added to end of file name to distinguish it
    def run_ishapp2_ISD(self, ishapp_path, usaf_wban, start_year, end_year):
        """
        Run ishapp on <usaf_wban>-<year> for each year, producing <usaf_wban>-<year>_ish.txt.

        File names are relative, so they resolve against the current working
        directory. The program's stdout and stderr are discarded and its exit status
        is not inspected; on Windows the process is started with a hidden window.
        """
        on_windows = sys.platform.startswith('win')
        for year in range(start_year, end_year + 1):
            source_name = f'{usaf_wban}-{year}'
            converted_name = f'{usaf_wban}-{year}_ish.txt'
            command = [ishapp_path, source_name, converted_name]
            options = {'stdout': subprocess.DEVNULL, 'stderr': subprocess.DEVNULL}

            if on_windows:
                # STARTUPINFO flags keep the ishapp console window from appearing
                hidden = STARTUPINFO()
                hidden.dwFlags |= STARTF_USESHOWWINDOW
                hidden.wShowWindow = SW_HIDE
                options['startupinfo'] = hidden

            subprocess.run(command, **options)
                               
    #prepare_multi_year_folder_ISD will prepare one folder to contain multiple years
    def prepare_multi_year_folder_ISD(self, wpc_path, output_folder, usaf_wban, start_year, end_year):
        """
        Build the run folder <output_folder>/<usaf_wban>/<YY>-<YY> for an ISD station.

        The WeatherPrepConfig template (wpc_path) is copied into the folder. With
        start_year == end_year the single file <usaf_wban>-<year>_ish.txt is moved in
        unchanged (an error is printed if it is missing). Otherwise the yearly
        *_ish.txt files are concatenated into
        <usaf_wban>-<start_year>-<end_year>_ish.txt, dropping the first line of every
        year after the first, and the yearly files are deleted from output_folder.

        Returns:
            Path of the run folder.
        """
        span_label = f'{str(start_year)[2:4]}-{str(end_year)[2:4]}'
        folder_name = os.path.join(output_folder, f'{usaf_wban}', span_label)
        os.makedirs(folder_name, exist_ok=True)

        # WeatherPrepConfig XML always goes into the final folder
        copy2(wpc_path, folder_name)

        def yearly_path(year):
            # Temporary per-year ISH file produced in output_folder
            return os.path.join(output_folder, f'{usaf_wban}-{year}_ish.txt')

        # Single year: move the one file across under its own name
        if start_year == end_year:
            only_file = yearly_path(start_year)
            if os.path.exists(only_file):
                copy2(only_file, os.path.join(folder_name, f'{usaf_wban}-{start_year}_ish.txt'))
                os.remove(only_file)
            else:
                print(f"ERROR: Expected weather file not found: {only_file}")
            return folder_name

        # Multiple years: merge into one file, keeping only the first year's leading line
        years = range(start_year, end_year + 1)
        merged_path = os.path.join(folder_name, f'{usaf_wban}-{start_year}-{end_year}_ish.txt')
        with open(merged_path, 'w') as merged:
            for year in years:
                with open(yearly_path(year), 'r') as source:
                    rows = source.readlines()
                first_row = 0 if year == start_year else 1
                merged.writelines(rows[first_row:])

        # Remove the temporary yearly files
        for year in years:
            stale = yearly_path(year)
            if os.path.exists(stale):
                os.remove(stale)

        return folder_name

    #prepare_single_year_folder_ISD will prepare one folder to contain each year
    def prepare_single_year_folder_ISD(self, wpc_path, output_folder, usaf_wban, year):
        """
        Create <output_folder>/<usaf_wban>/<year> and populate it for a one-year ISD run.

        The WeatherPrepConfig template (wpc_path) is always copied in. When the temporary
        file <output_folder>/<usaf_wban>-<year>_ish.txt exists it is copied into the
        folder as well and then removed from output_folder.

        Returns:
            Path of the per-year folder.
        """
        year_dir = os.path.join(output_folder, usaf_wban, str(year))
        os.makedirs(year_dir, exist_ok=True)

        # Config template goes in first
        copy2(wpc_path, year_dir)

        temp_ish = os.path.join(output_folder, f'{usaf_wban}-{year}_ish.txt')
        if not os.path.exists(temp_ish):
            # No converted weather file for this year
            return year_dir

        # Copy the weather data next to the config, then drop the temporary original
        copy2(temp_ish, year_dir)
        os.remove(temp_ish)
        return year_dir

    #update_multi_year_config_file_ISD will prepare one WeatherPrepConfig.xml to contain multiple years
    def update_multi_year_config_file_ISD(self, weatherprep_path, folder_name, usaf_wban, start_year, end_year,
                           country, state, county, place,
                           model, max_lai, evergreen_percent, vegetation_type, tree_height, wind_sensor_height, precip_file_path):
        """
        Fill in WeatherPrepConfig.xml for a multi-year ISD run, then run WeatherPrep on the folder.

        The config in folder_name is parsed with lxml, its elements are set from the
        arguments, its comments are refreshed from the template
        RESOURCES_DIR/WeatherPrepConfig.xml via copy_comments_from_template, and the file
        is written back. WeatherPrep (weatherprep_path) is then launched with folder_name
        as its only argument; on Windows the console window is hidden.

        Args:
            weatherprep_path: Path of the WeatherPrep executable.
            folder_name: Run folder containing WeatherPrepConfig.xml.
            usaf_wban: Station identifier used in the weather file name
                <usaf_wban>-<start_year>-<end_year>_ish.txt.
            start_year, end_year: Written to StartYear / EndYear.
            country, state, county, place: Written to Nation / State / County / Place.
            model, max_lai, evergreen_percent, vegetation_type, tree_height,
            wind_sensor_height: Written to Model, MaximumLAI, EvergreenPercent,
                VegetationType, Height_Tree_m and Height_WindSensor_m.
            precip_file_path: Written to PrecipitationDataCsv; None clears the element.

        Returns:
            The subprocess.CompletedProcess of the WeatherPrep run (stdout/stderr captured as text).
        """
        # Construct the file path for the config file
        config_file = os.path.join(folder_name, 'WeatherPrepConfig.xml')
        print(f"Updating config file: {config_file}")

        # Parse the XML file with lxml to keep comments
        parser = ET.XMLParser(remove_blank_text=True, remove_comments=False)
        tree = ET.parse(config_file, parser)
        root = tree.getroot()

        # Safely set the text of the first element with this tag; name the tag when it is missing
        def update_text(tag, text):
            element = root.find(f'.//{tag}')
            if element is not None:
                element.text = str(text)
            else:
                print(f"Warning: Attempted to update a non-existent element <{tag}> with {text}")

        weather_file = os.path.join(folder_name, f'{usaf_wban}-{start_year}-{end_year}_ish.txt')
        settings = [
            # Year-related information
            ('StartYear', start_year),
            ('EndYear', end_year),
            # Model and path information
            ('Model', model),
            ('SurfaceWeatherDataFile', weather_file),
            # Location information
            ('Nation', country),
            ('State', state),
            ('County', county),
            ('Place', place),
            # Vegetation information
            ('MaximumLAI', max_lai),
            ('EvergreenPercent', evergreen_percent),
            ('VegetationType', vegetation_type),
            ('Height_Tree_m', tree_height),
            ('Height_WindSensor_m', wind_sensor_height),
        ]
        for tag, value in settings:
            update_text(tag, value)

        # Precipitation file path: use the supplied path, otherwise blank out the element
        if precip_file_path is not None:
            update_text('PrecipitationDataCsv', precip_file_path)
        else:
            print("No precipitation file path provided; PrecipitationDataCsv is cleared.")
            update_text('PrecipitationDataCsv', '')

        # Copy comments from the template WeatherPrepConfig.xml;
        # clear_existing=True wipes any pre-existing comments in the target before copying.
        wpc_path = RESOURCES_DIR / "WeatherPrepConfig.xml"
        copy_comments_from_template(wpc_path, tree, clear_existing=True)

        # Save the updated XML file with pretty print
        tree.write(config_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')
        print("Config file has been updated.")

        run_kwargs = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE, 'text': True}
        # Hide command prompt window on Windows OS
        if sys.platform.startswith('win'):
            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = SW_HIDE
            run_kwargs['startupinfo'] = startupinfo
        result = subprocess.run([weatherprep_path, folder_name], **run_kwargs)

        print(f"Subprocess finished with status: {result.returncode}")
        return result

    #update_single_year_config_file_ISD will prepare one WeatherPrepConfig.xml to contain single years
    def update_single_year_config_file_ISD(self, weatherprep_path, folder_name, usaf_wban, year,
                           country, state, county, place,
                           model, max_lai, evergreen_percent, vegetation_type, tree_height, wind_sensor_height,
                           precip_file_path):
        """
        Fill in WeatherPrepConfig.xml for a single-year ISD run, then run WeatherPrep on the folder.

        The config in folder_name is parsed with lxml, its elements are set from the
        arguments (StartYear and EndYear both receive year), its comments are refreshed
        from the template RESOURCES_DIR/WeatherPrepConfig.xml via
        copy_comments_from_template, and the file is written back. WeatherPrep
        (weatherprep_path) is then launched with folder_name as its only argument; on
        Windows the console window is hidden.

        Args:
            weatherprep_path: Path of the WeatherPrep executable.
            folder_name: Run folder containing WeatherPrepConfig.xml.
            usaf_wban: Station identifier used in the weather file name
                <usaf_wban>-<year>_ish.txt.
            year: Written to both StartYear and EndYear.
            country, state, county, place: Written to Nation / State / County / Place.
            model, max_lai, evergreen_percent, vegetation_type, tree_height,
            wind_sensor_height: Written to Model, MaximumLAI, EvergreenPercent,
                VegetationType, Height_Tree_m and Height_WindSensor_m.
            precip_file_path: Written to PrecipitationDataCsv; None clears the element.

        Returns:
            The subprocess.CompletedProcess of the WeatherPrep run (stdout/stderr captured as text).
        """
        # Construct the file path for the config file
        config_file = os.path.join(folder_name, 'WeatherPrepConfig.xml')
        print(f"Updating config file: {config_file}")

        # Parse the XML file with lxml to keep comments
        parser = ET.XMLParser(remove_blank_text=True, remove_comments=False)
        tree = ET.parse(config_file, parser)
        root = tree.getroot()

        # Safely set the text of the first element with this tag; name the tag when it is missing
        def update_text(tag, text):
            element = root.find(f'.//{tag}')
            if element is not None:
                element.text = str(text)
            else:
                print(f"Warning: Attempted to update a non-existent element <{tag}> with {text}")

        weather_file = os.path.join(folder_name, f'{usaf_wban}-{year}_ish.txt')
        settings = [
            # Year-related information
            ('StartYear', year),
            ('EndYear', year),
            # Model and path information
            ('Model', model),
            ('SurfaceWeatherDataFile', weather_file),
            # Location information
            ('Nation', country),
            ('State', state),
            ('County', county),
            ('Place', place),
            # Vegetation information
            ('MaximumLAI', max_lai),
            ('EvergreenPercent', evergreen_percent),
            ('VegetationType', vegetation_type),
            ('Height_Tree_m', tree_height),
            ('Height_WindSensor_m', wind_sensor_height),
        ]
        for tag, value in settings:
            update_text(tag, value)

        # Precipitation file path: use the supplied path, otherwise blank out the element
        if precip_file_path is not None:
            update_text('PrecipitationDataCsv', precip_file_path)
        else:
            print("No precipitation file path provided; PrecipitationDataCsv is cleared.")
            update_text('PrecipitationDataCsv', '')

        # Copy comments from the template WeatherPrepConfig.xml;
        # clear_existing=True wipes any pre-existing comments in the target before copying.
        wpc_path = RESOURCES_DIR / "WeatherPrepConfig.xml"
        copy_comments_from_template(wpc_path, tree, clear_existing=True)

        # Save the updated XML file with pretty print
        tree.write(config_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')
        print("Config file has been updated.")

        run_kwargs = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE, 'text': True}
        # Hide command prompt window on Windows OS
        if sys.platform.startswith('win'):
            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = SW_HIDE
            run_kwargs['startupinfo'] = startupinfo
        result = subprocess.run([weatherprep_path, folder_name], **run_kwargs)

        print(f"Subprocess finished with status: {result.returncode}")
        return result

    #delete_downloaded_files_ISD deletes unneeded ISD files
    def delete_downloaded_files_ISD(self, usaf_wban, year):
        """Remove <usaf_wban>-<year>.gz and its extracted <usaf_wban>-<year> file from the current working directory when present."""
        base_name = f'{usaf_wban}-{year}'
        for leftover in (f'{base_name}.gz', base_name):
            if os.path.exists(leftover):
                os.remove(leftover)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~TASK HANDLER FOR SAFE TERMINATION ~~~~~~~~~~~~~~~
    def request_stop(self):
        """
        Set self.stop_requested to True.

        NOTE(review): presumably the running task polls this flag to terminate safely;
        the code that reads it is not part of this section - confirm.
        """
        self.stop_requested = True

    # ~~~~~~~~~~~~~~~~~ TASK ~~~~~~~~~~~~~~~~~~
    def run_task(self):
        ishapp_path = ROOT_DIR / "NOAA_ISD_ishapp2" / "ishapp2.exe"

        weatherprep_path = ROOT_DIR / "WeatherPrep" / "WeatherPrep" / "bin" / "Release" / "WeatherPrep.exe"
        
        wpc_path = RESOURCES_DIR / "WeatherPrepConfig.xml"

        output_folder = ROOT_DIR / "Outputs"
        output_folder.mkdir(exist_ok=True)
        os.chdir(output_folder)

        # Required params
        usaf_wban = self.params['usaf_wban']
        start_year = int(self.params['start_year'])
        end_year = int(self.params['end_year'])
        country = self.params['country']
        state = self.params['state']
        county = self.params['county']
        place = self.params['place']

        # Advanced XML params
        model = self.params['model']
        max_lai = self.params['max_lai']
        evergreen_percent = self.params['evergreen_percent']
        vegetation_type = self.params['vegetation_type']
        tree_height = self.params['tree_height']
        wind_sensor_height = self.params['wind_sensor_height']
        precip_file_path = self.params['precip_file']
        
        #multiyear_flag checkbox from UI to combine multiple years into a single output
        #Note: Useful to ensure time zone adjustment only affects start of first and end of last year of a multiple year run
        multiyear_flag = self.params['multiyear_flag']
        # ---Define data_source for worker class
        data_source = self.params.get('data_source', 'GHCNh')

        start_ymdh = self.params.get('start_ymdh')
        end_ymdh = self.params.get('end_ymdh')
        subhour_interval = self.params.get('subhour_interval')

        # NEW: Latitude/Longitude passed from GUI
        lat_dd = self.params.get('lat_dd')
        lon_dd = self.params.get('lon_dd')
        
        flag_run_successful = False

        # 1) GHCN-Hourly path (now matches: 'GHCNh')
        # GHCN-HOURLY PIPELINE
        #if data_source.startswith("GHCNh") then working with latest GHCNh format
        if data_source.startswith("GHCNh"):
            if lat_dd is None or lon_dd is None:
                self.update_console.emit(
                    "GHCN-Hourly station selection requires latitude and longitude."
                )
                return

            # Build GHCN-Hourly station ID from attributes file
            station_id, usaf, wban, station_end_year = self.build_GHCNh_id(
                usaf_wban,
                country,
                state,
                county,
                place,
                lat_dd,
                lon_dd
            )
            self.update_console.emit(f"Using GHCN-Hourly station ID {station_id}...")

            self.update_console.emit(
                f"Downloading GHCN-Hourly weather data (HTTPS) for {station_id}..."
            )
            self.download_GHCNh_data(station_id, start_year, end_year, station_end_year)

            if not self.flag_failure:
                # Existing logic: convert PSV to WeatherPrep CSV, then folder/config prep…
                self.update_console.emit("Converting GHCN-Hourly PSV to WeatherPrep CSV...")
                self.GHCNh_psv_to_weatherprep_csv(output_folder, station_id, usaf, wban, start_year, end_year)
                if multiyear_flag:
                    self.update_console.emit(f'Preparing folder for {station_id} (multi-year GHCN)...')
                    meteorological_data_download_path = self.prepare_multi_year_folder_GHCNh(
                        wpc_path, output_folder, station_id, start_year, end_year
                    )

                    self.update_console.emit(f'Running WeatherPrep for {station_id}...')
                    #result updated for multi
                    result = self.update_multi_year_config_file_GHCNh(
                        weatherprep_path, meteorological_data_download_path, station_id, start_year, end_year,
                        country, state, county, place,
                        model, max_lai, evergreen_percent, vegetation_type,
                        tree_height, wind_sensor_height, precip_file_path
                    )
                    #flag_run_successful updated
                    flag_run_successful = (result.returncode == 0)
                        
                    for year in range(start_year, end_year + 1):
                        self.update_console.emit('Deleting intermediate GHCN PSV files...')
                        self.delete_downloaded_files_GHCNh(station_id, year)
                else:
                    for year in range(start_year, end_year + 1):
                        self.update_console.emit(f'Preparing folder for {station_id}-{year} (GHCN)...')
                        meteorological_data_download_path = self.prepare_single_year_folder_GHCNh(
                            wpc_path, output_folder, station_id, year
                        )

                        self.update_console.emit(f'Running WeatherPrep for {station_id}-{year}...')
                        #result updated for single
                        result = self.update_single_year_config_file_GHCNh(
                            weatherprep_path, meteorological_data_download_path, station_id, year,
                            country, state, county, place,
                            model, max_lai, evergreen_percent, vegetation_type,
                            tree_height, wind_sensor_height, precip_file_path
                        )
                        #flag_run_successful updated
                        ok = (result.returncode == 0)
                        flag_run_successful = flag_run_successful or ok

                        self.update_console.emit('Deleting intermediate GHCN PSV files...')
                        self.delete_downloaded_files_GHCNh(station_id, year)

        # 2) ERA5-only / ERA5+blend modes
        elif data_source.startswith("ERA5"):
            self.update_console.emit(f"Selected data source: {data_source} (ERA5 mode).")

            lat_dd = self.params.get('lat_dd')
            lon_dd = self.params.get('lon_dd')

            start_year = self.params.get('start_year')
            end_year   = self.params.get('end_year')

            # NEW: date-range fields
            start_month = self.params.get('era5_start_month')
            start_day   = self.params.get('era5_start_day')
            end_month   = self.params.get('era5_end_month')
            end_day     = self.params.get('era5_end_day')

            # Run ERA5 pipeline (download + NetCDF->CSV + XML update)
            meteorological_data_download_path, ok = self.run_era5_pipeline(
                start_year=start_year,
                end_year=end_year,
                model=model,
                max_lai=max_lai,
                evergreen_percent=evergreen_percent,
                vegetation_type=vegetation_type,
                tree_height=tree_height,
                wind_sensor_height=wind_sensor_height,
                precip_file_path=precip_file_path,
                country=country,
                state=state,
                county=county,
                place=place,
                lat_dd=lat_dd,
                lon_dd=lon_dd,
                output_root=output_folder,
                start_month=start_month,
                start_day=start_day,
                end_month=end_month,
                end_day=end_day,
            )

            flag_run_successful = ok

            if not ok:
                self.update_console.emit(
                    "ERA5 processing encountered an error. See messages above for details."
                )
                self.finished.emit()
                return

        # 3) ISD paths (V1 legacy + V2 improved)
        # ISD LEGACY PIPELINE
        #elif data_source.startswith("ISD") then working with legacy ISD format
        elif data_source.startswith("ISD"):

            self.update_console.emit(f'Downloading ISD weather data (FTP) for {usaf_wban}...')
            self.download_noaa_data_ISD(usaf_wban, start_year, end_year)

            if not self.flag_failure:
                self.update_console.emit('Unzipping .gz files...')
                self.unzip_gz_file_ISD(usaf_wban, start_year, end_year)

                self.update_console.emit('Running ishapp2.exe (NOAA stage2)...')
                self.run_ishapp2_ISD(ishapp_path, usaf_wban, start_year, end_year)

                #if multiyear_flag true then multiple years of ISD are combined into a single folder, file, and WeatherPrepConfig.xml
                if multiyear_flag:
                    self.update_console.emit(f'Preparing folder for {usaf_wban} (multi-year ISD)...')
                    meteorological_data_download_path = self.prepare_multi_year_folder_ISD(wpc_path, output_folder, usaf_wban, start_year, end_year)

                    self.update_console.emit(f'Running WeatherPrep for {usaf_wban}...')
                    #result updated for multi
                    result = self.update_multi_year_config_file_ISD(
                        weatherprep_path, meteorological_data_download_path, usaf_wban, start_year, end_year,
                        country, state, county, place,
                        model, max_lai, evergreen_percent, vegetation_type,
                        tree_height, wind_sensor_height, precip_file_path
                    )
                    #flag_run_successful updated
                    flag_run_successful = (result.returncode == 0)

                    for year in range(start_year, end_year + 1):
                        self.update_console.emit('Deleting intermediate ISD files...')
                        self.delete_downloaded_files_ISD(usaf_wban, year)
                        
                #else if multiyear_flag false then a single year of ISD is in a single folder, file, and WeatherPrepConfig.xml
                else:
                    for year in range(start_year, end_year + 1):
                        self.update_console.emit(f'Preparing folder for {usaf_wban}-{year} (ISD)...')
                        meteorological_data_download_path = self.prepare_single_year_folder_ISD(wpc_path, output_folder, usaf_wban, year)

                        self.update_console.emit(f'Running WeatherPrep for {usaf_wban}-{year}...')
                        #result updated for single
                        result = self.update_single_year_config_file_ISD(
                            weatherprep_path, meteorological_data_download_path, usaf_wban, year,
                            country, state, county, place,
                            model, max_lai, evergreen_percent, vegetation_type,
                            tree_height, wind_sensor_height, precip_file_path
                        )
                        #flag_run_successful updated
                        ok = (result.returncode == 0)
                        flag_run_successful = flag_run_successful or ok

                        self.update_console.emit('Deleting intermediate ISD files...')
                        self.delete_downloaded_files_ISD(usaf_wban, year)

        # -------------------------
        # Sub-hourly resampling & final messages
        # -------------------------
        if flag_run_successful and start_ymdh and end_ymdh:
            self.update_console.emit('Creating sub-hourly weather files...')
            self.subhourly_processing(start_ymdh, end_ymdh, subhour_interval, meteorological_data_download_path)
            output_folder_path = os.path.join(meteorological_data_download_path, 'Subhourly')
            self.update_console.emit('\nWeather processing completed.')
            self.update_console.emit(f'Check output files at: {output_folder_path}')
            self.update_console.emit(f'Thank you for using i-Tree tools to improve the world!')
        elif flag_run_successful:
            self.update_console.emit('\nWeather processing completed.')
            output_folder_path = meteorological_data_download_path
            self.update_console.emit(f'Check output files at: {output_folder_path}')
            self.update_console.emit(f'Thank you for using i-Tree tools to improve the world!')
        else:
            self.update_console.emit('\nWeather processing could not complete. See console/log for details.')

        self.finished.emit()


        print("\n")
        print("Weather processing complete. To release command prompt window, exit the GUI.")
        print("Thank you for using i-Tree tools to improve the world!")
        
#MAIN Widget
class WeatherProcessorApp(QtWidgets.QWidget):
    def __init__(self):
        """Load the bundled lookup tables, build the UI, and prepare run-time state."""
        super().__init__()

        # Mostly initialize empty params
        self.tab_widget = self.console = self.process_button = None
        self.data_table = self.advanced_xml_cb = self.reset_button = None
        self.worker = self.xmlWindow = None
        self.country_dropdown = self.state_dropdown = None
        self.county_dropdown = self.place_dropdown = None
        self.lat_edit = self.lon_edit = None

        # Location list and station history shipped in the resources folder;
        # USAF/WBAN are read as strings
        self.df_loc = pd.read_csv(RESOURCES_DIR / "itree-locations.csv")
        self.df_USAF_WBAN = pd.read_csv(
            RESOURCES_DIR / "usaf_wban-history.csv",
            dtype={'USAF': str, 'WBAN': str},
        )

        # Column types for usaf_wban-inventory.csv: string ids, integer year,
        # and one float column per calendar month
        month_columns = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')
        inventory_dtypes = {'USAF': str, 'WBAN': str, 'YEAR': int}
        inventory_dtypes.update((month, float) for month in month_columns)
        self.df_inv = pd.read_csv(
            RESOURCES_DIR / "usaf_wban-inventory.csv",
            dtype=inventory_dtypes,
        )

        self.initUI()
        self.radio_button_group = QtWidgets.QButtonGroup()
        # Define the thread handle now in case the user closes the window
        # before a worker has ever been created
        self.thread = None
        self.xmlConfigWindow = XMLConfigWindow()

    # Advanced XML Set-up
    def toggleXMLWindow(self, state):
        """Show the Advanced XML configuration window when checked, close it otherwise.

        The window is created on first use and reused afterwards.
        """
        if state != QtCore.Qt.Checked:
            # Unchecked: close the window only if one was ever created
            if self.xmlWindow:
                self.xmlWindow.close()
            return

        if not self.xmlWindow:
            # Constructed without passing self, so no parent is set
            self.xmlWindow = XMLConfigWindow()
            self.xmlWindow.setGeometry(900, 300, 250, 200)  # position and size
        self.xmlWindow.show()

    # ~~~~~~~~~~~~~~~~~~~ UI CODE ~~~~~~~~~~~~~~~~~~~
    def try_disconnect_signal(self, dropdown):
        """Disconnect all slots from ``dropdown.currentIndexChanged``.

        A TypeError from ``disconnect()`` is ignored, so the call is safe on a
        dropdown that currently has nothing connected.
        """
        index_signal = dropdown.currentIndexChanged
        try:
            index_signal.disconnect()
        except TypeError:
            return

    def initialize_dropdowns(self):
        """Fill the country dropdown, disable the dependent dropdowns, and preselect Syracuse."""
        country_box = self.country_dropdown

        # Placeholder entry followed by the distinct nation names in sorted order
        country_box.addItems(['--Select--'] + sorted(self.df_loc['NationName'].unique()))
        country_box.currentIndexChanged.connect(self.on_country_changed)
        country_box.setEnabled(True)

        # State, county and place start out disabled
        for dependent_box in (self.state_dropdown, self.county_dropdown, self.place_dropdown):
            dependent_box.setEnabled(False)

        # Auto-select the desired default location
        self.auto_select_location("United States of America", "New York", "Onondaga", "Syracuse")

    def auto_select_location(self, country, state, county, place):
        """Select ``country`` in its dropdown, then schedule state/county/place selection."""
        match_index = self.country_dropdown.findText(country)
        if match_index < 0:
            return  # country is not in the list; leave the dropdowns untouched

        self.country_dropdown.setCurrentIndex(match_index)

        # Give the state dropdown 100 ms to populate before selecting in it
        QtCore.QTimer.singleShot(100, lambda: self._select_state(state, county, place))

    def _select_state(self, state, county, place):
        """Select ``state`` once the country is set, then schedule the county step."""
        match_index = self.state_dropdown.findText(state)
        if match_index < 0:
            return  # state not listed; stop the selection chain here

        self.state_dropdown.setCurrentIndex(match_index)

        # Give the county dropdown 100 ms to populate before selecting in it
        QtCore.QTimer.singleShot(100, lambda: self._select_county(county, place))

    def _select_county(self, county, place):
        """Select ``county`` once the state is set, then schedule the place step."""
        match_index = self.county_dropdown.findText(county)
        if match_index < 0:
            return  # county not listed; stop the selection chain here

        self.county_dropdown.setCurrentIndex(match_index)

        # Give the place dropdown 100 ms to populate before selecting in it
        QtCore.QTimer.singleShot(100, lambda: self._select_place(place))

    def _select_place(self, place):
        """Select place after county is selected"""
        place_index = self.place_dropdown.findText(place)
        if place_index >= 0:
            self.place_dropdown.setCurrentIndex(place_index)

    def reset_app(self):
        """Return the dropdowns, coordinate fields, station table and date entries to their initial state."""
        # Empty the dropdowns from the most specific level up to the country ...
        dropdowns = (self.place_dropdown, self.county_dropdown,
                     self.state_dropdown, self.country_dropdown)
        for box in dropdowns:
            box.clear()
        # ... then drop their index-changed connections, country first
        for box in reversed(dropdowns):
            self.try_disconnect_signal(box)

        # The latitude/longitude edits may not exist; clear them only if they do
        for coord_edit in (self.lat_edit, self.lon_edit):
            if coord_edit is not None:
                coord_edit.clear()

        # Re-populate the country list and re-apply the default location
        self.initialize_dropdowns()

        # Clear the station table and the hourly / sub-hourly date entries
        self.data_table.setRowCount(0)
        for entry in (self.hourly_start_year_entry, self.hourly_end_year_entry,
                      self.sub_hourly_end_entry, self.sub_hourly_start_entry):
            entry.clear()


        # Add any other reset logic here

    def create_centered_widget(self, widget):
        """Return a new QWidget holding ``widget`` centered in a margin-free horizontal layout.

        ``widget`` is the first (and only) item of the holder's layout.
        """
        centering_layout = QtWidgets.QHBoxLayout()
        centering_layout.addWidget(widget)
        centering_layout.setAlignment(QtCore.Qt.AlignCenter)
        # Zero margins let the wrapped widget use the whole cell
        centering_layout.setContentsMargins(0, 0, 0, 0)

        holder = QtWidgets.QWidget()
        holder.setLayout(centering_layout)
        return holder


    def update_console(self, text) -> None:
        """Append ``text`` to the GUI console widget."""
        # Also used as the slot for the worker's update_console signal
        # (connected in setup_worker_and_thread)
        self.console.append(text)

    # TODO (cte 2025): Remove the command-prompt output below; it is limited to historical years and is not relevant to ERA5.
    #   - Modify the computation of "Good" so it runs only when a non-ERA5 source is chosen?
    #   - Redesign the GUI to ask for the data source first, and activate this section only if it is non-ERA5?
    # ~~~~~~~~~~~~~~~ LOCATION DROPDOWN ~~~~~~~~~~~~~~~~
    def calculate_distance(self, lat1, lon1, lat2, lon2):
        """Return the Euclidean distance between two points, measured in raw degrees.

        This is a simple planar approximation on the latitude/longitude
        differences, not a great-circle distance; it could be replaced with a
        more accurate method.
        """
        delta_lat = lat1 - lat2
        delta_lon = lon1 - lon2
        squared_sum = delta_lat ** 2 + delta_lon ** 2
        return squared_sum ** 0.5

    def on_country_changed(self, index):
        """Repopulate and enable the state dropdown for the currently selected country."""
        if index == -1:  # no selection
            return

        selected_country = self.country_dropdown.currentText()
        in_country = self.df_loc['NationName'] == selected_country
        state_names = sorted(self.df_loc.loc[in_country, 'PrimaryPN'].unique())

        # Order of operations: clear, add items, drop any old connection,
        # reconnect, enable
        state_box = self.state_dropdown
        state_box.clear()
        state_box.addItems(['--Select--'] + state_names)
        self.try_disconnect_signal(state_box)
        state_box.currentIndexChanged.connect(self.on_state_changed)
        state_box.setEnabled(True)

    def on_state_changed(self, index):
        """Repopulate and enable the county dropdown for the currently selected state.

        When the state has exactly one county, that county is selected
        automatically so the place dropdown is filled without an extra click.

        Args:
            index: Index reported by the state dropdown; -1 means no selection.
        """
        if index == -1:  # no selection
            return

        state = self.state_dropdown.currentText()
        in_state = (
            (self.df_loc['NationName'] == self.country_dropdown.currentText())
            & (self.df_loc['PrimaryPN'] == state)
        )
        counties = sorted(self.df_loc.loc[in_state, 'SecondaryPN'].unique())

        # Order of operations: clear, add items, drop any old connection,
        # reconnect, enable
        self.county_dropdown.clear()
        self.county_dropdown.addItems(['--Select--'] + counties)
        self.try_disconnect_signal(self.county_dropdown)
        self.county_dropdown.currentIndexChanged.connect(self.on_county_changed)
        self.county_dropdown.setEnabled(True)

        if len(counties) == 1:
            # Index 0 is the '--Select--' placeholder, so the sole county sits
            # at index 1. Selecting it fires on_county_changed through the
            # connection made above; calling on_county_changed(0) directly
            # would look up places for the placeholder text and find none.
            self.county_dropdown.setCurrentIndex(1)

    def on_county_changed(self, index):
        """Repopulate and enable the place dropdown for the currently selected county."""
        if index == -1:  # no selection
            return

        locations = self.df_loc
        selected_county = self.county_dropdown.currentText()
        in_county = (
            (locations['NationName'] == self.country_dropdown.currentText())
            & (locations['PrimaryPN'] == self.state_dropdown.currentText())
            & (locations['SecondaryPN'] == selected_county)
        )
        place_names = sorted(locations.loc[in_county, 'TertiaryPN'].unique())

        # Order of operations: clear, add items, drop any old connection,
        # reconnect, enable
        place_box = self.place_dropdown
        place_box.clear()
        place_box.addItems(['--Select--'] + place_names)
        self.try_disconnect_signal(place_box)
        place_box.currentIndexChanged.connect(self.on_place_changed)
        place_box.setEnabled(True)

    def on_place_changed(self, index):
        """Rebuild the station table for the location selected in the four dropdowns.

        Steps:
          1. Look up the selected place in ``self.df_loc`` and read its
             latitude/longitude (also stored on ``self.current_lat_dd`` and
             ``self.current_lon_dd``).
          2. Keep the stations of ``self.df_USAF_WBAN`` within +/-0.5 degrees
             of that point and compute their distance to it.
          3. Rate every station in ``self.df_inv`` as Good/Fair/Poor from the
             mean of its yearly JAN-DEC totals over the five calendar years
             before the current one; stations without inventory rows are Poor.
          4. Fill ``self.data_table`` with one radio-button row per nearby
             station, ordered by quality (Good, Fair, Poor) and then by END
             (latest first); the first row is pre-selected.

        Progress and intermediate tables are printed to stdout.

        Args:
            index: Index reported by the place dropdown; -1 means no selection.
        """
        print("on_place_changed called with index:", index)
        if index == -1:
            print("Index is -1, no action taken.")
            return

        # Extract selected location details
        place = self.place_dropdown.currentText()
        county = self.county_dropdown.currentText()
        state = self.state_dropdown.currentText()
        country = self.country_dropdown.currentText()
        print(f"Selected place: {place}, county: {county}, state: {state}, country: {country}")

        if not (place and county and state and country):
            print("One or more of place, county, state, or country is empty.")
            return

        selected_location = self.df_loc[
            (self.df_loc['TertiaryPN'] == place) &
            (self.df_loc['SecondaryPN'] == county) &
            (self.df_loc['PrimaryPN'] == state) &
            (self.df_loc['NationName'] == country)
        ]
        print("Selected location DataFrame:")
        print(selected_location)

        if selected_location.empty:
            # e.g. the '--Select--' placeholder is the current place
            print("Selected location is empty after filtering.")
            return

        lat_dd = float(selected_location.iloc[0]['Latitude'])
        lon_dd = float(selected_location.iloc[0]['Longitude'])
        print(f"Latitude: {lat_dd}, Longitude: {lon_dd}")

        # Persist for downstream use (ERA5, GHCNh, etc.)
        self.current_lat_dd = lat_dd
        self.current_lon_dd = lon_dd

        # Keep only stations inside a one-degree box centred on the location
        usaf_wban_filtered_df = self.df_USAF_WBAN[
            (self.df_USAF_WBAN['LAT'].between(lat_dd - 0.5, lat_dd + 0.5)) &
            (self.df_USAF_WBAN['LON'].between(lon_dd - 0.5, lon_dd + 0.5))
        ].copy()
        print("Filtered USAF_WBAN_history DataFrame:")
        print(usaf_wban_filtered_df.head())

        # Distance of every station in one vectorized call; calculate_distance
        # is plain arithmetic, so it accepts whole columns
        print("Calculating distances...")
        usaf_wban_filtered_df['DISTANCE'] = self.calculate_distance(
            lat_dd, lon_dd, usaf_wban_filtered_df['LAT'], usaf_wban_filtered_df['LON']
        )
        print("Distances calculated:")
        print(usaf_wban_filtered_df[['USAF', 'WBAN', 'DISTANCE']].head())

        # Sort by distance
        usaf_wban_filtered_df = usaf_wban_filtered_df.sort_values(by='DISTANCE')
        print("DataFrame sorted by DISTANCE:")
        print(usaf_wban_filtered_df[['USAF', 'WBAN', 'DISTANCE']].head())

        # Process USAF_WBAN_inventory.csv for latest 5 full years
        current_year = datetime.now().year
        years_of_interest = list(range(current_year - 5, current_year))
        print("Filtering inventory DataFrame for years:", years_of_interest)
        df_inv_filtered = self.df_inv[self.df_inv['YEAR'].isin(years_of_interest)].copy()
        print("Filtered inventory DataFrame:")
        print(df_inv_filtered.head())

        # Sum JAN-DEC for each station per year
        month_columns = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
        print("Calculating YEAR_TOTAL for each station per year...")
        df_inv_filtered['YEAR_TOTAL'] = df_inv_filtered[month_columns].sum(axis=1)
        print("YEAR_TOTAL calculated:")
        print(df_inv_filtered[['USAF', 'WBAN', 'YEAR', 'YEAR_TOTAL']].head())

        # Group by USAF and WBAN, compute average YEAR_TOTAL
        print("Grouping by USAF and WBAN to compute average YEAR_TOTAL...")
        grouped = df_inv_filtered.groupby(['USAF', 'WBAN'])['YEAR_TOTAL'].mean().reset_index()
        print("Grouped DataFrame with average YEAR_TOTAL:")
        print(grouped.head())

        print("Unique USAF codes in usaf_wban_filtered_df:")
        print(usaf_wban_filtered_df['USAF'].unique())

        print("Unique USAF codes in grouped:")
        print(grouped['USAF'].unique())

        # Check for intersection (diagnostic output only)
        common_usaf_codes = set(usaf_wban_filtered_df['USAF']).intersection(set(grouped['USAF']))
        print("Common USAF codes between usaf_wban_filtered_df and grouped:")
        print(common_usaf_codes)

        # Determine QUALITY based on average
        print("Determining QUALITY for each station...")

        def determine_quality(avg):
            # NOTE(review): thresholds presumably relate to hourly record
            # counts per year - confirm against the inventory file definition
            if avg > 8040:
                return 'Good'
            elif avg > 4368:
                return 'Fair'
            else:
                return 'Poor'

        grouped['QUALITY'] = grouped['YEAR_TOTAL'].apply(determine_quality)
        print("QUALITY determined:")
        print(grouped[['USAF', 'WBAN', 'QUALITY']].head())

        # Merge QUALITY into usaf_wban_filtered_df
        print("Merging QUALITY into filtered USAF WBAN DataFrame...")
        merged_df = pd.merge(
            usaf_wban_filtered_df,
            grouped[['USAF', 'WBAN', 'QUALITY']],
            on=['USAF', 'WBAN'],
            how='left'
        )

        # Stations with no inventory rows in the window get 'Poor'
        merged_df['QUALITY'] = merged_df['QUALITY'].fillna('Poor')

        # Select required fields
        required_fields = merged_df[['USAF', 'WBAN', 'STATION NAME', 'BEGIN', 'END', 'QUALITY']].copy()

        # ---- Custom sorting: QUALITY then END ----

        # Map QUALITY to an order: Good < Fair < Poor
        quality_order = {'Good': 0, 'Fair': 1, 'Poor': 2}
        required_fields['QUALITY_order'] = required_fields['QUALITY'].map(quality_order).fillna(99)

        # Treat 'Active' as the "latest" end date. to_numeric with coercion
        # keeps the sort from raising on a missing or malformed END value;
        # such rows get 0 and therefore sort last within their quality class.
        end_sort = required_fields['END'].replace('Active', '99991231')
        required_fields['END_sort'] = pd.to_numeric(end_sort, errors='coerce').fillna(0)

        # Sort: first by QUALITY (Good->Fair->Poor), then by END (latest->earliest)
        required_fields = required_fields.sort_values(
            by=['QUALITY_order', 'END_sort'],
            ascending=[True, False]
        )

        # Drop helper columns before populating table
        required_fields = required_fields.drop(columns=['QUALITY_order', 'END_sort'])

        print("Final required fields DataFrame (sorted):")
        print(required_fields)

        # Clear existing buttons and table content
        print("Clearing existing radio buttons and table content...")
        for button in self.radio_button_group.buttons():
            self.radio_button_group.removeButton(button)

        self.data_table.setRowCount(len(required_fields))
        column_names = list(required_fields.columns)

        print("Populating the data table...")
        for table_row, (_, row) in enumerate(required_fields.iterrows()):
            radio_button = QtWidgets.QRadioButton()
            if table_row == 0:  # Auto-select the first radio button
                radio_button.setChecked(True)
            self.radio_button_group.addButton(radio_button)

            # Column 0 holds the radio button; data columns follow from 1
            centered_radio_button = self.create_centered_widget(radio_button)
            self.data_table.setCellWidget(table_row, 0, centered_radio_button)

            for j, (column_name, val) in enumerate(zip(column_names, row)):
                # NOTE(review): END values starting with the hard-coded year
                # '2024' are displayed as 'Active' - presumably tied to the
                # vintage of the bundled history CSV; confirm when it is updated
                if column_name == 'END' and str(val).startswith('2024'):
                    val = 'Active'
                self.data_table.setItem(table_row, j + 1, QtWidgets.QTableWidgetItem(str(val)))

        print("Resizing columns to contents...")
        self.data_table.resizeColumnsToContents()

    def get_selected_row_data(self):
        """Return the station identifiers and record period of the checked table row.

        Returns:
            A 4-tuple ``(usaf, wban, ws_begin, ws_end)`` of strings read from
            the row whose radio button is checked; ``wban`` is zero-padded to
            five characters. When no button is checked, or the checked button
            is not found in the table, all four entries are ``None`` so that
            callers can always unpack four values.
        """
        no_selection = (None, None, None, None)

        # Get the checked button from the button group
        checked_button = self.radio_button_group.checkedButton()
        if checked_button is None:
            return no_selection

        # Find the row whose centered holder widget wraps the checked button
        for i in range(self.data_table.rowCount()):
            holder = self.data_table.cellWidget(i, 0)
            if holder is None:
                continue  # row has no radio-button cell
            if holder.layout().itemAt(0).widget() == checked_button:
                usaf = self.data_table.item(i, 1).text()            # Column 1: USAF
                wban = self.data_table.item(i, 2).text().zfill(5)   # Column 2: WBAN
                ws_begin = self.data_table.item(i, 4).text()        # Column 4: BEGIN
                ws_end = self.data_table.item(i, 5).text()          # Column 5: END
                return usaf, wban, ws_begin, ws_end

        return no_selection

    # ~~~~~~~~~~~~~~~ THREAD HANDLERS FOR MULTI-RUNS ~~~~~~~~~~~~~~~~
    def cleanup_thread(self):
        """Stop the worker thread, if any, and release the thread and worker references.

        Does nothing when no thread exists. A thread without a worker, or a
        worker whose ``finished`` signal is not connected to this method, is
        tolerated, so the method can run at any point in the worker/thread
        life cycle (it is also called from closeEvent).
        """
        if not self.thread:
            return

        if self.thread.isRunning():
            if self.worker is not None:
                self.worker.request_stop()
            self.thread.quit()
            self.thread.wait()

        # Disconnect this slot from the worker to prevent
        # "RuntimeError: wrapped C/C++ object of type QThread has been deleted"
        if self.worker is not None:
            try:
                self.worker.finished.disconnect(self.cleanup_thread)
            except (TypeError, RuntimeError):
                # Not connected, or the underlying Qt object is already gone:
                # nothing left to disconnect
                pass

        # Reset thread and worker to None after cleanup to prevent access to deleted objects
        self.thread = None
        self.worker = None

    def closeEvent(self, event):
        """Run cleanup_thread before the window closes, then accept the close event."""
        self.cleanup_thread()
        event.accept()

    def setup_worker_and_thread(self):
        """Move the worker to the thread, connect its signals, and start the thread.

        Expects ``self.worker`` and ``self.thread`` to be set by the caller
        before this method runs.
        """
        self.worker.moveToThread(self.thread)

        # Worker console messages are appended to the GUI console
        self.worker.update_console.connect(self.update_console)
        # When the worker reports it has finished, tear the thread down
        self.worker.finished.connect(self.cleanup_thread)
        # run_task is triggered by the thread's started signal
        self.thread.started.connect(self.worker.run_task)

        self.thread.start()

    # ------------- LAT/LON PARSER -------------
    def _parse_latlon_text(self, text):
        """
        Parse a latitude/longitude string into decimal degrees.

        Accepts:
          - Decimal degrees: "40.75", "-73.95"
          - Simple DMS: "40 45 0 N", "73 57 0 W", "40 45 N", etc.
        Returns float or raises ValueError.
        """
        if not text or text.strip() == "":
            raise ValueError("Empty Latitude/Longitude string")

        s = text.strip().upper()

        # If it looks like plain decimal, just convert
        try:
            # but be careful: things like "40 N" will fail here and fall through
            return float(s)
        except ValueError:
            pass

        # Tokenize by whitespace and punctuation
        # e.g. "40 45 0 N" -> ["40", "45", "0", "N"]
        tokens = re.split(r"[^\d\w\.\-]+", s)
        tokens = [t for t in tokens if t]  # drop empties

        if not tokens:
            raise ValueError(f"Could not parse Latitude/Longitude from '{text}'")

        # Look for hemisphere letter
        hemi = None
        if tokens[-1] in ("N", "S", "E", "W"):
            hemi = tokens[-1]
            nums = tokens[:-1]
        else:
            nums = tokens

        if not nums:
            raise ValueError(f"Could not parse numeric values from '{text}'")

        # Interpret nums as D, D M, or D M S
        try:
            deg = float(nums[0])
            minutes = float(nums[1]) if len(nums) > 1 else 0.0
            seconds = float(nums[2]) if len(nums) > 2 else 0.0
        except ValueError:
            raise ValueError(f"Non-numeric component in '{text}'")

        dec = deg + minutes / 60.0 + seconds / 3600.0

        if hemi in ("S", "W"):
            dec = -abs(dec)
        elif hemi in ("N", "E"):
            dec = abs(dec)

        return dec
    # ------------------------------------------
    def _toggle_era5_range_group(self, index: int):
        text = self.data_source_combo.currentText()
        use_era5 = text.startswith("ERA5:")
        self.era5_range_group.setEnabled(use_era5)


    # ~~~~~~~~~~~~~~~~~~ MAIN SCRIPT ~~~~~~~~~~~~~~~~~~
    def process_data(self):

        usaf, wban, ws_start, ws_end = self.get_selected_row_data()
        place = self.place_dropdown.currentText()
        county = self.county_dropdown.currentText()
        state = self.state_dropdown.currentText()
        country = self.country_dropdown.currentText()

        # Clear the console at the start of a new run
        try:
            self.console.clear()
        except Exception:
            pass
        self.console.append("=== New processing job started ===\n")

        # Default values for advanced configuration
        config_values = {
            'model': 'Hydro',
            'max_lai': 5,              # Default value for Maximum LAI
            'evergreen_percent': 5,    # Default value for Evergreen (%)
            'vegetation_type': 'Tree', # Default value for Vegetation Type
            'tree_height': 12,         # Default value for Tree Height (m)
            'wind_sensor_height': 10,  # Default value for Wind Sensor Height (m)
            'precip_file': None
        }

        # OPTIONAL FIELDS initialized as None
        start_ymdh = None
        end_ymdh = None
        subhour_interval = None
        multiple_years = False

        # If Advanced XML window open, override defaults
        if self.xmlWindow is not None:
            config_from_xml = self.xmlWindow.get_configuration_values()
            config_values.update(config_from_xml)

        # Identify which tab is active
        active_tab_index = self.tab_widget.currentIndex()
        active_tab_title = self.tab_widget.tabText(active_tab_index)

        # ---- YEAR / SUB-HOURLY HANDLING ------------------------
        if active_tab_title == "Hourly":
            start_year = self.hourly_start_year_entry.text().strip()
            end_year   = self.hourly_end_year_entry.text().strip()
            multiple_years = self.combine_years_checkbox.isChecked()

        elif active_tab_title == "Sub-Hourly":
            start_ymdh = self.sub_hourly_start_entry.text().strip()
            end_ymdh   = self.sub_hourly_end_entry.text().strip()

            if not (len(start_ymdh) == 10 and len(end_ymdh) == 10):
                self.console.append(
                    "Error: 'Start YYYYMMDDHH' or 'End YYYYMMDDHH' format is incorrect. "
                    "Please ensure the format is YYYYMMDDHH."
                )
                return

            subhour_interval = int(self.factor_spin_box.value())

            # Extract just the year part for further processing
            start_year = start_ymdh[:4]
            end_year   = end_ymdh[:4]

            # For sub-hourly we always keep one file per year
            multiple_years = False
        else:
            self.console.append("Error: Unknown tab state.")
            return
        # --------------------------------------------------------

        # Check if all required fields are provided
        if not all([usaf, wban, start_year, end_year, place, county, state, country]):
            self.console.append(
                "Error: Missing required information. Please ensure all fields are filled.")
            return

        # Convert to integers and check station year range
        try:
            start_year = int(start_year)
            end_year   = int(end_year)
            current_year = datetime.now().year
            ws_start = int(ws_start[:4])
            ws_end   = int(ws_end[:4]) if ws_end != 'Active' else current_year
        except ValueError:
            self.console.append("Error: Invalid year format. Please enter valid start and end years.")
            return

        # ---Define data_source for process data
        data_source = self.data_source_combo.currentText()

        # Warn if user picks ISD and wants 2025 or later
        if data_source.startswith("ISD") and end_year >= 2025:
            msg = (
                "You selected ISD (legacy FTP + ishapp2) as the data source, "
                "but your End Year is 2025 or later.<br><br>"
                "NOAA discontinued updates to ISD in late August 2025.<br>"
                "For 2025+ you should generally use GHCN-Hourly (new).<br><br>"
                'More info: <a href="https://www.ncei.noaa.gov/news/next-generation-climate-dataset-built-seamless-integration">'
                "NOAA next-generation climate dataset</a><br><br>"
                "Do you still want to continue with ISD for this run?"
            )

            box = QtWidgets.QMessageBox(self)
            box.setIcon(QtWidgets.QMessageBox.Warning)
            box.setWindowTitle("ISD data discontinued")
            box.setTextFormat(QtCore.Qt.RichText)
            box.setText(msg)
            box.setStandardButtons(QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
            box.setDefaultButton(QtWidgets.QMessageBox.No)

            label = box.findChild(QtWidgets.QLabel)
            if label:
                label.setOpenExternalLinks(True)
                label.setTextInteractionFlags(QtCore.Qt.TextBrowserInteraction)

            reply = box.exec_()
            if reply == QtWidgets.QMessageBox.No:
                return

        # Check station coverage
        if ws_end != current_year and end_year > ws_end:
            self.console.append(
                f"Please confirm that your End Year ({end_year}) is equal to or less than "
                f"the weather station's end year ({ws_end})."
            )
            return

        if start_year < ws_start:
            self.console.append(
                "Please confirm that your Start Year is equal to or greater than "
                "the weather station's start year.")
            return

        # ---------------- LAT/LON RESOLUTION -------------------
        lat_dd = None
        lon_dd = None

        # Use custom Latitude/Longitude if box is checked
        if hasattr(self, "use_custom_latlon_cb") and self.use_custom_latlon_cb.isChecked():
            lat_text = self.lat_edit.text().strip() if hasattr(self, "lat_edit") else ""
            lon_text = self.lon_edit.text().strip() if hasattr(self, "lon_edit") else ""
            if lat_text and lon_text:
                try:
                    lat_dd = float(lat_text)
                    lon_dd = float(lon_text)
                except ValueError:
                    self.console.append(
                        "Warning: Could not parse custom latitude/longitude as decimal degrees. "
                        "Falling back to dropdown location coordinates, if available."
                    )

        # Fall back to dropdown-derived coordinates if still missing
        if lat_dd is None or lon_dd is None:
            lat_dd = getattr(self, "current_lat_dd", None)
            lon_dd = getattr(self, "current_lon_dd", None)

        if (data_source.startswith("ERA5") and (lat_dd is None or lon_dd is None)):
            self.console.append(
                "Error: No latitude/longitude resolved. For ERA5 you must either "
                "select a place in the dropdown or enter custom Latitude/Longitude."
            )
            return
        # --------------------------------------------------------

        # ------------- ERA5 MONTH/DAY RANGE (NEW) --------------
        era5_start_month = None
        era5_start_day   = None
        era5_end_month   = None
        era5_end_day     = None

        if data_source.startswith("ERA5"):
            era5_start_month = self.era5_start_month_sb.value()
            era5_start_day   = self.era5_start_day_sb.value()
            era5_end_month   = self.era5_end_month_sb.value()
            era5_end_day     = self.era5_end_day_sb.value()

            # quick sanity check in a synthetic year
            try:
                from datetime import date
                d_start = date(2001, era5_start_month, era5_start_day)
                d_end   = date(2001, era5_end_month,   era5_end_day)
                if d_start > d_end:
                    self.console.append(
                        "Error: ERA5 start month/day must be <= ERA5 end month/day.")
                    return
            except Exception as e:
                self.console.append(f"Error parsing ERA5 month/day: {e}")
                return
        # --------------------------------------------------------

        # Gather all parameters into a dict
        params = {
            'data_source': data_source,
            'usaf_wban': f'{usaf}-{wban}',
            'start_year': start_year,
            'end_year': end_year,
            'start_ymdh': start_ymdh,
            'end_ymdh': end_ymdh,
            'subhour_interval': subhour_interval,
            'place': place,
            'county': county,
            'state': state,
            'country': country,
            'lat_dd': lat_dd,
            'lon_dd': lon_dd,
            'multiyear_flag': multiple_years,
            # new ERA5 date-range fields:
            'era5_start_month': era5_start_month,
            'era5_start_day':   era5_start_day,
            'era5_end_month':   era5_end_month,
            'era5_end_day':     era5_end_day,
            # and all the config values:
            **config_values,
        }

        # Clean up previous thread if needed
        if self.thread is not None:
            self.cleanup_thread()

        # Create a QThread object
        self.thread = QThread()

        # Create a worker object with the params dictionary
        self.worker = Worker(params)

        # Setup worker and thread
        self.setup_worker_and_thread()

    # ~~~~~~~~~~~~~~~~~~~~~ UI ~~~~~~~~~~~~~~~~~~~~~~~~~
    # initUI builds the main HydroPlus Weather Processor window (not the Advanced XML Configuration window).
    def initUI(self):
        """Build the main HydroPlus Weather Processor window.

        Creates every widget of the main window on a single QGridLayout:

        * rows 0-3: Nation / State / County / Place dropdowns, the
          instructions label and the 'Advanced XML Configuration' checkbox
        * row 4: 'Reset Location' button
        * row 5: weather-station table (7 columns)
        * rows 6-8: optional custom latitude/longitude input
        * row 9: tab widget holding the 'Hourly' tab (year range, combine
          checkbox, data-source combo, ERA5 month/day range group)
        * row 10: 'Process Data' and 'Close' buttons
        * rows 11-12: read-only console

        Signals are connected to ``toggleXMLWindow``, ``reset_app``,
        ``process_data`` and ``_toggle_era5_range_group``; the method ends by
        calling ``initialize_dropdowns()``.
        """
        # Window title, icon and geometry
        self.setWindowTitle('HydroPlus Weather Processor')
        # QIcon expects a string path, so the Path is converted explicitly
        icon_path = str(RESOURCES_DIR / "iTree_transparent.ico")
        self.setWindowIcon(QIcon(icon_path))
        self.setGeometry(200, 200, 1000, 720)  # x, y, width, height

        # Single grid layout that hosts every top-level control
        grid = QtWidgets.QGridLayout()
        self.setLayout(grid)

        # Country Dropdown (replacing Country Entry)
        grid.addWidget(QtWidgets.QLabel('Nation:'), 0, 0)
        self.country_dropdown = QtWidgets.QComboBox()
        grid.addWidget(self.country_dropdown, 0, 1, 1, 1)

        # State Dropdown (replacing State Entry)
        grid.addWidget(QtWidgets.QLabel('State:'), 1, 0, 1, 1)
        self.state_dropdown = QtWidgets.QComboBox()
        self.state_dropdown.setEnabled(False)  # Disabled initially
        grid.addWidget(self.state_dropdown, 1, 1, 1, 1)

        # County Dropdown (replacing County Entry)
        grid.addWidget(QtWidgets.QLabel('County:'), 2, 0, 1, 1)
        self.county_dropdown = QtWidgets.QComboBox()
        self.county_dropdown.setEnabled(False)  # Disabled initially
        grid.addWidget(self.county_dropdown, 2, 1, 1, 1)

        # Place Dropdown (replacing Place Entry)
        grid.addWidget(QtWidgets.QLabel('Place:'), 3, 0)
        self.place_dropdown = QtWidgets.QComboBox()
        self.place_dropdown.setEnabled(False)  # Disabled initially
        grid.addWidget(self.place_dropdown, 3, 1, 1, 1)

        # Instructions label shown to the right of the location dropdowns
        instructions = QtWidgets.QLabel("  Instructions:\n"
                                        "   1. Select nearest location to project area.\n"
                                        "   2. Select weather station.\n"
                                        "   3. Enter start and end year. \n"
                                        "   4. Click 'Process Data'")
        instructions.setWordWrap(True)
        font = QFont()
        font.setPointSize(10)
        instructions.setFont(font)
        # Spans rows 0 through 2 and columns 2 and 3
        grid.addWidget(instructions, 0, 2, 3, 2)

        # Advanced XML Configuration checkbox
        self.advanced_xml_cb = QtWidgets.QCheckBox('Advanced XML Configuration')
        grid.addWidget(self.advanced_xml_cb, 3, 3)  # Adjust grid positioning as needed
        self.advanced_xml_cb.stateChanged.connect(self.toggleXMLWindow)

        # Reset button (full-width row below the dropdowns)
        self.reset_button = QtWidgets.QPushButton('Reset Location')
        grid.addWidget(self.reset_button, 4, 0, 1, 4)
        self.reset_button.clicked.connect(self.reset_app)

        # Table that lists the candidate weather stations
        self.data_table = QtWidgets.QTableWidget(self)
        self.data_table.setColumnCount(7)
        self.data_table.setHorizontalHeaderLabels(
            ['Select', 'USAF', 'WBAN', 'STATION_NA', 'BEGIN', 'END', 'QUALITY'])
        grid.addWidget(self.data_table, 5, 0, 1, 4)

        # ----------------- Lat/Lon alternative input -----------------
        # Created exactly once; an earlier duplicate of this checkbox was never
        # placed in a layout and was discarded as soon as this one was assigned.
        self.use_custom_latlon_cb = QtWidgets.QCheckBox(
            "Use custom latitude/longitude instead of place selection")
        self.use_custom_latlon_cb.setToolTip(
            "If checked, WeatherPrep will use the latitude/longitude fields below for ERA5\n"
            "and any other workflows that support point coordinates directly."
        )
        grid.addWidget(self.use_custom_latlon_cb, 6, 0, 1, 2)

        # NOTE(review): process_data() converts these fields with float(), so the
        # degrees-minutes-seconds form suggested by the placeholders looks
        # unsupported there -- confirm, then adjust either the parser or the hint.
        lat_label = QtWidgets.QLabel("Latitude (°):")
        grid.addWidget(lat_label, 7, 0)
        self.lat_edit = QtWidgets.QLineEdit()
        self.lat_edit.setPlaceholderText("e.g. 40.75 or 40 45 0 N")
        grid.addWidget(self.lat_edit, 7, 1)

        lon_label = QtWidgets.QLabel("Longitude (°):")
        grid.addWidget(lon_label, 8, 0)
        self.lon_edit = QtWidgets.QLineEdit()
        self.lon_edit.setPlaceholderText("e.g. -73.95 or 73 57 0 W")
        grid.addWidget(self.lon_edit, 8, 1)
        # --------------------------------------------------------------

        # Tab widget; process_data() identifies the active tab by its title
        self.tab_widget = QtWidgets.QTabWidget()
        grid.addWidget(self.tab_widget, 9, 0, 1, 4)

        # Process Button
        self.process_button = QtWidgets.QPushButton('Process Data')
        self.process_button.clicked.connect(self.process_data)
        grid.addWidget(self.process_button, 10, 0, 1, 2)

        # Close Button: quits the whole application
        self.close_button = QtWidgets.QPushButton('Close')
        self.close_button.clicked.connect(QtWidgets.QApplication.instance().quit)
        grid.addWidget(self.close_button, 10, 2, 1, 2)

        # Console (read-only log output)
        self.console = QtWidgets.QTextEdit()
        self.console.setReadOnly(True)
        grid.addWidget(self.console, 11, 0, 2, 4)

        # ---------------------- 'Hourly' tab --------------------------
        hourly_tab = QtWidgets.QWidget()
        hourly_layout = QtWidgets.QGridLayout(hourly_tab)

        # Start / end year entries
        hourly_layout.addWidget(QtWidgets.QLabel('Start (YYYY):'), 0, 0)
        self.hourly_start_year_entry = QtWidgets.QLineEdit()
        hourly_layout.addWidget(self.hourly_start_year_entry, 0, 1)

        hourly_layout.addWidget(QtWidgets.QLabel('End (YYYY):'), 0, 2)
        self.hourly_end_year_entry = QtWidgets.QLineEdit()
        hourly_layout.addWidget(self.hourly_end_year_entry, 0, 3)

        self.combine_years_checkbox = QtWidgets.QCheckBox(
            'Combine multiple years into one file')
        hourly_layout.addWidget(self.combine_years_checkbox, 1, 1, 1, 3)

        # Data source selection. process_data() branches on the leading text of
        # the selected item (startswith("ISD") / startswith("ERA5")), so the
        # item prefixes are part of the program's behaviour.
        # NOTE(review): the "ERA5 + GHCNh blend" entry also starts with "ERA5"
        # and therefore matches those ERA5 checks -- confirm this is intended.
        hourly_layout.addWidget(QtWidgets.QLabel('Data source:'), 2, 0)
        self.data_source_combo = QtWidgets.QComboBox()
        self.data_source_combo.addItems([
            "GHCNh: Observations with METSTAT shortwave, Martin & Berdahl longwave",
            "ERA5: Reanalysis by ECMWF at Copernicus",
            "ISD: Legacy observations through September 2025",
            "ERA5 + GHCNh blend: Not available"
        ])
        hourly_layout.addWidget(self.data_source_combo, 2, 1, 1, 3)

        # ---------- ERA5 month/day range within each year ----------
        self.era5_range_group = QtWidgets.QGroupBox(
            "Optional ERA5 date range within each year")
        era5_layout = QtWidgets.QGridLayout(self.era5_range_group)

        # Start month/day
        era5_layout.addWidget(QtWidgets.QLabel("Start month:"), 0, 0)
        self.era5_start_month_sb = QtWidgets.QSpinBox()
        self.era5_start_month_sb.setRange(1, 12)
        self.era5_start_month_sb.setValue(1)
        era5_layout.addWidget(self.era5_start_month_sb, 0, 1)

        era5_layout.addWidget(QtWidgets.QLabel("Start day:"), 0, 2)
        self.era5_start_day_sb = QtWidgets.QSpinBox()
        self.era5_start_day_sb.setRange(1, 31)
        self.era5_start_day_sb.setValue(1)
        era5_layout.addWidget(self.era5_start_day_sb, 0, 3)

        # End month/day
        era5_layout.addWidget(QtWidgets.QLabel("End month:"), 1, 0)
        self.era5_end_month_sb = QtWidgets.QSpinBox()
        self.era5_end_month_sb.setRange(1, 12)
        self.era5_end_month_sb.setValue(12)
        era5_layout.addWidget(self.era5_end_month_sb, 1, 1)

        era5_layout.addWidget(QtWidgets.QLabel("End day:"), 1, 2)
        self.era5_end_day_sb = QtWidgets.QSpinBox()
        self.era5_end_day_sb.setRange(1, 31)
        self.era5_end_day_sb.setValue(31)
        era5_layout.addWidget(self.era5_end_day_sb, 1, 3)

        # Small explanatory note under the spin boxes
        note = QtWidgets.QLabel(
            "If left as 1/1–12/31, ERA5 downloads full years.\n"
            "Otherwise, WeatherPrep will only request those months/days\n"
            "in each year for ERA5 downloads. Hours are assumed 0–23."
        )
        note.setWordWrap(True)
        era5_layout.addWidget(note, 2, 0, 1, 4)

        hourly_layout.addWidget(self.era5_range_group, 3, 0, 1, 4)

        # The range group starts disabled (the first combo item is not ERA5);
        # _toggle_era5_range_group is invoked whenever the selection changes.
        self.era5_range_group.setEnabled(False)
        self.data_source_combo.currentIndexChanged.connect(
            self._toggle_era5_range_group)

        # Attach the tab BEFORE wiring the tab order: QWidget.setTabOrder()
        # ignores (with a warning) any pair of widgets that are not in the same
        # window, and until addTab() runs the Hourly-tab widgets still belong
        # to a separate, unparented QWidget.
        self.tab_widget.addTab(hourly_tab, "Hourly")

        # Explicit keyboard tab order for the main controls
        self.setTabOrder(self.country_dropdown, self.state_dropdown)
        self.setTabOrder(self.state_dropdown, self.county_dropdown)
        self.setTabOrder(self.county_dropdown, self.place_dropdown)
        self.setTabOrder(self.place_dropdown, self.data_table)
        self.setTabOrder(self.data_table, self.hourly_start_year_entry)
        self.setTabOrder(self.hourly_start_year_entry, self.hourly_end_year_entry)
        self.setTabOrder(self.hourly_end_year_entry, self.combine_years_checkbox)
        self.setTabOrder(self.combine_years_checkbox, self.data_source_combo)
        self.setTabOrder(self.data_source_combo, self.process_button)
        self.setTabOrder(self.process_button, self.close_button)
        self.setTabOrder(self.close_button, self.reset_button)

        # Hand off to initialize_dropdowns() (defined elsewhere in this class)
        # once every widget it may touch exists.
        self.initialize_dropdowns()

if __name__ == '__main__':
    # Script entry point. `sys` is already imported at the top of the module,
    # so it is not re-imported here.
    app = QtWidgets.QApplication(sys.argv)

    # Put the splash screen up first so it is visible while the main window
    # is being constructed.
    splash = SplashScreen()
    splash.show()

    mainWindow = WeatherProcessorApp()
    # Hand the main window to the splash screen before showing the window.
    splash.finish(mainWindow)
    mainWindow.show()

    # Run the Qt event loop and propagate its return code to the OS.
    exit_code = app.exec_()
    sys.exit(exit_code)
