#Created:   ChatGPT by T. Endreny at SUNY ESF in 2025
#Purpose:   Script will obtain Giovanni time series for a point (0.1 deg raster), not an area average; to obtain area average, sample multiple points.
#Input:     File z_PrecipitationData_GIOVANNI_download_places.csv that contains this header, PlaceName, Latitude_dd, Longitude_dd, Area_km2, Start_Date_Time,Stop_Date_Time, and data values below. A new line starting with keyword #Archive,,,,, stops reading of archived places.
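#Dates:     Update the DEFAULT_START and DEFAULT_STOP variables below; they are used for any place whose Start_Date_Time or Stop_Date_Time is blank in the input file.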
#Access:    Access to NASA EarthData is needed for download
#Output:    File Place_GPM_3IMERGHH_07_precipitation_HourlyPrecip_m.csv written to the Place folder within the OUT_DIR folder, along with the raw per-point CSV files and the merged CSV file


#Note: EarthData login credentials stored in C:\Users\te\.netrc
#Note: .netrc file contains:
# machine urs.earthdata.nasa.gov
  # login YOUR_EARTHDATA_USERNAME
  # password YOUR_EARTHDATA_PASSWORD
  
#Giovanni product: https://disc.gsfc.nasa.gov/datasets/GPM_3IMERGHH_07/summary
#Giovanni download instructions: https://disc.gsfc.nasa.gov/information/howto?keywords=time%20series&title=How%20to%20Access%20the%20%22Giovanni%20in%20the%20Cloud:%20Time%20Series%22%20Service%20Using%20Python

import os, csv, time, netrc, io, requests
from requests.auth import HTTPBasicAuth
from pathlib import Path
from typing import List, Tuple, Iterable, Optional
import pandas as pd
from typing import Iterable, Tuple

# --------------------------- USER INPUTS ------------------------------------
# Places file. The script reads the columns PlaceName, Latitude_dd, Longitude_dd,
# Start_Date_Time and Stop_Date_Time; any other column is not used.
INPUT_CSV = r"C:\iTree\WeatherPrep\z_PrecipitationData_GIOVANNI_download_places.csv"  # input places file
# Root output folder; one sub-folder per PlaceName is created inside it.
OUT_DIR   = r"C:\iTree\WeatherPrep\Outputs"


# Giovanni data id sent as the 'data' request parameter and used in output file names.
DATA_ID   = "GPM_3IMERGHH_07_precipitation"   # or "GPM_3IMERGHHL_07_precipitation" for Late # or "GLDAS_NOAH025_3H_2_1_Tair_f_inst" for testing others
#Note: These DATA_ID do not work: "GPM_3IMERGHH_07_precipitationCal" or "GPM_3IMERGHHL_07_precipitationCal"

# Default time window, used when a place has a blank Start_Date_Time or Stop_Date_Time.
#Note: The earliest date for IMERG HH: 1998-01-01
DEFAULT_START = "1998-01-01T00:00:00"
DEFAULT_STOP   = "1998-01-15T23:59:59"

# ---- Grid controls ----
# Each place is sampled on a GRID_N_LAT x GRID_N_LON grid of points centered on it.
GRID_N_LON = 1        # number of columns in longitude direction (e.g., 1, 2, 3, 5)
GRID_N_LAT = 1        # number of rows in latitude direction (e.g., 1, 2, 3, 5)
GRID_SPACING_DEG = 0.1  # IMERG native resolution ~0.1°; use multiples of this
# When True, the place coordinate is snapped to the nearest grid center
# (IMERG_BASE_* + k * GRID_SPACING_DEG) before the grid offsets are applied.
ALIGN_TO_IMERG_GRID = True
# IMERG centers are commonly -179.95 + 0.1*k (lon), -89.95 + 0.1*k (lat).
IMERG_BASE_LON = -179.95
IMERG_BASE_LAT =  -89.95

# Polite pacing: seconds to sleep after each point request.
SLEEP_SEC = 0.5
# ---------------------------------------------------------------------------

# Giovanni API endpoints: token sign-in and point time-series download.
SIGNIN_URL     = "https://api.giovanni.earthdata.nasa.gov/signin"
TIMESERIES_URL = "https://api.giovanni.earthdata.nasa.gov/timeseries"

# --------------------------- TOKEN HANDLING ---------------------------------
def get_token_from_env() -> Optional[str]:
    """
    Return the Giovanni token stored in the GIOVANNI_TOKEN environment variable.

    Surrounding whitespace is removed. Returns None when the variable is unset,
    empty, or holds only whitespace, so the result is either a usable token or
    None (never an empty string).
    """
    tok = os.getenv("GIOVANNI_TOKEN", "").strip()
    # An all-whitespace value strips down to "", which is not a token.
    return tok or None

def get_token_via_signin() -> str:
    """
    Request a Giovanni token from SIGNIN_URL using the Earthdata Login
    credentials stored in the user's .netrc file.

    Raises RuntimeError when .netrc has no 'urs.earthdata.nasa.gov' entry;
    a non-success HTTP status is raised by requests via raise_for_status().
    """
    entry = netrc.netrc().hosts.get('urs.earthdata.nasa.gov')
    if not entry:
        raise RuntimeError("No 'urs.earthdata.nasa.gov' entry in your .netrc. Add your Earthdata Login there.")

    # A netrc entry is (login, account, password); the account field is unused.
    login, _account, password = entry
    response = requests.get(
        SIGNIN_URL,
        auth=HTTPBasicAuth(login, password),
        allow_redirects=True,
        timeout=60,
    )
    response.raise_for_status()

    # Remove any double quotes and surrounding whitespace from the body.
    unquoted = response.text.replace('"', '')
    return unquoted.strip()

def ensure_token() -> str:
    """
    Return a Giovanni token, preferring the GIOVANNI_TOKEN environment variable
    and falling back to a sign-in with .netrc credentials.

    Prints the token length (not the token itself) as confirmation.
    """
    token = get_token_from_env() or get_token_via_signin()
    print("Giovanni token acquired. Length:", len(token))
    return token

# ----------------------------- IO ------------------------------------------
from typing import Iterable, Tuple
import csv

def read_locations(
    csv_path: str,
    stop_keyword: str = "#Archive"
) -> Iterable[Tuple[str, float, float, str, str]]:
    """
    Yield (name, lat, lon, start_date_time, stop_date_time) for each place in
    the CSV file, stopping at the first row whose PlaceName starts with
    stop_keyword.

    The file must have a header row with PlaceName, Latitude_dd and
    Longitude_dd; Start_Date_Time and Stop_Date_Time are optional and are
    returned as "" when the column or the cell is missing. Rows in which
    PlaceName, Latitude_dd and Longitude_dd are all blank are skipped.

    Raises KeyError when a required column is absent from the header and
    ValueError when a latitude or longitude is not a number.
    """
    def _cell(value) -> str:
        # DictReader stores None for columns missing from a short row.
        return (value or "").strip()

    # utf-8-sig drops a leading byte-order mark (written by some spreadsheet
    # exports), which would otherwise become part of the first header name;
    # files without a BOM are read as plain UTF-8.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        # skipinitialspace tolerates "PlaceName, Latitude_dd, ..." style headers.
        reader = csv.DictReader(f, skipinitialspace=True)
        for row in reader:
            name = _cell(row["PlaceName"])

            # Stop processing entirely if we hit the keyword
            if name.startswith(stop_keyword):
                break

            lat_text = _cell(row["Latitude_dd"])
            lon_text = _cell(row["Longitude_dd"])

            # Skip separator rows such as ",,,,," instead of failing in float("").
            if not (name or lat_text or lon_text):
                continue

            yield (
                name,
                float(lat_text),
                float(lon_text),
                _cell(row.get("Start_Date_Time")),
                _cell(row.get("Stop_Date_Time")),
            )

def save_text(text: str, path: Path) -> None:
    """
    Write text to path as UTF-8, creating any missing parent folders first.
    Newline characters are written exactly as they appear in text.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    with path.open(mode="w", encoding="utf-8", newline="") as handle:
        handle.write(text)

# ------------------------- GRID CONSTRUCTION --------------------------------
def _center_offsets(count: int, spacing: float) -> List[float]:
    """
    Build 'count' offsets (in degrees) placed symmetrically around 0.

    - count <= 0 gives an empty list.
    - Odd counts use whole steps, e.g. 3 -> [-1, 0, +1] * spacing.
    - Even counts use half steps, e.g. 2 -> [-0.5, +0.5] * spacing, so no
      offset is exactly 0 (beware: these land between whole-step positions).
    """
    if count <= 0:
        return []

    half = count // 2
    if count % 2:
        # odd count: whole steps from -half to +half, including 0
        return [step * spacing for step in range(-half, half + 1)]

    # even count: shift every step by half a spacing so the set straddles zero
    return [(step + 0.5) * spacing for step in range(-half, half)]

def _snap_to_grid(coord: float, base: float, spacing: float) -> float:
    """
    Move 'coord' onto the nearest value of the form base + spacing * k
    (k an integer), using Python's round() to pick k.
    E.g., base=-179.95, spacing=0.1 gives candidates -179.95, -179.85, ...
    """
    distance_from_base = coord - base
    cell_index = round(distance_from_base / spacing)
    snapped = base + cell_index * spacing
    return snapped

def build_grid(lat: float, lon: float,
               nlat: int, nlon: int, spacing: float,
               align_to_imerg: bool) -> List[Tuple[float, float]]:
    """
    Return the (lat, lon) sample points of an nlat x nlon grid centered on the
    given coordinate, ordered row by row (latitude outer, longitude inner).

    When align_to_imerg is true the center is first snapped to the nearest
    grid value derived from IMERG_BASE_LAT / IMERG_BASE_LON and 'spacing'.
    """
    center_lat, center_lon = lat, lon
    if align_to_imerg:
        center_lat = _snap_to_grid(lat, IMERG_BASE_LAT, spacing)
        center_lon = _snap_to_grid(lon, IMERG_BASE_LON, spacing)

    lat_offsets = _center_offsets(nlat, spacing)
    lon_offsets = _center_offsets(nlon, spacing)

    return [
        (center_lat + d_lat, center_lon + d_lon)
        for d_lat in lat_offsets
        for d_lon in lon_offsets
    ]

# ------------------------- GIOVANNI CALLS -----------------------------------
def call_timeseries_point(lat: float, lon: float, t0: str, t1: str, data_id: str, token: str) -> str:
    """
    Download one point time series from TIMESERIES_URL and return the response
    body as text.

    The request asks for CSV ('Accept: text/csv') and passes the token in the
    'authorizationtoken' header. Any status other than 200 raises
    requests.HTTPError carrying the status, the request inputs and the first
    200 characters of the body.
    """
    query = {
        "data": data_id,
        "location": f"[{lat},{lon}]",
        "time": f"{t0}/{t1}",
    }
    request_headers = {"authorizationtoken": token, "Accept": "text/csv"}

    response = requests.get(TIMESERIES_URL, params=query, headers=request_headers, timeout=120)
    if response.status_code == 200:
        return response.text
    raise requests.HTTPError(f"{response.status_code} for data={data_id} lat={lat} lon={lon}. Body: {response.text[:200]}")

# ------------------------- CSV MERGING --------------------------------------
def parse_giovanni_csv(csv_text: str) -> pd.DataFrame:
    """
    Turn Giovanni CSV text into a DataFrame with columns ['time', 'value'].

    Blank lines, lines starting with '#', and lines with fewer than two
    comma-separated fields are skipped. The first field is kept as the 'time'
    string; the second becomes 'value', or NaN when it is not a number.
    Fields beyond the second are ignored.
    """
    def _as_float(token: str) -> float:
        # Non-numeric second fields (e.g. header text) are kept as NaN.
        try:
            return float(token)
        except ValueError:
            return float("nan")

    records = []
    for raw_line in io.StringIO(csv_text):
        stripped = raw_line.strip()
        if stripped and not stripped.startswith("#"):
            fields = [field.strip() for field in stripped.split(",")]
            if len(fields) >= 2:
                records.append((fields[0], _as_float(fields[1])))

    return pd.DataFrame(records, columns=["time", "value"])

def merge_series(dfs: List[pd.DataFrame], col_names: List[str]) -> pd.DataFrame:
    """
    Combine per-point frames (columns 'time' and 'value') into one frame.

    Each frame's 'value' column is renamed to the matching entry of col_names,
    the frames are outer-joined on 'time', rows are sorted by the 'time'
    string, and a 'mean' column holding the row-wise mean of the point columns
    is appended. An empty 'dfs' gives an empty frame with only a 'time' column.
    """
    if not dfs:
        return pd.DataFrame(columns=["time"])

    first_frame, *other_frames = dfs
    combined = first_frame.rename(columns={"value": col_names[0]})
    for frame, label in zip(other_frames, col_names[1:]):
        renamed = frame.rename(columns={"value": label})
        combined = combined.merge(renamed, on="time", how="outer")

    # 'time' is still text here, so this is a lexicographic sort
    combined = combined.sort_values("time")
    combined = combined.reset_index(drop=True)

    # row-wise mean over the per-point columns
    point_values = combined[col_names].astype(float)
    combined["mean"] = point_values.mean(axis=1)
    return combined

def merged_to_hourly_precip_m(merged: pd.DataFrame) -> pd.DataFrame:
    """
    Convert a merged time series into hourly precipitation depth in meters.

    'merged' must have columns 'time' and 'mean'. The 'mean' values are
    treated as a precipitation rate in mm/hr reported at 30-minute steps
    (the IMERG half-hourly product): each value is multiplied by 0.5 hr to get
    a depth in mm, depths are summed per clock hour, and the sum is divided by
    1000 to give meters.

    Rows whose 'time' cannot be parsed as a datetime or whose 'mean' is not
    numeric are dropped. Hours inside the covered span that contain no rows
    come out as 0.0, because the hourly sum of no values is zero.

    Returns a DataFrame with columns
    ['YYYYMMDD', 'HH:MM:SS', 'Precipitation_Rate_mph(m/h)'].

    Raises ValueError when 'time' or 'mean' is missing from 'merged'.
    """
    if "time" not in merged.columns or "mean" not in merged.columns:
        raise ValueError("merged_to_hourly_precip_m expects columns 'time' and 'mean'.")

    df = merged.copy()

    # 1) Parse times; anything that is not a timestamp (metadata rows such as
    #    prod_name, doi, Request_time, ...) is coerced to NaT. No explicit
    #    format is given, so pandas infers it from the data.
    df["time_dt"] = pd.to_datetime(df["time"], errors="coerce")

    # 2) Ensure 'mean' is numeric; non-numeric entries become NaN.
    df["mean"] = pd.to_numeric(df["mean"], errors="coerce")

    # 3) Keep only real time-series rows, in time order.
    df = df.dropna(subset=["time_dt", "mean"]).sort_values("time_dt")

    # 4) Depth for each 30-min step = rate (mm/hr) * 0.5 hr.
    HALF_HOUR_HOURS = 0.5
    df["precip_mm"] = df["mean"] * HALF_HOUR_HOURS

    # 5) Resample to hourly by summing the 30-min depths. The bin width is
    #    given as a Timedelta rather than the "H" string alias, which newer
    #    pandas releases deprecate.
    hourly = (
        df.set_index("time_dt")["precip_mm"]
        .resample(pd.Timedelta(hours=1))
        .sum()
        .reset_index()
    )

    # 6) Convert mm -> m and format output columns.
    hourly["YYYYMMDD"] = hourly["time_dt"].dt.strftime("%Y%m%d")
    hourly["HH:MM:SS"] = hourly["time_dt"].dt.strftime("%H:%M:%S")
    hourly["Precipitation_Rate_mph(m/h)"] = hourly["precip_mm"] / 1000.0  # mm -> m

    return hourly[["YYYYMMDD", "HH:MM:SS", "Precipitation_Rate_mph(m/h)"]]

# ------------------------------ MAIN ----------------------------------------
def main():
    """
    Download, merge and convert the time series for every place in INPUT_CSV.

    For each place: build the sample grid, request each grid point from
    Giovanni and save its raw CSV, then write a merged CSV with a 'mean'
    column and an hourly precipitation CSV. A failure on one point or on the
    hourly conversion is printed and the run continues.
    """
    token = ensure_token()

    out_root = Path(OUT_DIR)
    out_root.mkdir(parents=True, exist_ok=True)

    for name, lat, lon, start_csv, stop_csv in read_locations(INPUT_CSV):

        # Blank per-place dates fall back to the script-wide defaults.
        t0 = start_csv or DEFAULT_START
        t1 = stop_csv or DEFAULT_STOP

        # Build the grid for this site
        pts = build_grid(lat, lon, GRID_N_LAT, GRID_N_LON, GRID_SPACING_DEG, ALIGN_TO_IMERG_GRID)
        site_dir = out_root / name
        site_dir.mkdir(parents=True, exist_ok=True)

        dfs, colnames = [], []
        for position, (plat, plon) in enumerate(pts):
            # Points arrive row by row, so position maps to a 1-based row/column tag.
            grid_row, grid_col = divmod(position, GRID_N_LON)
            point_tag = f"r{grid_row + 1}c{grid_col + 1}"
            out_point = site_dir / f"{name}_{DATA_ID}_{point_tag}.csv"
            try:
                txt = call_timeseries_point(plat, plon, t0, t1, DATA_ID, token)
                # Save each point CSV separately
                save_text(txt, out_point)
                print(f"[OK] {name} {point_tag}: wrote {out_point}")
                # Keep the parsed series for merging
                dfs.append(parse_giovanni_csv(txt))
                colnames.append(point_tag)
            except Exception as ex:
                print(f"[ERR] {name} {point_tag}: {ex}")
            time.sleep(SLEEP_SEC)

        # Nothing downloaded for this site: no merged or hourly file to write.
        if not dfs:
            continue

        # Merge into one CSV with mean column
        merged = merge_series(dfs, colnames)
        merged_path = site_dir / f"{name}_{DATA_ID}_MERGED.csv"
        merged.to_csv(merged_path, index=False)
        print(f"[OK] {name}: merged file -> {merged_path}")

        # Convert merged IMERG series to hourly precipitation depth in meters
        try:
            hourly_df = merged_to_hourly_precip_m(merged)
            hourly_path = site_dir / f"{name}_{DATA_ID}_HourlyPrecip_m.csv"
            hourly_df.to_csv(hourly_path, index=False)
            print(f"[OK] {name}: hourly precipitation file -> {hourly_path}")
        except Exception as ex:
            print(f"[ERR] {name}: failed to compute hourly precipitation depth: {ex}")


# Run the download only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
