#z_PrecipitationData_GIOVANNI.py will process NASA GIOVANNI server GPM_3IMERGHH data into format usable by i-Tree
#Note: Script can be run in two approaches, explained below as P and W:

#Created by Jay Heppler at Davey Institute for processing of international weather data lacking precipitation
#Updated by T. Endreny at SUNY ESF with various user and runtime features 

#Test:>python z_b_PrecipitationData_GIOVANNI.py C:\iTree\WeatherPrep\TestingFilesAndScript\TestCases\z_GiovannieSample

#Approach P: Generate precipitation csv, to use in WeatherPrepConfig.xml and create Weather.csv 
#Step 1a: Download WeatherPrep scripts and utility from i-Tree Tools and save as: C:\iTree\WeatherPrep\
#Step 1b: Follow directions in C:\iTree\WeatherPrep\NOAA_data_tool\ReadMe.txt to obtain raw NOAA data 
#Step 1c: Create work folder such as C:\iTree\a_prep_Weather\syr_ny\2022\ to contain the raw NOAA data
#Step 2a: Obtain data at https://giovanni.gsfc.nasa.gov/giovanni/ by: 
#Step 2b: ... 1) Select Plot Time Series, Area Averaged, 2) Select Date Range (UTC) starting 1998-01-01, ...
#Step 2c: ... 3) Select Region (W,S,E,N) based on 0.05 degree distances from city center, 4) Keyword GPM_3IMERGHH, ... 
#Step 2d: ... 5) Select best data option (e.g., Precipitation, Units=mm/hr, Temp.Res=Half-Hourly), ...
#Step 2e: ... 6) Click Plot Data & wait ...
#Step 2f: ... 7) Once plot is generated, expand Downloads left of plot and select CSV, 8) Save to work folder
#Step 2g: Keep original name of GPM_3IMERGHH product, e.g., g4.areaAvgTimeSeries.GPM_3IMERGHH.._precip..12E_42N.csv
#Step 3a: Run this GIOVANNI script and select data output option letter P, for Precipitation.csv
#Step 4a: Either/Or Run WeatherPrep_GUI.exe or WeatherPrep_GUI.py, activating Advanced XML Configuration radio button
#Step 4b: Provide path for GIOVANNI output Precipitation.csv
# 
#Step 5a: Either/Or Modify WeatherPrepConfig.xml element PrecipitationDataCsv to point to Precipitation.csv
#Step 5b: Generate weather.csv file that contains the GIOVANNI precipitation data

#Approach W: Update weather.csv, by combining precipitation csv with existing Weather.csv
#Step 1a: Download WeatherPrep scripts and utility from i-Tree Tools and save as: C:\iTree\WeatherPrep\
#Step 1b: Follow directions in C:\iTree\WeatherPrep\NOAA_data_tool\ReadMe.txt to obtain raw NOAA data 
#Step 1c: Create work folder such as C:\iTree\a_prep_Weather\syr_ny\2022\ to contain the raw NOAA data
#Step 2a: Follow directions in C:\iTree\WeatherPrep\TestCases\ReadMe.txt
#Step 2b: Modify WeatherPrepConfig.xml element PrecipitationDataCsv to be blank
#Step 2c: Generate 1st estimate of weather.csv file that does not yet contain the GIOVANNI precipitation data
#Step 3a: Obtain data at https://giovanni.gsfc.nasa.gov/giovanni/ by: 
#Step 3b: ... 1) Select Plot Time Series, Area Averaged, 2) Select Date Range (UTC), ...
#Step 3c: ... 3) Select Region (W,S,E,N) based on 0.05 degree distances from city center, 4) Keyword GPM_3IMERGHH, ... 
#Step 3d: ... 5) Select best data option (e.g., Precipitation, Units=mm/hr, Temp.Res=Half-Hourly), ...
#Step 3e: ... 6) Click Plot Data & wait ...
#Step 3f: ... 7) Once plot is generated, expand Downloads left of plot and select CSV, 8) Save to work folder
#Step 3g: Keep original name of GPM_3IMERGHH product, e.g., g4.areaAvgTimeSeries.GPM_3IMERGHH.._precip..12E_42N.csv
#Step 4a: Run this GIOVANNI script and select data output option letter W, for Weather.csv
#Step 4b: Update weather.csv file to contain the GIOVANNI precipitation data

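#Giovanni input data as CSV file is formatted as a block of metadata lines followed by a two-column time series, e.g.:
#Date_YYYYMMDD Time_HH:MM:SS,Precipitation_mm
#2019-01-01 00:00:00,0.76200000
#2019-01-01 01:00:00,0.00000000
#Note: This script locates the time series by a header line starting with 'Timestamp (UTC)' and reads the column named 'Data',
#Note: ... treating 'Data' as a half-hourly precipitation rate in mm/hr that is summed to hourly depth in m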

import csv
import os 
import glob 
import shutil
import sys
import io
import pandas as pd
from datetime import datetime

#A <target_directory> command line argument is required; if len sys.argv < 2 then it is missing
if len(sys.argv) < 2:
    print("Warning: Python script requires command line input of a <target_directory>")
    print("This script searches the target_directory for a precipitation data file:")
    print("- with a name containing the keyword 'GPM_3IMERGHH',")
    print("- in the format of a NASA GIOVANNI IMERGE .csv file.")
    sys.exit(1)
#target_directory is the path given in sys.argv[1]
target_directory = sys.argv[1]

#Stop with a readable message, rather than an os.chdir traceback, when the path is not an existing folder
if not os.path.isdir(target_directory):
    print("Warning: The target_directory does not exist or is not a folder: \n{}.".format(target_directory))
    sys.exit(1)

#Before running script, put weather file of interest and GIOVANNI file of interest into same folder. 
#os.chdir makes target_directory the working directory, so the glob patterns and os.getcwd() paths used below resolve there
os.chdir(target_directory)


#precip_find uses glob.glob command to find file with string *GPM_3IMERGHH*, as named by Giovanni output
#sorted is applied because glob returns matches in arbitrary order, and only precip_find[0] is processed
precip_find = sorted(glob.glob('*GPM_3IMERGHH*'))

#If precip_find returns no results then stop here; continuing would fail at the precip_find[0] lookup
if not precip_find:
    print("Warning: No precipitation data were found in the target_directory: \n{}.".format(target_directory))
    print("This script searches the target_directory for a precipitation data file:")
    print("- with a name containing the keyword 'GPM_3IMERGHH',")
    print("- in the format of a NASA GIOVANNI IMERGE .csv file.")
    sys.exit(1)

#If precip_find returns several results then tell the user which single file will be processed
if len(precip_find) > 1:
    print("Note: {} files contain the keyword 'GPM_3IMERGHH'; using: {}".format(len(precip_find), precip_find[0]))
    
#Prompt for command line input whether program generates Precipitation.csv or Weather.csv
print("Please select one of two options for inserting the precipitation data, P or W.")
print("P = Precipitation.csv generated to create Weather.csv file using WeatherPrepConfig.xml.")
print("W = Weather.csv updated to contain the precipitation data.")

#Repeat the prompt until the reply starts with P or W (any case, surrounding blanks ignored)
#Option W overwrites Weather.csv, so an unrecognized reply is asked again instead of being treated as W
output_choice = ''
while output_choice not in ('P', 'W'):
    reply = input('Select a letter: Precipitation.csv generated or Weather.csv updated (P or W):')
    #Substring reply to its first non-blank character, in upper case, to ensure accurate input
    output_choice = reply.strip()[:1].upper()
    if output_choice not in ('P', 'W'):
        print("Warning: Input was not recognized, please enter P or W.")

#Define 1st row of Giovanni IMERGE precipitation data
#Note: this value is not read by the rest of this script; the data table is found by its header line instead
row_start_precipitation_input = 9

#Set flags: exactly one of flag_Precipitation and flag_Weather is 1
#If output_choice is P then only generate Precipitation.csv file, otherwise update Weather.csv
flag_Precipitation = 1 if output_choice == 'P' else 0
flag_Weather = 1 - flag_Precipitation

#precip_file defined by Python function os.path.join with 1st part of precip_find string variable
precip_file = os.path.join(os.getcwd(), str(precip_find[0]))

#precip_file_copy is the temporary working copy of the Giovanni IMERGE precipitation file
precip_file_copy = os.path.join(os.getcwd(), 'Giovanni_IMERGE_Data.csv')

if flag_Weather == 1:
    #find Weather file, create variable containing full path name. 
    weather_find = glob.glob('Weather.csv')
    #Stop before any working copy is created if there is no Weather.csv to update
    if not weather_find:
        print("Warning: Weather.csv was not found in the target_directory: \n{}.".format(os.getcwd()))
        print("Option W updates an existing Weather.csv; generate it first or select option P.")
        sys.exit(1)
    weather_file = os.path.join(os.getcwd(), str(weather_find[0]))
    #weather_file_copy is the temporary working copy of Weather.csv
    weather_file_copy = os.path.join(os.getcwd(), 'Weather.wc.csv')

#Create working copies using shutil function, only after all required inputs were confirmed to exist
shutil.copy(precip_file, precip_file_copy)

if flag_Weather == 1:
    print("Weather.wc.csv and Weather2.csv will appear in your directory as temporary files during merge.")
    shutil.copy(weather_file, weather_file_copy)


# --- Read Giovanni IMERG CSV in the new format into df_raw (columns: time_dt, Data) ---

# Read all non-empty lines of the working copy, with surrounding whitespace removed
with open(precip_file_copy) as f0:
    all_lines = [line.strip() for line in f0 if line.strip()]

# Find the header line that starts the actual time series; lines before it are file metadata
header_idx = next(
    (i for i, line in enumerate(all_lines) if line.startswith("Timestamp (UTC)")),
    None,
)

if header_idx is None:
    raise RuntimeError("Could not find 'Timestamp (UTC)' header in Giovanni file.")

# Build a mini-CSV string from the header + data lines and read with pandas
data_block = "\n".join(all_lines[header_idx:])
df_raw = pd.read_csv(io.StringIO(data_block))

# Fail with a descriptive message if either required column is absent
missing_columns = [name for name in ("Timestamp (UTC)", "Data") if name not in df_raw.columns]
if missing_columns:
    raise RuntimeError(
        "Giovanni file is missing expected column(s) {}; found columns {}.".format(
            missing_columns, list(df_raw.columns)))

# Parse time and data; errors="coerce" turns unparseable entries into NaT/NaN so they can be dropped below
df_raw["time_dt"] = pd.to_datetime(df_raw["Timestamp (UTC)"], errors="coerce")
df_raw["Data"] = pd.to_numeric(df_raw["Data"], errors="coerce")

# Keep only valid rows, ordered by time
df_raw = df_raw.dropna(subset=["time_dt", "Data"]).sort_values("time_dt")

# Without any valid row there is nothing to convert, and the first-timestamp lookup below would fail
if df_raw.empty:
    raise RuntimeError("Giovanni file contains no rows with a valid timestamp and precipitation value.")

# Report the first timestamp so the user can confirm the period being merged
first_datetime = df_raw["time_dt"].iloc[0]
print(f"Preparing to merge precipitation data starting at {first_datetime}")
print("If a different date is needed, then stop and edit the script to filter the time range.")

# --- Convert half-hourly rate (mm/hr) to hourly depth (m) ---

# IMERG half-hourly: Data = mm/hr at 30-min resolution
HALF_HOUR_HOURS = 0.5
# Multiplier that converts a depth in mm to a depth in m
RATIO_M_TO_MM = 1.0 / 1000.0

# Convert rate (mm/hr) to 30-min depth (mm)
df_raw["precip_mm"] = df_raw["Data"] * HALF_HOUR_HOURS

# Aggregate to hourly total depth (mm)
# A Timedelta rule is used in place of the "H" alias, which newer pandas releases deprecate
# Note: hours with no input rows inside the covered period are returned by sum() as 0
hourly = (
    df_raw.set_index("time_dt")["precip_mm"]
          .resample(pd.Timedelta(hours=1))
          .sum()
          .reset_index()
)

# Convert mm → m and build string date/time columns
hourly["YYYYMMDD"] = hourly["time_dt"].dt.strftime("%Y%m%d")
hourly["HH:MM:SS"] = hourly["time_dt"].dt.strftime("%H:%M:%S")
hourly["Precipitation_m"] = hourly["precip_mm"] * RATIO_M_TO_MM

# === These lists feed directly into the rest of the script ===
output_data_date = hourly["YYYYMMDD"].tolist()
output_data_time = hourly["HH:MM:SS"].tolist()
Giovanni_precipitation_data_mph = hourly["Precipitation_m"].tolist()

#precip_columns maps each output column name to its list; key order sets the column order in the csv
precip_columns = {
    'YYYYMMDD': output_data_date,
    'HH:MM:SS': output_data_time,
    'Precipitation_Rate_mph(m/h)': Giovanni_precipitation_data_mph,
}
#df_p panda dataframe holds the Date, Time, Precipitation columns
df_p = pd.DataFrame(precip_columns)

#output_file_precip is Precipitation.csv within the current working directory
output_file_precip = os.path.join(os.getcwd(), 'Precipitation.csv')
#Write df_p to output_file_precip with a header row and without the dataframe index
df_p.to_csv(path_or_buf=output_file_precip, index=False)

print("Processed all Giovanni precipitation data")

#If flag_Weather is true then write precipitation to Weather.csv
#Result is written to the temporary file Weather2.csv; rows are matched on the (date, time) strings in columns 0 and 1
if flag_Weather == 1:
    # Build a lookup from (date, time) -> Giovanni precip (m)
    precip_map = dict(
        zip(zip(output_data_date, output_data_time), Giovanni_precipitation_data_mph)
    )

    # Read all rows of the weather working copy; newline='' lets the csv module handle line endings itself
    with open(weather_file_copy, newline='') as f1:
        Weather_HydroPlus_data = list(csv.reader(f1))

    # A header row is required to know the column layout
    if not Weather_HydroPlus_data:
        raise RuntimeError("Weather.csv is empty; no header row was found.")

    # Get row counts; the header row is not an observation, so it is excluded from the weather count
    row_count_weather = len(Weather_HydroPlus_data) - 1
    row_count_precip = len(Giovanni_precipitation_data_mph)
    print("Number of rows of precipitation data = {}.".format(row_count_precip))

    # Warnings about length mismatch (but do NOT fail)
    if row_count_weather == row_count_precip:
        print("Excellent: Date range matches for weather.csv and precipitation file.")
        print("Writing final weather file now.")
    elif row_count_weather > row_count_precip:
        print("Concern: Weather observations appear to outnumber precipitation observations.")
        print("Giovanni precipitation will be merged only where dates/times match.")
    else:
        print("Concern: Precipitation observations appear to outnumber weather observations.")
        print("Giovanni precipitation will be merged only where dates/times match.")

    # Header row is copied to the output unchanged
    Weather_header = Weather_HydroPlus_data[0]
    Weather_header_len = len(Weather_header)
    output_data_weather = [Weather_header]

    # Position of the precipitation column for each supported layout:
    #  9 columns: [0]YYYYMMDD [1]HHMMSS [2]Tair [3]Tdew [4]NetRad [5]WndSpd [6]AtmPres [7]Precip [8]SnowDepth
    # 10 columns: [0]YYYYMMDD [1]HHMMSS [2]Tair [3]Tdew [4]NetRad [5]WndSpd [6]WindDir [7]AtmPres [8]Precip [9]SnowDepth
    # Any other header length yields None, and rows are then passed through unchanged
    precip_col = {9: 7, 10: 8}.get(Weather_header_len)

    # Stats for reporting
    n_match = 0
    n_nomatch = 0

    # Loop over weather rows (header already handled)
    for row in Weather_HydroPlus_data[1:]:
        # Empty rows are passed through and not counted
        if not row:
            output_data_weather.append(row)
            continue

        # Unsupported layout, or a row too short to hold every header column: keep it as is
        if precip_col is None or len(row) < Weather_header_len:
            n_nomatch += 1
            output_data_weather.append(row)
            continue

        # Copy exactly the header's number of columns, then overwrite precipitation when Giovanni has this hour
        row_data_weather = list(row[:Weather_header_len])
        key = (row[0], row[1])
        if key in precip_map:
            n_match += 1
            row_data_weather[precip_col] = f"{float(precip_map[key]):.8f}"
        else:
            # No Giovanni value for this timestamp; the existing precipitation value is retained
            n_nomatch += 1
        output_data_weather.append(row_data_weather)

    #df_w panda dataframe created to contain output_data_weather
    df_w = pd.DataFrame(output_data_weather)
    #output_file_weather combines the directory path and Weather2.csv, using os.path.join function
    output_file_weather = os.path.join(os.getcwd(), 'Weather2.csv')
    #df_w is converted to a csv file; header=False because the header is already the first data row
    df_w.to_csv(output_file_weather, header=False, index=False)

    print("Processed all Weather.csv and Giovanni data.")

    # --- REPORT MERGE STATUS AND WARN USER IF SERIES LENGTHS DIFFER ---
    print("")
    print("------------------------------------------------------------")
    print(f"Giovanni precip successfully merged into {n_match} Weather.csv rows.")

    if n_nomatch > 0:
        # Last Giovanni timestamp
        last_giovanni_time = hourly['time_dt'].max()
        last_giovanni_date = last_giovanni_time.strftime("%Y-%m-%d %H:%M:%S")

        print("************************************************************")
        print(" ***  WARNING: INCOMPLETE PRECIPITATION COVERAGE  ***")
        print("************************************************************")
        print(f"{n_nomatch} Weather.csv rows **did NOT** have matching Giovanni timestamps.")
        print("")
        print("These rows retain their existing precipitation values, which may")
        print("include zeros or placeholder values, leading to incorrect")
        print("precipitation totals for the affected dates.")
        print("")
        # ASCII arrow is used so print cannot fail on consoles or redirected output with a non-Unicode encoding
        print(f"  --> Last available Giovanni precipitation time: {last_giovanni_date}")
        print("")
        print("Please verify whether the Weather.csv date range exceeds the")
        print("Giovanni file's coverage and adjust your WeatherPrepConfig.xml")
        print("date range or obtain additional Giovanni data if needed.")
        print("************************************************************")
    else:
        print("All Weather.csv rows had matching Giovanni timestamps.")
    print("------------------------------------------------------------")
    print("")

#Final step: report the Precipitation.csv result, or promote the merged weather file and clear its temporary files
if flag_Weather != 1:
    #Precipitation option: Precipitation.csv is the product and stays in the folder
    print("Precipitation.csv was generated and is ready within the folder with the raw precipitation data.")
    print(f"File path: {output_file_precip}")
else:
    #Weather option: the working copy of the original weather file is no longer needed
    os.remove(weather_file_copy)
    #copies new CSV (Weather2) to the same name as the old weather_file
    shutil.copy(output_file_weather, weather_file)
    #Weather2.csv and Precipitation.csv were only intermediate files for this option
    for spent_file in (output_file_weather, output_file_precip):
        os.remove(spent_file)
    print("Weather.csv was updated and is ready within the folder with the raw precipitation data.")
    print(f"File path: {weather_file}")

print("Script has completed. Check above output files for results.")
print("Thank you for using i-Tree tools to improve the world!")

#delete the working copy of the Giovanni file, the last intermediate file
os.remove(precip_file_copy)
