#!/usr/bin/env python3

"""
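Create a mapping from lowercased country names (CTRY_NAME column) to their
two-character country codes (CC column) using the fixed-width file
emshr_lite_WBAN_or_USAF_to_GHCND.txt, and print it to stdout as a Python
dict literal named iso_guess.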

Run this script from the directory containing that file:

    python z_getCountryCodes_GHCNd.py
"""

from collections import OrderedDict

# Input file read by main(); a relative path, so it is resolved against the
# current working directory.
FILENAME = "emshr_lite_WBAN_or_USAF_to_GHCND.txt"


def main(filename=None):
    """Print a Python dict literal mapping country names to CC codes.

    Reads a fixed-width text file whose first line is a header naming the
    columns, keeps the first CC value seen for each (lowercased) CTRY_NAME
    value, and prints the result to stdout as ``iso_guess = {...}`` with the
    keys in sorted order.

    Args:
        filename: Path of the file to read. When omitted, the module-level
            ``FILENAME`` is used.

    Raises:
        ValueError: If the header line does not contain the CC, CTRY_NAME
            and ST columns (ST must come after CTRY_NAME).
        OSError: If the file cannot be opened.
    """
    # Local import keeps this block self-contained; json.dumps is used only
    # to emit correctly escaped, double-quoted string literals.
    import json

    path = FILENAME if filename is None else filename

    # "utf-8-sig" drops a leading byte-order mark if there is one. With plain
    # "utf-8" the BOM stays on the header line only, which shifts every
    # header-derived column offset by one relative to the data lines.
    with open(path, encoding="utf-8-sig") as f:
        country_to_cc = _read_country_codes(f)

    # Print as a Python dict
    print("# iso_guess is a dict of country names used to find GHCN country codes.")
    print("# ALL KEYS MUST BE LOWERCASE.")
    print("iso_guess = {")
    for country in sorted(country_to_cc):
        # Escape quotes/backslashes so the printed text is always a valid
        # literal; ordinary names come out exactly as "name".
        key = json.dumps(country, ensure_ascii=False)
        code = json.dumps(country_to_cc[country], ensure_ascii=False)
        print(f"    {key}: {code},")
    print("}")


def _read_country_codes(lines):
    """Return {lowercased country name: CC} parsed from fixed-width lines.

    ``lines`` is an iterable of text lines whose first item is the header.
    Column positions are taken from where the column names start in the
    header; the country field ends where the " ST" column begins.
    """
    rows = iter(lines)
    # An empty input yields an empty header, which fails the lookup below
    # with the same explicit error as a malformed header.
    header = next(rows, "").rstrip("\n")

    try:
        start_cc = header.index("CC")
        start_ctry = header.index("CTRY_NAME")
        # The country field runs up to the next column, which starts with " ST".
        start_st = header.index(" ST", start_ctry + len("CTRY_NAME"))
    except ValueError as err:
        raise ValueError(
            "header line must contain the columns CC, CTRY_NAME and ST "
            f"(ST after CTRY_NAME); got: {header!r}"
        ) from err

    country_to_cc = {}
    for line in rows:
        stripped = line.strip()
        # Skip blank lines and dashed separator lines.
        if not stripped or stripped.startswith("-"):
            continue

        # Slicing past the end of a short line yields "", so truncated lines
        # are rejected by the emptiness check below.
        cc = line[start_cc:start_cc + 2].strip()
        ctry_name = line[start_ctry:start_st].strip()
        if not ctry_name or not cc:
            continue

        # Store first-seen CC for each country.
        country_to_cc.setdefault(ctry_name.lower(), cc)

    return country_to_cc


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
