#!/usr/bin/env python
import os
import sys
import math
import datetime as dt
import decimal
import re
import argparse
import pandas as pd
import numpy as np
import scipy as sp
from scipy.stats import genextreme as gev
from datetime import timedelta as td


###############################################################################
# Calculates metrics for a given daily streamflow data file
#
# jwon - last edit 11/21/2017
# 06/06/2016 - Added -k option to keep NaN data with threshold
#            - Fixed the dates, wday, and flow options
#            - Added -h header option
# 06/07/2016 - Fixed the -k options for duplicate entries.
#              Added -k implementations for months
#            - Fixed diagnostic view for missing months
# 06/20/2016 - Added decimals for trailing zeros
# 09/21/2017 - Added extreme statistic return year options
#            - Added option to choose peak and low day averages (q)
#            - Added option for frequency from magnitude
# 11/16/2017 - Added options to skip n lines in the beginning of file [--skip]
# 11/21/2017 - Added option to list na values [--na]
#            - Added backbone for option to change the delimiter [--ifs];
#              need to check -,/\t vs -, /\t compatibility
# 04/21/2018 - Fixed return frequency functions pfreq and lfreq
###############################################################################

#ifile = "/home/disk/tsuga2/jswon11/workdir/2020-07_Snoho-autocalibration/WRF_runs/merge/class36_500k_giss-e2-h_RCP85_1969-2099/Pilchuck01.day"
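# Water-year aggregation anchor: annual periods ending in September,
# i.e., the USGS water year running Oct 1 through Sep 30.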
wyfmt = 'A-SEP'

# Reads a data file and formats it into a DataFrame. Expects year, month, day columns followed by data
def read_data(ifile, delim="[ \t:,-]+", hdr=None, skip=0):
    df = pd.read_csv(ifile, sep=delim, engine='python', header=hdr, skiprows=skip)
    dates = df.iloc[:, 0].astype(str) + " " + df.iloc[:, 1].astype(str)
    dates = pd.to_datetime(dates, format="%Y %m")
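    # Column 2 may carry fractional days for sub-daily steps; multiplying by
    # 24 and rounding snaps times to the nearest hour, and day 1 maps to a
    # zero-day offset from the parsed year-month date.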
    days = (round(df.iloc[:, 2] * 24) / 24).apply(lambda x: td(days=x - 1))
    dates = dates + days
    df.index = dates
    df = df[df.columns[3:]]
    return df
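# A minimal usage sketch (hypothetical file contents):
#   1989 10 1 120.5
#   1989 10 2 118.2
# read_data("Pilchuck01.day") would return the flow column(s) indexed by
# datetime; the filename and values here are illustrative only.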


def filter_year(df, styr=0, edyr=9999):
    df = df[(df.index.year >= styr) & (df.index.year <= edyr)]
    return df    
    

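# Water year N runs Oct 1 of year N-1 through Sep 30 of year N, so Oct-Dec
# dates belong to the following calendar year's water year.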
def filter_wyear(df, styr, edyr):
    wyear = df.index.year + (df.index.month > 9)
    return df[(wyear >= styr) & (wyear <= edyr)]

# Apply monthly and water-yearly QC checks based on the provided threshold t;
# months and water years whose data-coverage ratio falls below t are dropped
# (a minimal sketch completing the unfinished QC logic).
def apply_threshold(df, t):
    stdt = df.index[0]
    eddt = df.index[-1]
    # Infer the timestep in minutes from the record span and length
    mins = round((eddt - stdt).total_seconds() / 60 / (len(df) - 1))
    dates = pd.date_range(stdt, eddt, freq="{}min".format(mins))
    full = pd.Series(1, index=dates)

    # Apply monthly threshold: keep months with coverage ratio >= t
    m_ratio = df.iloc[:, 0].resample('M').count() / full.resample('M').count()
    ok_months = m_ratio[m_ratio >= t].index.to_period('M')
    df = df[df.index.to_period('M').isin(ok_months)]

    # Apply water year threshold: keep water years with coverage ratio >= t
    wy = df.index.year + (df.index.month > 9)
    full_wy = full.index.year + (full.index.month > 9)
    y_ratio = df.iloc[:, 0].groupby(wy).count() / full.groupby(full_wy).count()
    ok_years = y_ratio[y_ratio >= t].index
    return df[wy.isin(ok_years)]

    

def save_table(df, out, naVal='NaN', keepHeader=False, verbose=False):
    df = df.fillna(naVal)

    if verbose:
        print(df)
        
    df.to_csv(out, index=False, header=keepHeader)
    
# Resample the timeseries to a different timestep
def calc_timeseries(df, freq, agg='mean'):
    df = df.resample(freq).agg(agg)
    return df


# Return dataframe aggregated to water yearly timeseries
def calc_wyearly(df, agg='mean'):
    df = df.resample(wyfmt).agg(agg)
    return df


def calc_monthlyavg(df, agg='mean'):
    df = df.groupby(df.index.month).agg(agg)
    df.index.name = 'Month'
    return df
    


# Returns peak value for each water year along with date of peak
def calc_peak(df, wdw=1):    
    df = df.rolling(window=wdw, center=True).mean().dropna()
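    # nanargmax returns each peak's position *within* its water-year group;
    # adding the cumulative row count of all earlier groups converts those
    # group-relative positions into absolute row positions in df.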
    idx = calc_wyearly(df, agg=np.nanargmax).astype(int)
    shift = df.resample(wyfmt).count().shift(1).fillna(0).cumsum()
    idx = (idx + shift).astype(int).values.flatten()
    df = df.iloc[idx]
    #df['WYear'] = df.index.year + (df.index.month > 9)
    df.columns = ['PeakFlow']
    df.index.name = 'DATE'
    return df


# Returns low value for each water year along with date of low
def calc_low(df, wdw=7):
    df = df.rolling(window=wdw, center=True).mean().dropna()    
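    # Same indexing trick as calc_peak: group-relative nanargmin plus the
    # cumulative row count of earlier water years gives absolute positions.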
    idx = calc_wyearly(df, agg=np.nanargmin).astype(int)
    shift = df.resample(wyfmt).count().shift(1).fillna(0).cumsum()
    idx = (idx + shift).astype(int).values.flatten()
    df = df.iloc[idx]
    
    df.columns = ['LowFlow']
    df.index.name = 'DATE'

    return df

# Custom aggregator: returns the index (date) of the maximum value
def custagg(x):
    idx = x[[np.nanargmax(x)]]
    return idx.index
    

def get_extreme_moments(df):
    edf = df.dropna()
    edf = pd.DataFrame(np.sort(edf.values, axis=0)[::-1], index=edf.index, columns=edf.columns)
    edf = edf.reset_index(drop=True)    
    edf['Rank'] = edf.index + 1
    n = len(edf.index)

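    # Probability-weighted moment (PWM) terms for descending-sorted data;
    # summing B1i/B2i/B3i over all rows yields the sample PWMs b1, b2, b3
    # that feed the L-moment estimates in get_gev_params.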
    edf['B1i'] = (n - edf['Rank']) / (n * (n - 1)) * edf[edf.columns[0]]
    edf['B2i'] = edf['B1i'] * (n - 1 - edf['Rank']) / (n - 2)
    edf['B3i'] = edf['B2i'] * (n - 2 - edf['Rank']) / (n - 3)
    return edf


# Save plotting-position quantiles alongside the sorted flows -- a sketch
# adapted from the legacy save_quantile kept at the bottom of this file;
# assumes edf carries the 'Rank' column added by get_extreme_moments.
def save_quantile(edf, outname, keepHeader=True):
    n = len(edf.index)
    edf['Quantile'] = np.round((edf['Rank'] - 0.4) / (n + 0.2), 2)
    edf[['Quantile', edf.columns[0]]].to_csv(outname, index=False,
                                             header=keepHeader)

def get_gev_params(edf):
    B = [0, 0, 0, 0, 0]
    L = [0, 0, 0, 0, 0]
    c = [0, 0, 0, 0]
    kappa = [0, 0, 0, 0]
    alpha = [0, 0, 0, 0]
    psi = [0, 0, 0, 0]
    gev = pd.DataFrame({"kappa": [3, 0, 0, 0],
                        "alpha": [2, 0, 0, 0],
                        "psi": [0, 0, 0, 0]})
    
    B[0] = edf[edf.columns[0]].mean()
    B[1] = edf['B1i'].sum()
    B[2] = edf['B2i'].sum()
    B[3] = edf['B3i'].sum()
    
    L[1] = B[0]
    L[2] = 2 * B[1] - B[0]
    L[3] = 6 * B[2] - 6 * B[1] + B[0]
    
    # #print("B0: " + str(B[0]) + " B1: " + str(B[1])
    # #      + " B2: " + str(B[2]) + " B3: " + str(B[3]))
    # print("L1: " + str(L[1]) + " L2: " + str(L[2]) + " L3: " + str(L[3]))
    
    # GEV distribution using L moments
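    # Hosking et al. (1985) approximation: c = 2*L2/(L3 + 3*L2) - ln2/ln3
    # (0.630930 ~= ln2/ln3), then kappa ~= 7.8590*c + 2.9554*c^2.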
    if (L[3] + 3 * L[2]) == 0:
        c[0] = 1 - 0.630930
    else:
        c[0] = (2 * L[2]) / (L[3] + 3 * L[2]) - 0.630930
    gev.loc[0, 'kappa'] = 7.8590 * c[0] + 2.9554 * c[0] * c[0]
    gamma = math.gamma(1 + gev.loc[0, 'kappa'])
    gev.loc[0, 'alpha'] = gev.loc[0, 'kappa'] * L[2] / (gamma * (1 - math.pow(2, -gev.loc[0, 'kappa'])))
    gev.loc[0, 'psi'] = L[1] + gev.loc[0, 'alpha'] * (gamma - 1) / gev.loc[0, 'kappa']
        
    # GEV parameters based on LH2 moments (Wang 1997)
    a2 = [0.5914, -2.3351, 0.6442, -0.1616]
    
    gev.loc[1, 'kappa'] = a2[0] + a2[1]
    gamma = math.gamma(1 + gev.loc[1,  'kappa'])
    # #alpha[1] = LH2[1] / (2 * gamma / kappa[1] * math.e(-1 * kappa[1] * math.log(3.0)) - math.e(-1 * kappa[1] * math.log(40)))
    # #psi[1] = LH2[0] - alpha[1] / kappa[1] * (1.0 - gamma * math.e(10 * kappa[1] * log(30)))
    
    # GEV parameters based on LH4 moments (Wang 1997)
    a4 = [0.7113, -2.5383, 0.5142, -0.1027]
    
    # EV1 (Gumbel) parameters based on L moments:
    # alpha = lambda2 / ln(2) ~= 1.443 * lambda2; psi = lambda1 - 0.5772 * alpha
    gev.loc[3, 'alpha'] = 1.443 * L[2]
    gev.loc[3, 'psi'] = L[1] - 0.5772 * gev.loc[3, 'alpha']
    
    # #print("c: " + str(c[0]) + " k: " + str(kappa[0]) )
    # #print("gamma: " + str(gamma) + " alpha:  " + str(alpha[0]) + " psi: " + str(psi[0]))
    return gev


def get_extremes(gev, exList, isFlood):
    if(isFlood):
        edf = pd.DataFrame({"prob": 1 - 1 / exList})   # Peak Flows
    else:
        edf = pd.DataFrame({"prob": 1 / exList})       # Low Flows
    #edf = pd.DataFrame({"prob": 1 - 1 / exList})
    #edf = pd.DataFrame(index= 1-1/exList)    
    edf.index.name = 'prob'
    
    a = gev.loc[0, 'alpha']
    k = gev.loc[0, 'kappa']
    p = gev.loc[0, 'psi']
    
    #edf['dist0'] = gev.loc[0, 'psi'] + gev.loc[0, 'alpha'] / gev.loc[0, 'kappa'] * (1 - (np.power(-np.log(edf['prob']), gev.loc[0, 'kappa'])))
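    # GEV quantile function (L-moment parameterization):
    #   x(F) = psi + (alpha/kappa) * (1 - (-ln F)^kappa)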
    edf['dist0'] = p + a / k * (1 - (np.power(-np.log(edf.prob), k)))
    #edf.loc[:, edf.columns != 'prob'] = (edf.loc[:, edf.columns != 'prob'].astype(np.double))
    edf = edf.astype(np.double)
    edf.index = exList.astype(str)
    edf.index.name = "RtnYr"
    
    return edf
                                            


def calc_extremes(df, probs, isFlood):
    #probs = np.array([1.0101,2,5,10,20,25,50,100,500])
    probs = np.array(probs)
    pgev = get_extreme_moments(df)
    pgev = get_gev_params(pgev)
    edf = get_extremes(pgev, probs, isFlood)        
    return edf

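# Example sketch (hypothetical input): given a one-column DataFrame `pdf` of
# water-yearly peak flows, calc_extremes(pdf, [2, 10, 50, 100], True) returns
# the fitted flood magnitudes for those return periods.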

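# Leave-one-(water-)year-out resampling: recompute `func` once per year with
# that year removed, then report the requested percentile ranks across the
# replicates (a jackknife rather than a true bootstrap, despite the name).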
def use_bootstrap(df, func, ranks=[5, 50, 95], use_wyr=True, **kwargs):
    if use_wyr:
        grp = df.index.year + (df.index.month > 9)
    else:
        grp = df.index.year
    yrs = grp.unique()

    # Build the leave-one-year-out replicates and evaluate func on each
    mcs = [ df[grp != x] for x in yrs ]
    mcs = [ func(x, **kwargs) for x in mcs ]
    #
    idxs = mcs[0].index
    cols = mcs[0].columns
    #
    mf = pd.concat(mcs, keys=range(len(mcs)), axis=0).reset_index(level=0)
    pq = [ np.percentile(mf[mf.index == i][j], ranks) for i in idxs for j in cols ]
    #
    df = pd.DataFrame(pq, index=idxs, columns=ranks) # Possible rework for multiple data columns
    return df





def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file')
    # Threshold (between 0 and 1).
    parser.add_argument(
        '-t', nargs='?', default=0,
        help=('Apply a threshold for NaN calculations. Choose between 0 and 1: '
              '0 accepts all data; 1 requires all data points.'))
    # d: Daily Data
    parser.add_argument(
        '-d', action='store_true', default=False,
        help='Creates a daily data file.')
    # m: Monthly Average
    parser.add_argument(
        '-m', action='store_true', default=False,
        help='Calculates average monthly data.')
    # p: Water yearly Peak flow + Date
    parser.add_argument(
        '-p', action='store_true', default=False,
        help='Calculates peak flow by water year.')
    # l: Water yearly min 7-day rolling flow
    parser.add_argument(
        '-l', action='store_true', default=False,
        help='Calculates minimum 7-day average flow by water year.')
    
    
    # parser.add_argument('-q', action='store_true', default=False,
    # help='Toggles calculation weekly low flow vs daily low flow.
    # Default weekly.') # q: 7q10 vs q10
    
    # pq: rolling-window length in days for peak flows (1 = single-day peaks)
    parser.add_argument(
        '--pq', action='store', type=int, default=1,
        help='Choose the number of days to include in a rolling flow. (Peak)')
    # lq: rolling-window length in days for low flows (7 gives 7q10-style stats)
    parser.add_argument(
        '--lq', action='store', type=int, default=7,
        help='Choose the number of days to include in a rolling flow. (Low)')
    
    # pfreq: Flood frequency from magnitude <- provide list
    parser.add_argument(
        '--pfreq', action='store', type=str,  default="",
        help='Calculates return frequencies from peak-flow magnitudes.')
    # lfreq: Low frequency from magnitude <- provide list
    parser.add_argument(
        '--lfreq', action='store', type=str,  default="",
        help='Calculates return frequencies from low-flow magnitudes.')
    
    # g: Monthly Average + group by months
    parser.add_argument(
        '-g', action='store_true', default=False,
        help='Calculates monthly averages grouped by months.')
    # y: Water Yearly Average
    parser.add_argument(
        '-y', action='store_true', default=False,
        help='Calculates water yearly averages.')
    # e: Extreme statistics
    parser.add_argument(
        '-e', action='store_true', default=False,
        help='Calculates extreme statistics.')
    # gev:
    parser.add_argument(
        '--gev', action='store_true', default=False,
        help='Save GEV parameters as output when calculating ' +
        'frequency from streamflow magnitudes.')
    
    # expyr: Specify return years used for peak flow extreme statistics.
    #        <- provide comma delimited list
    parser.add_argument(
        '--expyr', type=str,  default="2,10,50,100",
        help='Specify return years to use for extreme statistics.'
        + ' Input comma delimited list. Default: 2,10,50,100')
    # Must be greater than 1
    # exlyr: Specify return years used for low flow extreme statistics.
    #        <- provide comma delimited list
    parser.add_argument(
        '--exlyr', type=str, default="2,10",
        help='Specify return years to use for extreme statistics. '
        + 'Input comma delimited list. Default: 2,10')
    # na: Provide list of values to treat as NA.
    #     <- provide comma delimited list
    parser.add_argument(
        '--na', type=str, default="NA,NaN, ",
        help='Provide list of values to treat as NA. '
        + 'Input comma delimited list. Default NA,NaN, .')
    # ifs: Provide characters to treat as input delimiters.
    #      <- provide string match expression
    # parser.add_argument('--ifs', type=str, default="-, /\t",
    # help='Provide list of values to treat as input delimiters.
    # Input string expression. Provide Default \"-, /\t\"')
    
    # s: Strips the date to water years
    parser.add_argument(
        '-s', action='store_true', default=False,
        help='Strips the dates to start and end on water year.')
    # x: Diagnostics
    parser.add_argument(
        '-x', action='store_true', default=False,
        help='Diagnostics table showing NA for each month and year')
    # k: Keep NaN with threshold
    parser.add_argument(
        '-k', action='store_true', default=False,
        help='Keep NaN in dataset when using threshold.')
    # v: Output results on screen
    parser.add_argument(
        '-v', action='store_true', default=False,
        help='Outputs results on screen.')
    # skip: Skip the first [n] line of files
    parser.add_argument(
        '--skip', nargs='?', type=int, default=0,
        help='Skips the first [n] lines of file.')
    # header: Uses the first line as header. Default assumes no header
    parser.add_argument(
        '--header', action='store_true', default=False,
        help='Uses the first line as header. Default assumes no header.')
    # headout: Turns off header on output
    parser.add_argument(
        '--headout', action='store_false', default=True,
        help='No header on output. Header enabled by default.')
    # start: Cut data to only include data after given start year
    parser.add_argument(
        '--start', nargs='?', type=int, default=0,
        help="Specify start year to cut.")
    # end: Cut data to only include data before given end year
    parser.add_argument(
        '--end', nargs='?', type=int, default=0,
        help="Specify end year to cut.")
    # dates:
    parser.add_argument(
        '--dates', action='store_true', default=False,
        help='Creates a dates only csv for peak or low flow.')
    # wday:
    parser.add_argument(
        '--wday', action='store_true', default=False,
        help='Creates a wday only csv for peak or low flow.')
    # flow:
    parser.add_argument(
        '--flow', action='store_true', default=False,
        help='Creates a flow only csv for peak or low flow.')
    # quantiles:
    parser.add_argument(
        '--quantiles', action='store_true', default=False,
        help='Create a quantile file during stat calculations.')
    # name:
    parser.add_argument(
        '--name', type=str, default=None,
        help='Use a custom filename for outputs.')
    parser.add_argument('out_dir', nargs='?', default=os.getcwd())
    args = parser.parse_args()
    infile = args.input_file
    
    if float(args.t) < 0 or float(args.t) > 1:
        print('ERROR: Threshold out of range. Please choose between 0 and 1.')
        sys.exit()

        
    thold = float(args.t)
    prec = 2
    naVal = 'NaN'


    # ---Variables---
    
    if (args.name is None):
        filename = os.path.splitext(os.path.basename(infile))[0].split('.')[0]
    else:
        filename = args.name

    

    # ---Check directory---
    out_dir = args.out_dir + '/'
    if (not os.path.isdir(args.out_dir)):
        print("Given output directory doesn't exist.")
        print("   " + out_dir)
        sys.exit()

    fout = out_dir + filename
    outDaily = fout + "_DailyFlows.csv"
    outMonthAvg = fout + "_MonthlyFlows.csv"
    outYearlyAvg = fout + "_YearlyFlows.csv"
    outPeak = fout + "_PeakFlows"
    outLow = fout + "_LowFlows"
    outGroupMonth = fout + "_MonthlyAvg.csv"
    outDiagn = fout + "_NAView"
    outExQPeak = fout + "_PeakQuantile.csv"
    outExQLow = fout + "_LowQuantile.csv"
    outExSPeak = fout + "_PeakStats.csv"
    outExSLow = fout + "_LowStats.csv"
    outFqPeak = fout + "_PeakFreq.csv"
    outFqLow = fout + "_LowFreq.csv"
    outGEVPeak = fout + "_PeakGEV.csv"
    outGEVLow = fout + "_LowGEV.csv"


    # ----------------------------------Calculations-------------------------------

    # Load the data and trim to the requested year range
    df = read_data(infile, hdr=0 if args.header else None, skip=args.skip)
    if args.start > 0 or args.end > 0:
        df = filter_year(df, args.start, args.end if args.end > 0 else 9999)
    if thold > 0:
        df = apply_threshold(df, thold)

    # Minimal output wiring: save each requested product via save_table
    if args.d:
        ddf = calc_timeseries(df, 'D')      # Get Daily timeseries
        save_table(ddf, outDaily, naVal, args.headout, args.v)
    if args.m:
        mdf = calc_timeseries(df, 'M')      # Get Monthly timeseries
        save_table(mdf, outMonthAvg, naVal, args.headout, args.v)
    if args.y:
        wdf = calc_timeseries(df, wyfmt)    # Get Water Yearly timeseries
        save_table(wdf, outYearlyAvg, naVal, args.headout, args.v)




        
    
    
        
if "__name__" == "__main__":
    main()
            
#print(read_data(ifile))




'''
sys.exit()



def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')









# ---Initializing dataframes---
mdf = pd.DataFrame()   # Monthly Average Dataframe
pdf = pd.DataFrame()   # Water year Peak Dataframe
ldf = pd.DataFrame()   # Water year min 7 day average Dataframe
gdf = pd.DataFrame()   # Monthly Grouped average Dataframe
pgev = pd.DataFrame()  # Peak GEV Params
lgev = pd.DataFrame()  # Low GEV Params
epdf = pd.DataFrame()  # Extreme PeakDataframe
eldf = pd.DataFrame()  # Extreme Low Dataframe
xdf = pd.DataFrame()   # Diagnostics Dataframe
tmdf = pd.DataFrame()  # Threshold Monthly Dataframe
tydf = pd.DataFrame()  # Threshold Yearly Dataframe
gy = pd.DataFrame()    # WYear only Dataframe (For adding back NaN)
gm = pd.DataFrame()    # WYear + Month only Dataframe (For adding back NaN)

# ---Prepare list params---
pl_func = lambda x: [float(y) for y in x.split(',') if y]
pfreqList = pl_func(args.pfreq)
lfreqList = pl_func(args.lfreq)
expyrList = np.array(pl_func(args.expyr))
exlyrList = np.array(pl_func(args.exlyr))
naList = args.na.split(',')
# ifsList = "[" + args.ifs + ']*' ### TODO: allow user provided delimiters
ifsList = "[-, /\t]+"

# ====================Preparation====================
# ---Load data---
if(args.header):
    df = pd.read_csv(infile, sep=ifsList, engine='python',
                     skiprows=args.skip, na_values=naList)
else:
    df = pd.read_csv(infile, sep=ifsList,engine='python', header=None,
                     skiprows=args.skip, na_values=naList)
df.columns = ['Year', 'Month', 'Day', 'Flow']


# Strip data to only include within given years
if(args.start > 0):
    df = df[df.Year >= args.start]

if(args.end > 0):
    df = df[df.Year <= args.end]
df = df.reset_index()

if (len(df) == 0):
    print("No data in range. Check data or adjust date range.")
    sys.exit()

# Filter out non-numeric data
# -> For older version of pandas
# #df['Flow'] = df['Flow'].convert_objects(convert_numeric=True)

# -> Requires pandas 0.17.0
df['Flow'] = pd.to_numeric(df['Flow'], errors='coerce')

# Get Water Years
df['WYear'] = np.vectorize(get_wyear)(df['Year'], df['Month'])
# #print_full(df)

# Strip dates to match water years
if (args.s):
    ind = 0

    # TODO: Failsafe checks for missing data
    # Behavior Check: If the following year is empty,
    # should it try the next year?

    # Earlier than 10/01
    if(df.iloc[0].Month < 10):
        # -> drop everything in that year to 10/01
        ind = df.loc[(df['Year'] == df.iloc[0].Year)
                     & (df['Month'] == 10)
                     & (df['Day'] == 1)].index[0]
    # Later than 10/01
    if(df.iloc[0].Month >= 10 and df.iloc[0].Day > 1):
        # -> drop everything till 10/01 of the following year
        ind = df.loc[(df['Year'] == df.iloc[0].Year + 1)
                     & (df['Month'] == 10)
                     & (df['Day'] == 1)].index[0]
    # Strip top
    if(ind > len(df.index)):
        print("ERROR: Not enough data to perform data stripping.")
        sys.exit()
    df = df[ind:].reset_index()

    # Ends later than 09/30
    if(df.iloc[-1].Month >= 10):
        # -> drop everything in that year to 9/30
        ind = df.loc[(df['Year'] == df.iloc[-1].Year)
                     & (df['Month'] == 9)
                     & (df['Day'] == 30)].index[0] + 1
        df = df[:ind]
    # Ends earlier than 09/30
    if(df.iloc[-1].Month <= 9 and df.iloc[-1].Day < 30):
        # -> drop everything till 09/30 of the previous year
        ind = df.loc[(df['Year'] == df.iloc[-1].Year-1)
                     & (df['Month'] == 9)
                     & (df['Day'] == 30)].index[0] + 1
        df = df[:ind]

    # TODO: check there is enough data when stripping from below


# Clear out data outside of threshold for months
def threshold_m(df, gm):
    tmdf = df[['Year', 'Month', 'Flow']].groupby(['Year', 'Month'])
    tmdf = tmdf.agg(['count', 'size']).reset_index()
    tmdf.columns = tmdf.columns.droplevel()
    tmdf.columns = ['Year', 'Month', 'count', 'size']
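    # 'count' = non-NaN observations, 'size' = all rows in the month; their
    # ratio is the month's data-coverage fraction tested against the threshold.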
    tmdf['Thold'] = tmdf['count'].div(tmdf['size'], axis='index')
    tmdf = tmdf[tmdf.Thold >= thold]
    tmdf = pd.merge(tmdf[['Year', 'Month']], df,
                    on=['Year', 'Month'], how='left')
    if(args.k):
        gm = df.groupby(['Year', 'Month']).count()
        gm = gm.reset_index()[['Year', 'Month']]
    tmdf = tmdf[['WYear', 'Year', 'Month', 'Day', 'Flow']]

    # Check that there is enough data
    if(tmdf.empty):
        print("ERROR: Not enough data for processing.")
        sys.exit()
    return (tmdf, gm)


# Clear out data outside of threshold for water years
def threshold_y(df, gy):
    tydf = df[['WYear', 'Flow']].groupby(['WYear'])
    tydf = tydf.agg(['count', 'size']).reset_index()
    tydf.columns = tydf.columns.droplevel()
    tydf.columns = ['WYear', 'count', 'size']
    tydf['Thold'] = tydf['count'].div(tydf['size'], axis='index')
    tydf = tydf[tydf.Thold >= thold]
    tydf = pd.merge(tydf[['WYear']], df, on=['WYear'], how='left')
    if(args.k):
        # #tydf = pd.merge(df[['Year', 'Month', 'Day', 'WYear']], tydf,
        # #on=['Year', 'Month', 'Day', 'WYear'], how='left')
        gy = df.groupby(['WYear']).count()
        gy = gy.reset_index()[['WYear']]
    tydf = tydf[['WYear', 'Year', 'Month', 'Day', 'Flow']]

    # Check that there is enough data
    if(tydf.empty):
        print("ERROR: Not enough data for processing.")
        sys.exit()

    return (tydf, gy)


# ----------------------------------Calculations-------------------------------
# Daily Data
if (args.d):
    print('Formatting Daily Data: ' + filename)
    (tydf, gy) = threshold_y(df, gy)

    # Keep NaN
    if(args.k):
        tydf = pd.merge(
            df[['WYear', 'Year', 'Month', 'Day']],
            tydf, on=['WYear', 'Year', 'Month', 'Day'],
            how='left')

    # Verbose
    if(args.v):
        print(tydf)

    # Output
    out = tydf[['Year', 'Month', 'Day', 'Flow']].fillna(naVal)
    out.to_csv(outDaily, index=False, header=args.headout)


# -----------------------------------------------------------------------------
# Calculate monthly averages
def get_month(tmdf):
    mdf = tmdf[['Year', 'Month', 'Flow']].groupby(['Year', 'Month'])
    mdf = mdf.agg(['mean']).reset_index()
    mdf.columns = mdf.columns.droplevel()
    
    # Cleanup
    mdf.columns = ['Year', 'Month', 'Avg.Flow']
    mdf['Avg.Flow'] = np.round(mdf[['Avg.Flow']].astype(np.double), prec)
    return mdf


if (args.m):
    print('Calculating Monthly Averages: ' + filename)
    if(tmdf.empty):
        (tmdf, gm) = threshold_m(df, gm)
    mdf = get_month(tmdf) 

    # Keep NaN
    if(args.k):
        mdf = pd.merge(gm, mdf, on=['Year', 'Month'], how='left')

    # Verbose
    if(args.v):
        print(mdf)

    # Output
    mdf.fillna(naVal).to_csv(outMonthAvg, index=False, header=args.headout)


# -----------------------------------------------------------------------------
# Calculate water yearly averages
def get_wyear(tydf):
    ydf = tydf[['WYear', 'Flow']].groupby(['WYear'])
    ydf = ydf.agg(['mean']).reset_index()

    # Cleanup
    ydf.columns = ['Year', 'Avg.Flow']
    ydf['Avg.Flow'] = np.round(ydf[['Avg.Flow']].astype(np.double), prec)
    return ydf


if(args.y):
    print('Calculating Yearly Averages: ' + filename)
    if(tydf.empty):
        (tydf, gy) = threshold_y(df, gy)
    ydf = get_wyear(tydf)

    # Keep NaN
    if(args.k):
        ydf = pd.merge(gy, ydf, left_on='WYear', right_on='Year', how='left')
        ydf.drop('Year', axis=1, inplace=True)
        ydf.columns = ['Year', 'Avg.Flow']

    # Verbose
    if(args.v):
        print(ydf)

    # Output
    out = ydf.fillna(naVal)
    out.to_csv(outYearlyAvg, index=False, header=args.headout)


# -----------------------------------------------------------------------------
# Calculate Peak flow by water year
def get_peak(tydf):
    pdf = tydf
    pdf['PeakFlow'] = tydf['Flow'].rolling(window=args.pq, center=False).mean()
    wyears = pd.DataFrame({'WYear':pdf.WYear.unique()})
    pdf = pdf.loc[pdf.groupby('WYear')['PeakFlow'].idxmax().dropna()].reset_index()
    pdf = wyears.merge(pdf, on='WYear', how='left')
    pdf['WYear'] = pd.unique(tydf.WYear.ravel())
    pdf.drop_duplicates(['WYear'], inplace=True)

    # Cleanup
    pdf[['PeakFlow']] = np.round(pdf[['PeakFlow']].astype(np.double), prec)    
    pdf = pdf[['WYear', 'Year', 'Month', 'Day', 'PeakFlow']]

    # #pdf = tydf[tydf.groupby(['WYear'], sort=False)['Flow']
    # #pdf = pdf.transform(max)==tydf['Flow']].copy()
    # #pdf.drop_duplicates(['WYear'], inplace=True)
    # Cleanup
    # #pdf[['Flow']] = np.round(pdf[['Flow']].astype(np.double), prec)
    # #pdf.rename(columns={'Flow':'PeakFlow'}, inplace=True)

    return pdf


if (args.p):
    print('Calculating Water Yearly %d-day Peak Flows: %s'
          % (args.pq, filename))
    if(tydf.empty):
        (tydf, gy) = threshold_y(df, gy)
    pdf = get_peak(tydf)

    # Keep NaN
    if(args.k):
        pdf = pd.merge(gy, pdf, on=['WYear'], how='left')

    # Verbose
    if(args.v):
        print(pdf)

    # Output
    if(args.dates):
        pdft = pdf.copy()
        pdft['Date'] = pdft.apply(
            lambda x:str('{0: 0>4g}'.format(x.Year))
            + "-" + str('{0: 0>2g}'.format(x.Month))
            + "-" + str('{0: 0>2g}'.format(x.Day))
            if pd.notnull(x.PeakFlow) else x.PeakFlow, axis=1)
        out = pdft[['WYear', 'Date']].fillna(naVal)
        out.to_csv(outPeak + "-dates.csv", index=False, header=args.headout)

    if(args.wday):
        pdft = pdf.copy()
        pdft['FDate'] = pdft.apply(
            lambda x: str('{0: 0>4g}'.format(x.Year))
            + "-"
            + str('{0: 0>2g}'.format(x.Month))
            + "-"
            + str('{0: 0>2g}'.format(x.Day))
            if pd.notnull(x.PeakFlow)
            else x.PeakFlow, axis=1)
        pdft['FDate'] = pd.to_datetime(pdft['FDate'], format='%Y-%m-%d')
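        # Day of water year: days elapsed since Sep 30 preceding the water
        # year, so Oct 1 maps to DoWY = 1.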
        pdft['SDate'] = pdft.apply(lambda x: str(x.WYear-1) + "-09-30", axis=1)
        pdft['SDate'] = pd.to_datetime(pdft['SDate'])
        pdft['DoWY'] = (pdft['FDate'] - pdft['SDate']) / np.timedelta64(1, 'D')

        out = pdft[['WYear', 'DoWY']].fillna(naVal)
        out.to_csv(outPeak + "-wday.csv", index=False, header=args.headout)

    if(args.flow):
        out = pdf[['WYear', 'PeakFlow']].fillna(naVal)
        out.to_csv(outPeak + "-Flow.csv", index=False, header=args.headout)

    if(not(args.dates & args.wday & args.flow)):
        out = pdf.fillna(naVal)
        out.to_csv(outPeak + ".csv", index=False, header=args.headout)


# -----------------------------------------------------------------------------
# Calculate 7-day minimum average flow by water year.
# Date returned is the last day of the window
def get_low(tydf):
    ldf = tydf
    # #ldf['LowFlow'] = pd.rolling_mean(tydf['Flow'], window=7).shift(-3)
    # # (deprecated API; the shift selected which day was the window center)
    ldf['LowFlow'] = tydf['Flow'].rolling(window=args.lq, center=False).mean()

    wyears = pd.DataFrame({'WYear': ldf.WYear.unique()})
    ldf = ldf.loc[ldf.groupby('WYear')['LowFlow'].idxmin().dropna()].reset_index()
    ldf = wyears.merge(ldf, on='WYear', how='left')

    ldf['WYear'] = pd.unique(tydf.WYear.ravel())
    ldf.drop_duplicates(['WYear'], inplace=True)

    # Cleanup
    ldf[['LowFlow']] = np.round(ldf[['LowFlow']].astype(np.double), prec)
    ldf = ldf[['WYear', 'Year', 'Month', 'Day', 'LowFlow']]

    return ldf


if (args.l):
    print('Calculating Water Yearly %d-day Low Flows: %s'
          % (args.lq, filename))
    if(tydf.empty):
        (tydf, gy) = threshold_y(df, gy)
    ldf = get_low(tydf)

    # Keep NaN
    if(args.k):
        ldf = pd.merge(gy, ldf, on=['WYear'], how='left')

    # Verbose
    if(args.v):
        print(ldf)

    # Output
    if(args.dates):
        ldft = ldf.copy()
        ldft['Date'] = ldft.apply(
            lambda x: str('{0: 0>4g}'.format(x.Year))
            + "-"
            + str('{0: 0>2g}'.format(x.Month))
            + "-"
            + str('{0: 0>2g}'.format(x.Day))
            if pd.notnull(x.LowFlow)
            else x.LowFlow, axis=1)
        out = ldft[['WYear', 'Date']].fillna(naVal)
        out.to_csv(outLow + "-dates.csv", index=False, header=args.headout)

    if(args.wday):
        ldft = ldf.copy()
        ldft['FDate'] = ldft.apply(
            lambda x: str('{0: 0>4g}'.format(x.Year + 1))
            + "-"
            + str('{0: 0>2g}'.format(x.Month))
            + "-"
            + str('{0: 0>2g}'.format(x.Day))
            if pd.notnull(x.LowFlow)
            else x.LowFlow, axis=1)
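        # Shifting FDate forward one year against SDate = WYear-09-30 yields
        # the day of water year for both Oct-Dec dates (Year = WYear - 1)
        # and Jan-Sep dates (Year = WYear).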
        ldft['FDate'] = pd.to_datetime(ldft['FDate'])
        ldft['SDate'] = ldft.apply(lambda x: str(x.WYear) + "-09-30", axis=1)
        ldft['SDate'] = pd.to_datetime(ldft['SDate'])
        ldft['DoWY'] = (ldft['FDate'] - ldft['SDate']) / np.timedelta64(1, 'D')

        out = ldft[['WYear', 'DoWY']].fillna(naVal)
        out.to_csv(outLow + "-wday.csv", index=False, header=args.headout)

    if (args.flow):
        out = ldf[['WYear', 'LowFlow']].fillna(naVal)
        out.to_csv(outLow + "-Flow.csv", index=False, header=args.headout)

    if (not(args.dates & args.wday & args.flow)):
        out = ldf.fillna(naVal)
        out.to_csv(outLow + ".csv", index=False, header=args.headout)

# -----------------------------------------------------------------------------
# Calculate Monthly Averages grouped by month
if (args.g):
    print('Calculating Monthly Average grouped by Month: ' + filename)
    if(tmdf.empty):
        (tmdf, gm) = threshold_m(df, gm)
    if(mdf.empty):
        mdf = get_month(tmdf)
    gdf = mdf[['Month', 'Avg.Flow']].groupby('Month')
    gdf = gdf.agg(['mean']).reset_index()
    gdf.columns = gdf.columns.droplevel()
    gdf = gdf.reindex(index=np.roll(gdf.index, 3))
    gdf.columns = ['Month', 'Avg.Flow']

    # Cleanup
    gdf[['Avg.Flow']] = np.round(gdf[['Avg.Flow']].astype(np.double), prec)

    # Verbose
    if(args.v):
        print(gdf)

    # Output
    out = gdf.fillna(naVal)
    out.to_csv(outGroupMonth, index=False, header=args.headout)


# -----------------------------------------------------------------------------
#  Helper function to calculate extreme moments
def get_extreme_moment(edf):
    edf.dropna(inplace=True)
    # #edf.sort(['Flow'], ascending=False, inplace=True) ## DEPRECATED
    edf.sort_values(by=['Flow'], ascending=False, inplace=True)

    edf = edf.reset_index()[['Flow']]
    edf['Rank'] = edf.index + 1
    n = len(edf.index)

    # #edf['B1i'] = edf.apply(
    # #   lambda x: (n - x.Rank) / (n*(n - 1)) * x.Flow, axis=1)
    # #edf['B2i'] = edf.apply(lambda x: ((n - x.Rank) * (n - x.Rank - 1)) / (n * (n - 1) * (n - 2)) * x.Flow, axis=1)
    # #edf['B3i'] = edf.apply(lambda x: ((n - x.Rank) * (n - x.Rank - 1) * (n-x.Rank-2)) / (n * (n - 1) * (n - 2) * (n-3)) * x.Flow, axis=1)
    edf['B1i'] = (n - edf['Rank']) / (n * (n - 1)) * edf['Flow']
    edf['B2i'] = edf['B1i'] * (n - 1 - edf['Rank']) / (n - 2)
    edf['B3i'] = edf['B2i'] * (n - 2 - edf['Rank']) / (n - 3)
    return edf


# Helper function to save quantile outputs
def save_quantile(edf, outname):
    n = len(edf.index)
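    # Cunnane plotting position: p = (rank - 0.4) / (n + 0.2)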
    edf['Quantile'] = edf.apply(lambda x: (x.Rank - 0.4) / (n + 0.2), axis=1)
    edf[['Quantile']] = np.round(edf[['Quantile']].astype(np.double), prec)

    out = edf[['Quantile', 'Flow']]
    out.to_csv(outname, index=False, header=args.headout)


# Helper function to calculate GEV parameters
def get_gev_param(edf):
    B = [0, 0, 0, 0, 0]
    L = [0, 0, 0, 0, 0]
    c = [0, 0, 0, 0]
    kappa = [0, 0, 0, 0]
    alpha = [0, 0, 0, 0]
    psi = [0, 0, 0, 0]
    gev = pd.DataFrame({"kappa": [3, 0, 0, 0],
                        "alpha": [2, 0, 0, 0],
                        "psi": [0, 0, 0, 0]})

    B[0] = edf['Flow'].mean()
    B[1] = edf['B1i'].sum()
    B[2] = edf['B2i'].sum()
    B[3] = edf['B3i'].sum()

    L[1] = B[0]
    L[2] = 2 * B[1] - B[0]
    L[3] = 6 * B[2] - 6 * B[1] + B[0]

    # #print("B0: " + str(B[0]) + " B1: " + str(B[1])
    # #      + " B2: " + str(B[2]) + " B3: " + str(B[3]))
    # print("L1: " + str(L[1]) + " L2: " + str(L[2]) + " L3: " + str(L[3]))

    # GEV distribution using L moments
    if (L[3] + 3 * L[2]) == 0:
        c[0] = 1 - 0.630930
    else:
        c[0] = (2 * L[2]) / (L[3] + 3 * L[2]) - 0.630930    
    gev.loc[0, 'kappa'] = 7.8590 * c[0] + 2.9554 * c[0] * c[0]
    gamma = math.gamma(1 + gev.loc[0, 'kappa'])
    gev.loc[0, 'alpha'] = gev.loc[0, 'kappa'] * L[2] / (gamma * (1 - math.pow(2, -gev.loc[0, 'kappa'])))
    gev.loc[0, 'psi'] = L[1] + gev.loc[0, 'alpha'] * (gamma - 1) / gev.loc[0, 'kappa']

    # GEV parameters based on LH2 moments (Wang 1997)
    a2 = [0.5914, -2.3351, 0.6442, -0.1616]

    gev.loc[1, 'kappa'] = a2[0] + a2[1]
    gamma = math.gamma(1 + gev.loc[1,  'kappa'])
    # #alpha[1] = LH2[1] / (2 * gamma / kappa[1] * math.e(-1 * kappa[1] * math.log(3.0)) - math.e(-1 * kappa[1] * math.log(40)))
    # #psi[1] = LH2[0] - alpha[1] / kappa[1] * (1.0 - gamma * math.e(10 * kappa[1] * log(30)))

    # GEV parameters based on LH4 moments (Wang 1997)
    a4 = [0.7113, -2.5383, 0.5142, -0.1027]

    # EV1 (Gumbel) parameters based on L moments:
    # alpha = lambda2 / ln(2) ~= 1.443 * lambda2; psi = lambda1 - 0.5772 * alpha
    gev.loc[3, 'alpha'] = 1.443 * L[2]
    gev.loc[3, 'psi'] = L[1] - 0.5772 * gev.loc[3, 'alpha']

    # #print("c: " + str(c[0]) + " k: " + str(kappa[0]) )
    # #print("gamma: " + str(gamma) + " alpha:  " + str(alpha[0]) + " psi: " + str(psi[0]))
    return gev


# Helper function to calculate extremes
def get_extreme(gev, isFlood):
    if(isFlood):
        edf = pd.DataFrame({"prob": 1 - 1 / expyrList})   # Peak Flows
    else:
        edf = pd.DataFrame({"prob": 1 / exlyrList})       # Low Flows

    a = gev.loc[0, 'alpha']
    k = gev.loc[0, 'kappa']
    p = gev.loc[0, 'psi']

    #edf['dist0'] = gev.loc[0, 'psi'] + gev.loc[0, 'alpha'] / gev.loc[0, 'kappa'] * (1 - (np.power(-np.log(edf['prob']), gev.loc[0, 'kappa'])))
    edf['dist0'] = p + a / k * (1 - (np.power(-np.log(edf['prob']), k)))    
    edf.loc[:, edf.columns != 'prob'] = (edf.loc[:, edf.columns != 'prob'].astype(np.double))

    return edf


# Calculate extreme statistics
if (args.e):
    print('Calculating Extreme Flood Stats: {}'.format(filename))
    if(tydf.empty):
        (tydf, gy) = threshold_y(df, gy)
    if(pdf.empty):
        pdf = get_peak(tydf)
        
    pgev = pdf[['PeakFlow']].copy()
    pgev.columns = ['Flow']    
    pgev = get_extreme_moment(pgev)

    # Quantiles
    if(args.quantiles):
        save_quantile(pgev, outExQPeak)

    pgev = get_gev_param(pgev)
    epdf = get_extreme(pgev, True)
    print(pgev)

    # Verbose
    if(args.v):
        print("\nPeak Flows")
        print(epdf.set_index('prob').T)

    epdf['prob'] = epdf['prob'].apply(lambda x : int(round(1 / (1 - x), 0)))
    epdf = epdf.rename(columns={'prob': 'returnYr'})
    epdf.to_csv(outExSPeak, index=False, float_format='%.2f',
                header=args.headout)

    # --------------------------------------------------------------
    print('Calculating Extreme Low Stats: ' + filename)
    if(ldf.empty):
        ldf = get_low(tydf)

    lgev = ldf[['LowFlow']].copy()
    lgev.columns = ['Flow']
    lgev = get_extreme_moment(lgev)

    # Quantiles
    if(args.quantiles):
        save_quantile(lgev, outExQLow)

    lgev = get_gev_param(lgev)
    eldf = get_extreme(lgev, False)

    # Verbose
    if(args.v):
        print("\nLow Flows")
        print(eldf.set_index('prob').T)

    eldf['prob'] = eldf['prob'].apply(lambda x : int(round((1 / x), 0)))
    eldf = eldf.rename(columns={'prob': 'returnYr'})
    eldf.to_csv(outExSLow, index=False, float_format='%.2f',
                header=args.headout)


## Helper function to calculate frequency from magnitude
def get_freq(gev, freqList, isFlood):
    fqdf = pd.DataFrame({'flow': freqList})

    a = gev.loc[0, 'alpha']
    k = gev.loc[0, 'kappa']
    p = gev.loc[0, 'psi']
    
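    # GEV CDF: F(x) = exp(-(1 - k*(x - psi)/alpha)**(1/k)); the return period
    # is 1/(1 - F) for floods (exceedance) and 1/F for low flows.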
    fqdf['freq'] = np.exp(-np.power(1- ((fqdf['flow'] - p) * k / a), 1/k))
        
    if(isFlood):
        fqdf['freq'] = 1 / (1 - fqdf['freq'])
    else:
        fqdf['freq'] = 1 / fqdf['freq']

    return fqdf


# Calculate flood frequencies from magnitudes
if(pfreqList):
    print('Calculating flood frequencies from magnitude: ' + filename)
    if(pgev.empty):
        if(tydf.empty):
            (tydf, gy) = threshold_y(df, gy)
        if(pdf.empty):
            pdf = get_peak(tydf)
        pgev = pdf[['PeakFlow']].copy()
        pgev.columns = ['Flow']
        pgev = get_extreme_moment(pgev)
        pgev = get_gev_param(pgev)

    # Calculate magnitude
    pfdf = get_freq(pgev, pfreqList, True)

    # Save output
    if(args.gev):
        pgev.to_csv(outGEVPeak, index=False, float_format='%.2f',
                    header=args.headout)
    pfdf.to_csv(outFqPeak, index=False, float_format='%.2f',
                header=args.headout)

# Calculate low frequencies from magnitudes
if(lfreqList):
    print('Calculating low frequencies from magnitude: ' + filename)
    if(lgev.empty):
        if(tydf.empty):
            (tydf, gy) = threshold_y(df, gy)
        if(ldf.empty):
            ldf = get_low(tydf)
        lgev = ldf[['LowFlow']].copy()
        lgev.columns = ['Flow']
        lgev = get_extreme_moment(lgev)
        lgev = get_gev_param(lgev)

    # Calculate magnitude
    lfdf = get_freq(lgev, lfreqList, False)

    # Save output
    if(args.gev):
        lgev.to_csv(outGEVLow, index=False, float_format='%.2f',
                    header=args.headout)
    lfdf.to_csv(outFqLow, index=False, float_format='%.2f',
                header=args.headout)

# -----------------------------------------------------------------------------
# Creates a diagnostics table showing how many NA values are found
# in each month of each year
if (args.x):
    print('Creating diagnostic table: ' + filename)

    # By Year
    xdf = df[['Year', 'Month', 'Flow']].groupby(['Year', 'Month'])
    xdf = xdf.agg({'Flow': lambda x: x.isnull().sum()}).reset_index()
    xdf.columns = ['Year', 'Month', 'count']
    xdf = xdf.pivot_table(index='Year', columns='Month', values='count')
    for i in range(1, 13):
        if i not in xdf:
            xdf[i] = 0
    xdf = xdf[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]
    xdf.columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Verbose
    if(args.v):
        print_full(xdf)

    # By Water Year
    xdf2 = df[['WYear', 'Month', 'Flow']].groupby(['WYear', 'Month'])
    xdf2 = xdf2.agg({'Flow': lambda x: x.isnull().sum()}).reset_index()
    xdf2.columns = ['WYear', 'Month', 'count']
    xdf2 = xdf2.pivot_table(index='WYear', columns='Month', values='count')
    for i in range(1, 13):
        if i not in xdf2:
            xdf2[i] = 0
    xdf2 = xdf2[[10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]]
    xdf2.columns = ['Oct', 'Nov', 'Dec', 'Jan', 'Feb',
                    'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep']

    # Verbose
    if(args.v):
        print_full(xdf2)

    # Output
    xdf.to_csv(outDiagn + "-year.csv", header=args.headout)
    xdf2.to_csv(outDiagn + "-wyear.csv", header=args.headout)
'''
