import pandas as pd
import hydroeval as hev
from glob import glob
import os
import sys


# Compact CamelCase site labels keyed by station id string.
# NOTE(review): the ids look like USGS gauge numbers — confirm.
keys = {
    '12134500': 'SkykomishRNrGoldBar',
    '12147500': 'NFToltRNrCarnation',
    '12149000': 'SnoqualmieRNrCarnation',
    '12155300': 'PilchuckRNrSnohomish',
    '12150800': 'SnohomishRNrMonroe',
}



# Human-readable site names keyed by station id string (same ids as `keys`).
# The 12147500 entry is the North Fork *Tolt* River (cf. 'NFToltRNrCarnation'
# in `keys`), and the 12134500 site is near the town of Gold Bar (two words,
# cf. 'SkykomishRNrGoldBar').
names = {
    '12134500': 'Skykomish River Near Gold Bar',
    '12147500': 'North Fork Tolt River Near Carnation',
    '12149000': 'Snoqualmie River Near Carnation',
    '12155300': 'Pilchuck River Near Snohomish',
    '12150800': 'Snohomish River Near Monroe',
}


# Absolute project locations. Each value keeps its trailing slash.
# `data_dir` and `out_dir` are not referenced in the visible part of this
# script; `pnnl` is where the simulated daily flows are read from and
# `obs_dir` is where the observed daily flows are read from.
data_dir = (
    '/home/disk/tsuga2/jswon11/workdir/'
    '2020-07_Snoho-autocalibration/WRF_runs/pub/'
)
pnnl = (
    '/home/disk/tsuga2/jswon11/workdir/'
    '2020-07_Snoho-autocalibration/WRF_runs/merge/'
    'class36_500k_pnnl_1980-2015/'
)
obs_dir = (
    '/home/disk/tsuga2/jswon11/workdir/'
    '2020-07_Snoho-autocalibration/obs/stats/'
)
out_dir = (
    '/home/disk/tsuga2/jswon11/workdir/'
    '2020-07_Snoho-autocalibration/writeup/output/'
)


# Build a table that contains, for the calibration site plus other key
# sites on the Snohomish:
# (1) daily correlation and NSE,
# (2) monthly correlation and NSE, and
# (3) peak flow correlation and NSE.
# Print the observation directory that is about to be scanned.
print(obs_dir)

# Collect the unique site ids: the text before the first underscore in the
# base name of every '*_DailyFlows.csv' file found under obs_dir.
# De-duplicate first and sort afterwards so that sites are always processed
# in the same ascending order; iterating a bare set gives no such guarantee.
obs_files = glob('{}/*_DailyFlows.csv'.format(obs_dir))
data = sorted({os.path.basename(path).split('_')[0] for path in obs_files})


def _score(frame):
    """Return ``(r, nse, kge, nas)`` for a frame with 'Obs' and 'PNNL' columns.

    ``nas`` is the number of missing 'Obs' values in *frame*.  The scores are
    computed on the rows that remain after dropping every row with a missing
    value, with the 'PNNL' column passed as the first argument and 'Obs' as
    the second.  ``hev.kge`` is evaluated once; element ``[0][0]`` of its
    rounded result is reported as KGE and element ``[1][0]`` as R.  All
    scores are rounded to two decimals.
    """
    nas = frame.Obs.isna().sum()
    paired = frame.dropna()
    sim = paired.PNNL.values
    obs = paired.Obs.values
    kge_parts = hev.kge(sim, obs).round(2)
    nse = hev.nse(sim, obs).round(2)
    return kge_parts[1][0], nse, kge_parts[0][0], nas


cnames = ['id', 'start', 'end', 'type', 'R', 'NSE', 'KGE', 'NAs']
tbl = pd.DataFrame(columns=cnames)
for d in data:
    # NOTE(review): the observed file name repeats the '_DailyFlows' suffix
    # while the simulated one does not — confirm this matches the files on
    # disk.
    obsf = "{}/{}_DailyFlows_DailyFlows.csv".format(obs_dir, d)
    simf = "{}/{}_DailyFlows.csv".format(pnnl, d)
    odf = pd.read_csv(obsf)
    sdf = pd.read_csv(simf)

    # Discard the first 274 simulated rows.
    # NOTE(review): presumably this skips the part of the first calendar
    # year that precedes 1 October — confirm against the simulation start.
    sdf = sdf[274:]

    # In both files every column except the last is handed to
    # pd.to_datetime to build the time index; the last column is the flow.
    odf.index = pd.to_datetime(odf[odf.columns[:-1]])
    sdf.index = pd.to_datetime(sdf[sdf.columns[:-1]])
    odf = odf[[odf.columns[-1]]]
    sdf = sdf[[sdf.columns[-1]]]

    # Keep only the timestamps present in both series.
    df = odf.join(sdf, lsuffix='Obs', rsuffix='Sim', how='inner')
    df.columns = ['Obs', 'PNNL']

    # Every row of the table reports the span of the joined daily record,
    # whatever the aggregation level.
    st = df.index[0]
    ed = df.index[-1]

    # NOTE(review): 'A-OCT' makes annual bins that end in October; a
    # conventional Oct-Sep water year would be 'A-SEP' — confirm intent.
    # NOTE(review): recent pandas releases deprecate the 'M' and 'A-...'
    # aliases in favour of 'ME' and 'YE-...'.
    aggregates = (
        ('Daily', df),
        ('Monthly', df.resample('M').mean()),
        ('PeakFlows', df.resample('A-OCT').max()),
    )
    for label, frame in aggregates:
        r, nse, kge, nas = _score(frame)
        tbl.loc[len(tbl)] = [d, st, ed, label, r, nse, kge, nas]

    print(d)

print(tbl)
tbl = tbl.sort_values(by=['type', 'id'])
# The table is written to the current working directory.
tbl.to_csv('correlation_table.csv', index=False, float_format='%0.2f')



