#!/usr/bin/python3
import pandas as pd
import numpy as np
from glob import glob
import os
import sys

# Root directory scanned by the setup* functions; its immediate sub-directories
# are treated as locations/sites and the summary CSVs are written back into it.
input_dir = "/home/disk/rocinante/DATA/temp/chico/hyak/writeup/pub_temp/"
# CSV lookup table; setup2 uses its `loc2` column to find a site ID and its
# `loc3` column as the matching site name.
loc_file = "/home/disk/rocinante/DATA/temp/chico/hyak/data_ref/tbl.csv"
# Loaded once at import time, so importing this module requires loc_file to exist.
ldf = pd.read_csv(loc_file)


def setup3():
    """Write the two 7DADMAX summary CSVs into ``input_dir``.

    For each of the two per-location file patterns (average max by water
    year, and days above 16C between Jun 15 and Sep 15) every matching
    ``<input_dir>/<location>/*...AllGCM.csv`` file is read, tagged with its
    location (the name of the directory containing the file) and stacked into
    a single table that is written next to the location directories.

    Returns:
        None. The results are the two ``Summary_7DADMAX_*.csv`` files.
    """

    def mk_summary(data):
        """Stack the CSV files in *data* into one DataFrame.

        The first column of every file is renamed to ``Period`` and a leading
        ``Location Name`` column is added. An empty *data* yields an empty
        DataFrame.
        """
        frames = []
        for path in data:
            cdf = pd.read_csv(path)
            loc = os.path.basename(os.path.dirname(path))
            cdf = cdf.rename(columns={cdf.columns[0]: "Period"})
            cdf.insert(0, 'Location Name', loc)
            frames.append(cdf)
        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported replacement. pd.concat rejects an empty list, so keep the
        # "no files found" case returning an empty frame.
        return pd.concat(frames) if frames else pd.DataFrame()

    # Sorting the glob results makes the row order of the summaries
    # reproducible instead of depending on directory listing order.
    print('Make summary of Avg Max WY')
    data = sorted(glob("{}/*/*7DADMAX_AvgMaxWY_AllGCM.csv".format(input_dir)))
    df = mk_summary(data)
    df.to_csv('{}/Summary_7DADMAX_AvgMaxWY.csv'.format(input_dir), index=False)

    print('Make summary of Days above 16C')
    data = sorted(glob("{}/*/*7DADMAX_Jun15-Sep15_DaysAbv16C_AllGCM.csv".format(input_dir)))
    df = mk_summary(data)
    df.to_csv('{}/Summary_7DADMAX_Jun15-Sep15_DaysAbv16C.csv'.format(input_dir), index=False)
    

# Module-level call: the 7DADMAX summaries are built as soon as this file is run.
setup3()
    








def _percent_change_table(loc, site_name, dur, pds, yrs, gcms, stats_suffix):
    """Build the percent-change table for one site, duration and future period.

    For every GCM the historical (``1990s``) and future (*pds*) statistics
    files ``<input_dir>/<loc>/<loc>_<gcm>_<period>_<dur>_<stats_suffix>.csv``
    are read and the percent change of their ``dist0`` column is stored in a
    column named after the GCM. ``Recurrance Interval`` is taken from the
    historical file's ``RtnYr`` column.

    Args:
        loc: Site ID (name of the site directory under ``input_dir``).
        site_name: Value for the ``Site Name`` column.
        dur: Duration label used in the file names.
        pds: Future period label used in the file names (e.g. ``"2050s"``).
        yrs: Value for the ``Future Years`` column (e.g. ``"2040-2069"``).
        gcms: GCM names, one output column each.
        stats_suffix: ``"PeakStats"`` or ``"LowStats"``.

    Returns:
        DataFrame with the identifying columns followed by one column per GCM.
    """
    table = pd.DataFrame()
    for gcm in gcms:
        his = pd.read_csv("{}/{}/{}_{}_1990s_{}_{}.csv".format(
            input_dir, loc, loc, gcm, dur, stats_suffix))
        table["Recurrance Interval"] = his['RtnYr']

        fut = pd.read_csv("{}/{}/{}_{}_{}_{}_{}.csv".format(
            input_dir, loc, loc, gcm, pds, dur, stats_suffix))
        his50 = his["dist0"]
        fut50 = fut["dist0"]
        table[gcm] = (fut50 - his50) / his50 * 100

    # Each insert goes to position 0, so the final column order is
    # Site Name, Site ID, Duration, Historical Years, Future Years, ...
    table.insert(0, "Future Years", yrs)
    table.insert(0, "Historical Years", "1980-2009")
    table.insert(0, "Duration", dur)
    table.insert(0, "Site ID", loc)
    table.insert(0, "Site Name", site_name)
    return table


def setup2():
    """Assemble percent-change tables of peak and low statistics for all sites.

    Sites are the immediate sub-directories of ``input_dir``; GCM names are
    the base names of the ``*RCP*`` entries in the atmospheric data directory.
    Peak tables are built for every duration in ``dursP`` and low tables for
    every duration in ``dursL``, each for the 2050s and 2080s periods.

    NOTE(review): ``dursP`` and ``dursL`` are not defined in the visible part
    of this file; they must exist at module level before this is called.

    Returns:
        None. Writing the combined tables is currently disabled (see the
        commented-out ``to_csv`` calls at the end).
    """
    locs = next(os.walk(input_dir))[1]
    futs = [("2050s", "2040-2069"), ("2080s", "2070-2099")]

    gcms = sorted(glob("/home/disk/rocinante/DATA/temp/chico/hyak/data_atmos/*RCP*"))
    gcms = [os.path.basename(g) for g in gcms]

    peak_frames = []
    low_frames = []

    for loc in locs:
        print(loc)

        # The site-name lookup depends only on the site, so do it once.
        fullname = ldf[ldf.loc2 == loc]['loc3']
        fullname = fullname.values[0] if not fullname.empty else np.nan

        for dur in dursP:
            for (pds, yrs) in futs:
                peak_frames.append(_percent_change_table(
                    loc, fullname, dur, pds, yrs, gcms, "PeakStats"))

        for dur in dursL:
            for (pds, yrs) in futs:
                low_frames.append(_percent_change_table(
                    loc, fullname, dur, pds, yrs, gcms, "LowStats"))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, hence the fallback to an empty frame.
    pkdf = pd.concat(peak_frames) if peak_frames else pd.DataFrame()
    lwdf = pd.concat(low_frames) if low_frames else pd.DataFrame()

    #pkdf.to_csv("{}/Summary_PeakStats.csv".format(input_dir), index=False, float_format="%0.1f")
    #lwdf.to_csv("{}/Summary_LowStats.csv".format(input_dir), index=False, float_format="%0.1f")

    


def _cross_join(left, right):
    """Return every combination of the rows of *left* and *right*."""
    # A constant helper key turns an ordinary merge into a cross join; the
    # key is dropped by name because pandas 2.0 no longer accepts the
    # positional ``axis`` argument of ``DataFrame.drop``.
    return left.assign(key=1).merge(right.assign(key=1), on='key').drop(columns='key')


def _pct_change_at(his, fut, rti):
    """Return the percent change of column ``50`` at return period *rti*.

    *his* and *fut* are statistics tables with ``RtnYr`` and ``50`` columns.
    The first row whose ``RtnYr`` equals *rti* is used from each table; NaN is
    returned when either table has no such row.
    """
    his_val = his.loc[his.RtnYr == rti, "50"]
    fut_val = fut.loc[fut.RtnYr == rti, "50"]
    if his_val.empty or fut_val.empty:
        return np.nan
    base = his_val.iloc[0]
    return (fut_val.iloc[0] - base) / base * 100


def setup1(gcm_dir="/home/disk/tsuga2/jswon11/workdir/2020-07_Snoho-autocalibration/WRF_runs/data"):
    """Write ``Summary_PeakStats.csv`` with percent changes of peak statistics.

    One row is produced for every combination of site (sub-directory of
    ``input_dir``), duration, recurrence interval and future period, and one
    column per GCM (entry of *gcm_dir*) holds the percent change between
    ``<loc>_<gcm>_1990s_<dur>_PeakStats.csv`` and the matching future file.

    NOTE(review): this reads the ``50`` column of the PeakStats files, while
    setup2 reads ``dist0`` from files with the same naming pattern; confirm
    which column the files actually carry.

    Args:
        gcm_dir: Directory whose entries name the GCMs. Defaults to the
            location that was previously hard-coded.

    Returns:
        None. The table is written to ``<input_dir>/Summary_PeakStats.csv``.
    """
    # Only directories are sites; summary CSVs written into input_dir by the
    # other functions must not be picked up as site IDs.
    sites = sorted(glob("{}/*".format(input_dir)))
    locs = [os.path.basename(site) for site in sites if os.path.isdir(site)]
    durs = ['1hour', '1day', '7day']
    rtis = [1.01, 2, 5, 10, 15, 20, 50, 100, 500]
    futs = ['2050s', '2080s']

    df = _cross_join(pd.DataFrame({'Site ID': locs}), pd.DataFrame({'Duration': durs}))
    df = _cross_join(df, pd.DataFrame({'Recurrance Interval': rtis}))
    df['Historical Years'] = "1981-2010"
    df = _cross_join(df, pd.DataFrame({'Future Years': futs}))

    gcms = sorted(glob("{}/*".format(gcm_dir)))
    gcms = [os.path.basename(g) for g in gcms]
    for gcm in gcms:
        df[gcm] = np.nan

    # The statistics files depend only on site, duration, period and GCM, so
    # read each pair once and fill all recurrence intervals from it. The
    # groups mapping is materialised before df is modified.
    groups = df.groupby(["Site ID", "Duration", "Future Years"]).groups
    for (loc, dur, pds), rows in groups.items():
        wanted = df.loc[rows, "Recurrance Interval"]
        for gcm in gcms:
            print(loc, gcm)
            his = pd.read_csv("{}/{}/{}_{}_1990s_{}_PeakStats.csv".format(input_dir, loc, loc, gcm, dur))
            fut = pd.read_csv("{}/{}/{}_{}_{}_{}_PeakStats.csv".format(input_dir, loc, loc, gcm, pds, dur))
            # Write through df.loc: assigning to the row yielded by
            # iterrows() only changes a copy and never reaches df.
            df.loc[rows, gcm] = [_pct_change_at(his, fut, rti) for rti in wanted]

    df.to_csv("{}/Summary_PeakStats.csv".format(input_dir))

#setup1()
#sys.exit()
