import pandas as pd



# Pooled summary tables for the two data sets, read from fixed locations on disk.
maca_summary_csv = '/home/disk/rocinante/DATA/temp/kcp3/scripts/puyallup_summary/data/pool_maca_summary.csv'
wrf_summary_csv = '/home/disk/rocinante/DATA/temp/kcp3/scripts/puyallup_summary/data/pool_wrf_summary.csv'

maca = pd.read_csv(maca_summary_csv)
wrf = pd.read_csv(wrf_summary_csv)

def process(df):
    """Return each later period's change relative to the '1980s' baseline rows.

    Every row is labelled with a period string built from ``styr + 10``
    (a start year of 1970 gives ``'1980s'``).  Rows labelled ``'1980s'``
    form the baseline; every other row is paired with the baseline row that
    shares its ``lat``, ``lon`` and ``gcm`` prefix (the part of ``gcm``
    before its last underscore).  The pairing is an inner join, so rows
    without a matching baseline are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``lat``, ``lon``, ``gcm``, ``styr``, ``edyr`` and the
        columns ``temp_ann``, ``temp_JFM``, ``temp_AMJ``, ``temp_JAS``,
        ``temp_OND``, ``prec_ann``, ``prec_JFM``, ``prec_AMJ``,
        ``prec_JAS``, ``prec_OND``.  The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``lat``, ``lon``, ``gcm``, ``period``, ``temp_ANN`` ..
        ``temp_OND`` (difference from baseline), ``prec_ANN`` ..
        ``prec_OND`` (percent change from baseline) and ``RCP``.  ``gcm``
        has its last underscore-separated suffix removed and any ``'MACA_'``
        text stripped; ``RCP`` is that suffix in upper case.
    """
    # drop() and assign() both return new frames, so the caller's frame does
    # not silently gain 'period' and 'base' columns.
    work = df.drop(['styr', 'edyr'], axis=1).assign(
        period=(df['styr'] + 10).astype(str) + 's',
        base=df['gcm'].str.rsplit('_', n=1).str.get(0),
    )

    is_baseline = work['period'] == '1980s'
    # Overlapping columns get pandas' default suffixes: _x for the later
    # period (left side), _y for the baseline (right side).
    mrg = work[~is_baseline].merge(work[is_baseline], on=['lat', 'lon', 'base'])

    mrgF = mrg[['lat', 'lon', 'gcm_x', 'period_x']].copy()
    mrgF.columns = ['lat', 'lon', 'gcm', 'period']

    # (input column suffix, output column suffix); only the annual column is
    # spelled in lower case in the input.
    seasons = (
        ('ann', 'ANN'),
        ('JFM', 'JFM'),
        ('AMJ', 'AMJ'),
        ('JAS', 'JAS'),
        ('OND', 'OND'),
    )
    # Two passes keep the output column order: all temp_* then all prec_*.
    for src, label in seasons:
        later = mrg['temp_{}_x'.format(src)]
        baseline = mrg['temp_{}_y'.format(src)]
        mrgF['temp_' + label] = later - baseline
    for src, label in seasons:
        later = mrg['prec_{}_x'.format(src)]
        baseline = mrg['prec_{}_y'.format(src)]
        mrgF['prec_' + label] = (later / baseline) * 100 - 100

    # Split once, before 'gcm' is overwritten with its prefix.
    gcm_parts = mrgF['gcm'].str.rsplit('_', n=1)
    mrgF['RCP'] = gcm_parts.str.get(1).str.upper()
    # 'MACA_' is a literal, not a pattern; say so explicitly so the result
    # does not depend on the pandas version's default for `regex`.
    mrgF['gcm'] = gcm_parts.str.get(0).str.replace('MACA_', '', regex=False)

    return mrgF

def get_data(df_data, df_list, name):
    """Average the rows of ``df_data`` that sit on the points in ``df_list``.

    For every row of ``df_list`` the rows of ``df_data`` whose ``lat`` and
    ``lon`` are exactly equal to that point are selected (plain ``==`` on
    the values, no tolerance).  The selections are stacked and the numeric
    columns are averaged per ``(period, gcm, RCP)`` group.  A point that is
    listed twice contributes its rows twice.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Needs the columns ``lat``, ``lon``, ``period``, ``gcm`` and ``RCP``.
    df_list : pandas.DataFrame
        One row per point, with ``lat`` and ``lon`` columns.
    name : str
        Value written to the ``Dataset`` column of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(period, gcm, RCP)``; holds the group means of the
        numeric columns plus a constant ``Dataset`` column.  Empty when no
        point is given or nothing matches.
    """
    # Collect first and concatenate once: growing a frame inside the loop
    # copies it on every pass, and seeding it with an empty DataFrame relies
    # on concat behaviour that pandas has deprecated.
    selected = [
        df_data[(df_data.lat == point.lat) & (df_data.lon == point.lon)]
        for point in df_list.itertuples(index=False)
    ]
    # With no points at all, fall back to an empty slice that still has the
    # expected columns so the grouping below works and returns no rows.
    dq = pd.concat(selected) if selected else df_data.iloc[0:0]

    dq = dq.groupby(['period', 'gcm', 'RCP']).mean(numeric_only=True)
    dq['Dataset'] = name
    return dq





# File names of the four point lists (MACA / WRF, 'pyl' / 'white').
m1 = 'maca_point_pyl.csv'
m2 = 'maca_point_white.csv'
w1 = 'wrf_point_pyl.csv'
w2 = 'wrf_point_white.csv'

# Directory of the point lists, and the data directory (the latter is not
# referenced again in this section).
sdir = '/home/disk/rocinante/DATA/temp/kcp3/scripts/puyallup_summary/list/'
ddir = '/home/disk/rocinante/DATA/temp/kcp3/scripts/puyallup_summary/data/'

# Strings carrying the same coordinates as the single-point frames built
# further down; not referenced again in this section.
maca_tapps = "data_47.21875_-122.15625"
wrf_tapps = "data_47.23265_-122.14883"

# Point lists: one frame per list file.
dfm1 = pd.read_csv(sdir + '/' + m1)
dfm2 = pd.read_csv(sdir + '/' + m2)
dfw1 = pd.read_csv(sdir + '/' + w1)
dfw2 = pd.read_csv(sdir + '/' + w2)

# Turn the raw summaries into changes relative to the baseline period.
df_maca = process(maca)
df_wrf = process(wrf)

# Group means over each point list, tagged with a data set label.
d1 = get_data(df_data=df_maca, df_list=dfm1, name='MACA_pyl')
d2 = get_data(df_data=df_maca, df_list=dfm2, name='MACA_white')
d3 = get_data(df_data=df_wrf, df_list=dfw1, name='WRF_pyl')
d4 = get_data(df_data=df_wrf, df_list=dfw2, name='WRF_white')

# Single-point lists, one per data set.
dtm = pd.DataFrame(dict(lat=[47.21875], lon=[-122.15625]))
dtw = pd.DataFrame(dict(lat=[47.23265], lon=[-122.14883]))

d5 = get_data(df_data=df_maca, df_list=dtm, name='MACA_laketapps')
d6 = get_data(df_data=df_wrf, df_list=dtw, name='WRF_laketapps')

# Stack all six results and discard the averaged coordinates before saving.
df = pd.concat([d1, d2, d3, d4, d5, d6]).drop(columns=['lat', 'lon'])

df.to_csv('data/scatter_data.csv')
print(df)



    
    
