#!/usr/bin/env python3
import argparse
import pandas as pd
import numpy as np
import sys
from datetime import datetime as dt
import calendar as cal

###############################################################################
# Applies bias correction to a simulation based on a training table
###############################################################################

parser = argparse.ArgumentParser()
parser.add_argument('train', help='Observational data for reference')
parser.add_argument('sim', help='Historical simulation for training')
#parser.add_argument('--win', action='store', type=int, default=5, help='Seasonal window size. ')
parser.add_argument('out', help='Output file')
args = parser.parse_args()

hdr = ['Year', 'Month', 'Day', 'Flow']
train_tbl = args.train
sim_file = args.sim
out_file = args.out
# ----------------------------------------------------------------------------
# Calculate day of year value
def get_dofy(df):
    # Calculate datetime
    dates = pd.to_datetime(df['Year'].astype(str) + '/' +
                           df['Month'].astype(str) + '/' +
                           np.floor(df['Day']).astype(int).astype(str),
                           format='%Y/%m/%d')
    # Convert to day of year
    df['dofy'] = dates.apply(lambda x: dt.strftime(x, '%j')).astype(int)
    
    # Adjust for leap day so all years start on 1 and end on 366. 60 used for leap day
    cor = df['Year'].apply(lambda x: 0 if cal.isleap(x) else 1)
    cor[df['dofy'] < 60] = 0
    df['dofy'] += cor
    return df


# Read training table and simulation
tdf = pd.read_csv(train_tbl, sep="[-, /\t]*", engine='python').reset_index()
sim = pd.read_csv(sim_file, sep="[-, /\t]*", engine='python', header=None)
sim.columns = hdr


# Resolve which bin a flow belongs to and return the correct ratio
def get_ratio(flow, bins):
    bins = bins.reset_index(drop=True)
    bindex = np.digitize(flow, bins['Floor'], right=True)
    return bins.at[bindex - 1, 'Ratio'] 
    

# Bin current simulation according to table and apply corresponding factor
sim = get_dofy(sim)
factor = sim.apply(lambda x: get_ratio(x['Flow'], tdf[tdf['dofy'] == x['dofy']]), axis=1)
sim['Flow'] = sim['Flow'] * factor

# Write to file
sim.drop(['dofy'], axis=1, inplace=True)
sim.to_csv(out_file, index=False, header=False)
