#!/usr/bin/python3

import pandas as pd
import os
import sys

# Convert a whitespace-delimited stream-temperature text file (for example
# `kitsap_a0-b9_his.temp`) into a NetCDF file indexed by timestamp and by the
# identifier stored in column 3.
#
# Usage: <this script> <input.temp>
#
# Column layout of the input, as recorded in the original author's notes:
#   Time, nyear, nd, ncell, ns, t0, t_head(nr), dbt(ncell), depth(ncell),
#   u(ncell), qin(ncell)
# <time>:   decimal year
# <nyear>:  calendar year
# <nd>:     day of year
# <nr>:     reach index (appears only as the subscript of t_head in the
#           layout above, not as a column of its own)
# <ncell>:  node index in this reach
# <ns>:     segment index in this reach
# <T_0>:    stream temperature
# <T_head>: headwater temperature (should be the same for a certain reach)
# <dbt>:    air temperature (should be the same for the nodes which are in
#           the same cell)
#
# NOTE(review): this script labels column 3 'rbm'; per the layout above that
# column would be <ncell> -- confirm against the model version that wrote
# the file.


def read_rbm_output(path):
    """Load the text file and keep only the columns this script needs.

    Args:
        path: Path (or file-like object) of the whitespace-delimited,
            header-less input.

    Returns:
        DataFrame with columns 'date' (decimal year), 'year', 'doy',
        'rbm' and 'temp', taken from input columns 0, 1, 2, 3 and 5.
    """
    raw = pd.read_csv(path, header=None, sep=r'\s+')
    # Copy so that renaming the columns never touches a view of `raw`.
    table = raw[[0, 1, 2, 3, 5]].copy()
    table.columns = ['date', 'year', 'doy', 'rbm', 'temp']
    return table


def observed_year_lengths(df):
    """Return, for every row, the largest day-of-year seen in that row's year.

    The value is used as the length of the year when decoding the decimal
    year.  It is derived from the data itself, so it is only the true year
    length (365/366) for years that run through their final day.
    NOTE(review): partially covered years will decode to wrong hours --
    confirm the inputs always contain complete years.

    Args:
        df: DataFrame with 'year' and 'doy' columns.

    Returns:
        Series aligned with ``df`` holding the per-year maximum of 'doy'.
    """
    return df.groupby('year')['doy'].transform('max')


def decode_timestamps(df, year_length):
    """Replace the decimal-year 'date' column with real timestamps.

    The hour is recovered by inverting
    ``date = year + (doy - 1 + (hour + 1) / 24) / year_length``.

    Args:
        df: DataFrame with 'date' (decimal year), 'year', 'doy', 'rbm' and
            'temp' columns.
        year_length: Series aligned with ``df`` giving the number of days
            used for each row's year.

    Returns:
        New DataFrame with columns 'date' (datetime64), 'rbm' and 'temp'.

    Raises:
        ValueError: If a row's year/day-of-year/hour digits do not form a
            valid '%Y%j%H' timestamp.
    """
    hour = 24 * ((df['date'] - df['year']) * year_length - df['doy'] + 1) - 1
    # Rounding absorbs the limited precision of the decimal year; the modulo
    # folds any value that lands on 24 (or -1) back into the 0..23 range.
    hour = hour.round(0).astype(int) % 24

    # Pack the parts as YYYYDDDHH digits; the multipliers zero-pad doy and hour.
    stamp = df['year'] * 100000 + df['doy'] * 100 + hour

    decoded = df[['rbm', 'temp']].copy()
    decoded.insert(0, 'date',
                   pd.to_datetime(stamp.astype(str), format='%Y%j%H'))
    return decoded


def output_path(ifile):
    """Derive the NetCDF file name that belongs to ``ifile``.

    Only a trailing '.temp' extension is swapped for '.nc'.  Any other name
    gets '.nc' appended, so the result can never equal the input path and
    the input file is never overwritten.

    Args:
        ifile: Path of the input text file.

    Returns:
        Path of the NetCDF file to write.
    """
    root, ext = os.path.splitext(ifile)
    if ext == '.temp':
        return root + '.nc'
    return ifile + '.nc'


def main(argv=None):
    """Run the conversion for the file named on the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.  Only the first argument is used.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit('usage: {} <input.temp>'.format(os.path.basename(sys.argv[0])))
    ifile = args[0]

    df = read_rbm_output(ifile)

    print('--| Decode date')
    year_length = observed_year_lengths(df)

    print('--| Format timestamp')
    df = decode_timestamps(df, year_length)

    print('--| Convert to dateset')
    ds = df.set_index(['date', 'rbm']).to_xarray()

    ofile = output_path(ifile)
    print('--| Write to file: ', ofile)
    ds.to_netcdf(ofile)


if __name__ == '__main__':
    main()
