#!/usr/bin/python3

import xarray as xr
import pandas as pd
import multiprocessing as mp
from tqdm import tqdm
import itertools as it
import sys
import os

# Parallel processing for lat/lon to forc conversion
def process_cell(cell, odir):
    """Turn one grid cell into an output path and a cleaned data table.

    Args:
        cell: Single-cell dataset exposing scalar ``lat`` / ``lon``
            coordinates (read through ``.data``) and ``to_dataframe()``.
            The resulting frame must contain the columns ``PREC``,
            ``TMAX``, ``TMIN`` and ``WSPD`` and be indexed by a
            datetime-like index (``.month`` / ``.day`` are used).
        odir: Directory prefix used to build the output path.

    Returns:
        tuple: ``(out, df)`` where ``out`` is ``"<odir>/data_<lat>_<lon>"``
        and ``df`` holds the four columns, in that order, with gaps
        back-filled and ``PREC`` forced to 0 on every 29 February.
    """
    lat = cell.lat.data
    lon = cell.lon.data
    df = cell.to_dataframe()
    df = df[['PREC', 'TMAX', 'TMIN', 'WSPD']]
    # DataFrame.bfill() is the supported spelling of the deprecated
    # fillna(method='bfill'); each gap takes the next valid value.
    df = df.bfill()
    # NOTE(review): PREC is zeroed on leap days; presumably a calendar
    # adjustment for the downstream consumer -- confirm the intent.
    leap_day = (df.index.month == 2) & (df.index.day == 29)
    df.loc[leap_day, 'PREC'] = 0
    out = "{}/data_{}_{}".format(odir, lat, lon)
    return (out, df)

    
# Number of worker processes used to convert grid cells in parallel.
N_WORKERS = 10


def _grid_cells(ds):
    """Return one single-cell selection per (lon, lat) pair, lon-major."""
    return [
        ds.sel(lon=lon, lat=lat)
        for lon, lat in it.product(ds.lon.values, ds.lat.values)
    ]


def main(argv):
    """Split a gridded dataset into one tab-separated file per grid cell.

    Args:
        argv: Command-line vector; ``argv[1]`` is the input NetCDF file and
            ``argv[2]`` the base output directory. Files are written to
            ``<argv[2]>/<name>`` where ``<name>`` is the input basename up
            to the first ``_bc.nc``.

    Raises:
        SystemExit: If fewer than two arguments are supplied.
    """
    if len(argv) < 3:
        sys.exit("usage: <script> <input_bc.nc> <output_dir>")
    ifile = argv[1]
    gcm = os.path.basename(ifile).split('_bc.nc')[0]
    odir = "{}/{}".format(argv[2], gcm)

    # Keep the dataset open until every task has been sent to the workers
    # and every result collected, then release the file handle.
    with xr.open_dataset(ifile) as ds:
        os.makedirs(odir, exist_ok=True)
        cells = _grid_cells(ds)
        with mp.Pool(N_WORKERS) as pool, tqdm(total=len(cells)) as pbar:
            pending = [
                pool.apply_async(process_cell, args=(cell, odir),
                                 callback=lambda _: pbar.update(1))
                for cell in cells
            ]
            # get() re-raises any exception raised inside a worker.
            results = [task.get() for task in pending]

    for out, df in results:
        print(out)
        df.to_csv(out, sep='\t', index=False, header=False,
                  float_format='%.5f')


# The guard keeps worker processes started with the "spawn" method from
# re-running the whole script when they import this module.
if __name__ == "__main__":
    main(sys.argv)



    
