import netCDF4
import matplotlib.pyplot as plt
import cartopy.feature as cfeature
import numpy as np
import pandas as pd
import cartopy.crs as ccrs
import os
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta

def ReadNETCDF(FilePath, VariName):
    """Read one variable from a NetCDF file and return it flattened.

    Args:
        FilePath: Path of the NetCDF file to open.
        VariName: Name of the variable to read from the file.

    Returns:
        The full contents of the variable as a 1-D array (the result of
        ``[:]`` followed by ``ravel()``).

    Raises:
        KeyError: If ``VariName`` is not a variable of the file.
    """
    # The context manager closes the file handle as soon as the data have
    # been copied into memory; this function is called many times per run,
    # so leaving datasets open would accumulate open file descriptors.
    with netCDF4.Dataset(FilePath) as f:
        MyArray = f.variables[VariName][:].ravel()
    return MyArray

def _to_float_nan(values):
    """Return ``values`` as a plain float ndarray with masked entries set to NaN."""
    return np.ma.filled(np.ma.asarray(values).astype(float), np.nan)


def _restart_file(directory, date):
    """Build the path of the land restart file for ``date`` inside ``directory``."""
    return directory + "/ufs_land_restart_back.%4d-%02d-%02d_%02d-%02d-%02d.nc" % (
        date.year, date.month, date.day, date.hour, date.minute, date.second)


def Computediff(VariName):
    """Plot daily regional means of ``VariName`` for DA, open-loop and IMS data.

    For every simulation day the function reads ``VariName`` from two data
    assimilation restart files, one open-loop restart file and one IMS file,
    averages each over the grid points that lie inside the latitude/longitude
    box and have a non-negative IMS value on that day, and finally saves a
    time-series plot of the four daily means.

    Reads the module-level names ``SpatialPath`` (static fields file, used for
    the number of grid points), ``PlotPath`` and ``fname`` (output location).

    Args:
        VariName: Name of the NetCDF variable to read from every input file.

    Returns:
        None. The figure is written to ``os.path.join(PlotPath, fname)``.
    """
    print(VariName)

    # HR4 DAimsGHCN and DAghcn
    DAFilePath1 = "/scratch2/NCEPDEV/stmp1/Youlong.Xia/landDA/cycle_land/C768/DA_imsGHCN_HR4/mem000/restarts/vector"
    DAFilePath2 = "/scratch2/NCEPDEV/stmp1/Youlong.Xia/landDA/cycle_land/C768/DA_GHCN_HR4/mem000/restarts/vector"
    openloopPath = "/scratch2/NCEPDEV/stmp1/Youlong.Xia/landDA/cycle_land/C768/openloop_HR4/mem000/restarts/vector"
    snodasFilePath = "/scratch2/NCEPDEV/land/data/evaluation/IMS/IMS4km/C768"

    RestartDate = datetime.strptime("2019-08-01_00-00-00", '%Y-%m-%d_%H-%M-%S')
    DeltaTime = 1  # day
    SimulationDuration = 366  # days -366 for a year

    # Candidate regions:
    # North Central [255-265,40-50N]
    # Asia [60-120E,25-50N]
    # US [240-275, 35-50oN]
    # Europe [0-45oE, 40-70oN]
    # Russia [45-150oE,40-70oN]
    latmin = 35.0
    latmax = 50.0
    lonmin = 240.0
    lonmax = 275.0

    # Only the length of the land mask is needed: it fixes how many leading
    # grid points of each vector are considered.
    npoints = len(ReadNETCDF(SpatialPath, "land_mask"))

    xx = np.arange(SimulationDuration, dtype=float)
    DA_avg1 = np.full(SimulationDuration, np.nan)
    DA_avg2 = np.full(SimulationDuration, np.nan)
    OP_avg = np.full(SimulationDuration, np.nan)
    SNODAS_avg = np.full(SimulationDuration, np.nan)

    in_region = None  # lat/lon box mask, built once from the first IMS file

    for SimDay in range(SimulationDuration):
        ThisRestartDate = RestartDate + timedelta(days=SimDay * DeltaTime)
        print(ThisRestartDate)

        # Read DA and open loop data files
        DAFile1 = _restart_file(DAFilePath1, ThisRestartDate)
        DAFile2 = _restart_file(DAFilePath2, ThisRestartDate)
        OPFile = _restart_file(openloopPath, ThisRestartDate)
        snodasFile = snodasFilePath + "/IMSscf.C768.4km.%4d%02d%02d.nc" % (
            ThisRestartDate.year, ThisRestartDate.month, ThisRestartDate.day)

        DA_array1 = _to_float_nan(ReadNETCDF(DAFile1, VariName))[:npoints]
        DA_array2 = _to_float_nan(ReadNETCDF(DAFile2, VariName))[:npoints]
        OP_array = _to_float_nan(ReadNETCDF(OPFile, VariName))[:npoints]
        snodas_array = _to_float_nan(ReadNETCDF(snodasFile, VariName))[:npoints]

        if in_region is None:
            # The box test does not depend on the day, so evaluate it once.
            latout = _to_float_nan(ReadNETCDF(snodasFile, "lat"))[:npoints]
            lonout = _to_float_nan(ReadNETCDF(snodasFile, "lon"))[:npoints]
            with np.errstate(invalid="ignore"):
                in_region = ((latout >= latmin) & (latout <= latmax)
                             & (lonout >= lonmin) & (lonout <= lonmax))

        # Select the valid points afresh for every day. Re-using arrays that
        # were filled on earlier days would let a point that is missing today
        # contribute its value from the last day on which it was valid.
        # NaN (formerly masked) IMS values compare False and are excluded.
        with np.errstate(invalid="ignore"):
            valid = in_region & (snodas_array >= 0.0)

        # nanmean yields NaN (with a RuntimeWarning) when no point is valid.
        DA_avg1[SimDay] = np.nanmean(DA_array1[valid])
        DA_avg2[SimDay] = np.nanmean(DA_array2[valid])
        OP_avg[SimDay] = np.nanmean(OP_array[valid])
        # IMS values are scaled by 0.01 -- presumably percent to fraction;
        # TODO confirm against the IMS file metadata.
        SNODAS_avg[SimDay] = np.nanmean(0.01 * snodas_array[valid])

    plt.figure(figsize=(12, 8))
    ax1 = plt.subplot(1, 1, 1)
    ax1.set_ylim(0.0, 1.0)
    ax1.plot(xx, DA_avg1, 'r-', label="DAimsGHCN")
    ax1.plot(xx, DA_avg2, 'g-', label="DAghcn")
    ax1.plot(xx, OP_avg, 'b-', label="Openloop")
    ax1.plot(xx, SNODAS_avg, 'k-', label="IMS")
    ax1.set_ylabel("Snow Cover Fraction", fontsize=20)
    ax1.set_xlabel("Simulation days since 01 August 2019", fontsize=20)
    ax1.set_title("US[85-120oW, 35-50oN]", pad=20, fontsize=20)
    ax1.legend()
    plt.tight_layout()
    # Make sure the output directory exists so the long run is not lost to a
    # missing-folder error at the very end.
    if PlotPath:
        os.makedirs(PlotPath, exist_ok=True)
    plt.savefig(os.path.join(PlotPath, fname))
    plt.close()

# Run configuration. These module-level names are read inside Computediff.
# Static fields file for the C768 grid.
SpatialPath = "/scratch2/NCEPDEV/land/data/forcing/era5/static/C768/ufs-land_C768_hr3_static_fields.nc"
# Directory and file name of the figure that Computediff saves.
PlotPath = "./plots/HR4C768_DAimsGHCN/"
fname = "US_HR4_DAimsGHCN_DAghcn_openloop_SNOC_IMS_2019-2020.png"

# Each entry is passed to Computediff in turn.
# NOTE: fname is fixed, so with several entries each call would write to the
# same output file.
VarList = ["snow_cover_fraction"]
for var in VarList:
    Computediff(var)
