## Import some standard packages and define a few functions
import os
# Hide default info, logs, and warnings - comment out if you need to troubleshoot
'TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ[
import time
import shutil
import importlib
import numpy as np
import pandas as pd
# from matplotlib import cm
from sklearn import metrics
import matplotlib.pyplot as plt
from Scripts import PlotHelpers
# from matplotlib.colors import Normalize
from Scripts import ReadDB, MiscFuncs, NNetFuncs
# dbNames = {
# 'Clean/SecondStage/TA_1_1_1':'T air',
# 'Clean/SecondStage/RH_1_1_1':'RH',
# 'Clean/SecondStage/FC':'FCO2',
# 'Clean/SecondStage/FCH4':'FCH4',
# 'Clean/SecondStage/PPFD_IN_1_1_1':'PPFD',
# 'Clean/SecondStage/NETRAD_1_1_1':'Rn',
# 'Clean/SecondStage/P_1_1_1':'Precip',
# 'Flux/qc_co2_flux':'qc_FCO2',
# 'Flux/qc_ch4_flux':'qc_FCH4',
# 'Clean/SecondStage/USTAR':'u*',
# 'Clean/SecondStage/TS_1':'T soil 5cm',
# 'Clean/SecondStage/TS_2':'T soil 10cm',
# 'Clean/SecondStage/TS_3':'T soil 50cm',
# 'Clean/SecondStage/wind_speed':'Wind speed',
# 'Clean/SecondStage/wind_dir':'Wind dir',
# 'Clean/SecondStage/WTD_1_1_1':'Water table',
# }
# Local = '/mnt/c/Users/User/PostDoc_Work/database/'
# Remote = '/mnt/w/'
# Dir = Local
# read_new = True
# if read_new == True:
# Data = ReadDB.get_Traces(Site,list(dbNames.keys()),Dir=Dir)
# Data = Data.rename(columns=dbNames)
# Data.to_csv(f'InputData/{Site}_Data.csv')
# else:
# Data = pd.read_csv(f'InputData/{Site}_Data.csv',parse_dates=['TIMESTAMP'],index_col='TIMESTAMP')
Neural Networks for Eddy Covarinace
= 'BB'
Site = pd.read_csv(f'InputData/NN_Data_{Site}.csv',parse_dates=['TIMESTAMP'],index_col='TIMESTAMP')
Data # BB2 = pd.read_csv('InputData/NN_Data_BB2.csv',parse_dates=['TIMESTAMP'],index_col='TIMESTAMP')
# def Q_clip(df,trace,q=[.001,.999]):
# qv=df[trace].quantile(q).values
# df.loc[((df[trace]<qv[0])|df[trace]>qv[-1]),trace]=np.nan
# return(df[trace])
= Data.loc[Data.index.year>=2018].copy()
Data
= {'WTD_1_1_1':'Water table',
Vars 'TA_1_1_1':'T air',
'RH_1_1_1':'RH',
'FC':'FCO2',
'FCH4':'FCH4',
'PPFD_IN_1_1_1':'PPFD',
'NETRAD_1_1_1':'Rn',
'P_1_1_1':'Precip',
'qc_co2_flux':'qc_FCO2',
'qc_ch4_flux':'qc_FCH4',
'USTAR':'u*',
'TS_1':'T soil 5cm',
'TS_2':'T soil 10cm',
'TS_3':'T soil 50cm',
'WS_1_1_1':'Wind speed',
'WD_1_1_1':'Wind dir'}
= Data.rename(columns=Vars)
Data = Data.loc[Data.index.date==pd.Timestamp('2020-03-18').date(),'Water table'].mean()
reff 'Water table'] = Data['Water table'] - reff
Data[# BB1 = BB1.rename(columns=Vars)
# BB2 = BB2.rename(columns=Vars)
# BB1['Site'] = 1
# BB1_reff = BB1.loc[BB1.index.date==pd.Timestamp('2020-03-18').date(),'Water table'].mean()
# BB1['Water table'] = BB1['Water table'] - BB1_reff
# BB2['Site'] = 2
# BB2_reff = BB2.loc[BB2.index.date==pd.Timestamp('2020-03-18').date(),'Water table'].mean()
# BB2['Water table'] = BB2['Water table'] - BB2_reff
# bearing = 166
# BB1.loc[BB1['Wind dir']-bearing]
# Data = pd.concat([BB1,BB2])
= Data.drop(columns=['SW_IN_1_1_1','LW_IN_1_1_1','SW_OUT_1_1_1','LW_OUT_1_1_1','VPD_1_1_1','PA_1_1_1'])
Data print(Data.shape)
= Data.dropna(how='any')
Data print(Data.shape)
(105169, 16)
(29988, 16)
Pre-processing
reload(ReadDB)
importlib.
'VPD'] = MiscFuncs.Calc_VPD(Data['T air'],Data['RH'])
Data['Water table'] = -1*(70-Data['Water table'])
Data['DOY'] = Data.index.dayofyear
Data[
= ['FCO2','FCH4']
target
'Rand']=np.random.random(Data['FCO2'].values.shape)
Data['Rand_Binary'] = Data['Rand']-.5
Data['Rand_Skew'] = Data['Rand']**.25
Data['Rand_Binary']>0,'Rand_Binary']=1
Data.loc[Data['Rand_Binary']<0,'Rand_Binary']=-1
Data.loc[Data[=['Rand','Rand_Binary','Rand_Skew']
Rand_Scalars
print(Data[['FCO2','FCH4']].describe())
filter = ReadDB.filterFlux(Data,target)
filter.QA_QC()
# filter.dir_mask('Wind dir',[[0,45],[315,360]])
filter.rain('Precip',thresh=0)
# filter.MAD(z=5)
filter.uStar('u*',u_thresh=0.1)
'FCO2_Clean','FCH4_Clean']] = filter.df[['FCO2','FCH4']].copy()
Data[[
print(Data[['FCO2_Clean','FCH4_Clean']].describe())
= ['fco2','fch4','precip']
excludes
= []
Full_inputs
for val in list(Data.columns):
= 0
exct for ex in excludes:
if ex in val.lower():
+= 1
exct if exct < 1:
Full_inputs.append(val)
Data.count()
FCO2 FCH4
count 29988.000000 29988.000000
mean -0.455273 48.452158
std 2.314013 65.492819
min -36.371260 -198.555830
25% -1.783542 9.644733
50% -0.224877 31.494045
75% 0.666671 82.179884
max 45.147030 686.522200
FCO2_Clean FCH4_Clean
count 15396.000000 17450.000000
mean -1.196605 55.352024
std 1.856238 49.478366
min -16.697300 -176.868300
25% -2.545481 13.166115
50% -1.185378 40.418017
75% 0.259368 93.315557
max 23.026184 514.325700
Rn 29988
PPFD 29988
T air 29988
T soil 5cm 29988
T soil 10cm 29988
T soil 50cm 29988
RH 29988
Precip 29988
Wind speed 29988
Wind dir 29988
Water table 29988
u* 29988
FCO2 29988
FCH4 29988
qc_FCO2 29988
qc_FCH4 29988
VPD 29988
DOY 29988
Rand 29988
Rand_Binary 29988
Rand_Skew 29988
FCO2_Clean 15396
FCH4_Clean 17450
dtype: int64
=plt.subplots()
fig,ax'FCO2'],Data['PPFD'])
ax.scatter(Data['FCO2_Clean'],Data['PPFD'],facecolors='none', edgecolors='r')
ax.scatter(Data['FCO2_Clean']<0)&(Data['PPFD']<5))] Data.loc[((Data[
Rn | PPFD | T air | T soil 5cm | T soil 10cm | T soil 50cm | RH | Precip | Wind speed | Wind dir | ... | FCH4 | qc_FCO2 | qc_FCH4 | VPD | DOY | Rand | Rand_Binary | Rand_Skew | FCO2_Clean | FCH4_Clean | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TIMESTAMP | |||||||||||||||||||||
2018-09-07 01:00:00 | -43.128560 | 0.008049 | 10.359350 | 16.032904 | 15.806022 | 15.322218 | 95.989540 | 0.0 | 0.447382 | 182.968890 | ... | 109.246170 | 0.0 | 0.0 | 5.063089e-01 | 250 | 0.364599 | -1.0 | 0.777059 | -0.251189 | 109.246170 |
2018-09-07 21:30:00 | -5.673309 | 0.010729 | 14.604237 | 15.951805 | 15.627492 | 15.252795 | 98.143490 | 0.0 | 0.989659 | 86.495705 | ... | 91.680466 | 0.0 | 0.0 | 3.097148e-01 | 250 | 0.586010 | 1.0 | 0.874936 | -0.692962 | 91.680466 |
2018-09-08 19:30:00 | -17.488834 | 0.064360 | 15.771963 | 16.176157 | 15.903015 | 15.447454 | 95.503620 | 0.0 | 0.905848 | 27.063473 | ... | 62.703990 | 0.0 | 0.0 | 8.085810e-01 | 251 | 0.074613 | -1.0 | 0.522640 | -1.475593 | 62.703990 |
2018-09-08 20:00:00 | -15.425125 | 0.058013 | 15.348302 | 16.163733 | 15.850715 | 15.435830 | 95.449990 | 0.0 | 0.569989 | 161.495360 | ... | 66.677345 | 0.0 | 0.0 | 7.963082e-01 | 251 | 0.004228 | -1.0 | 0.254997 | -1.900765 | 66.677345 |
2018-09-08 20:30:00 | -12.295860 | 0.013409 | 15.044191 | 16.164623 | 15.738058 | 15.450691 | 96.822020 | 0.0 | 1.152366 | 135.747680 | ... | 70.325740 | 0.0 | 0.0 | 5.454207e-01 | 251 | 0.917991 | 1.0 | 0.978835 | -1.721321 | 70.325740 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-02-16 23:00:00 | -75.242165 | 0.000000 | 5.555014 | 3.498761 | 3.629818 | 6.416536 | 79.725365 | 0.0 | 4.218254 | 319.816000 | ... | 2.240025 | 0.0 | 0.0 | 1.845651e+00 | 47 | 0.575242 | 1.0 | 0.870889 | -1.093810 | 2.240025 |
2021-02-22 21:00:00 | -36.564200 | 0.000000 | 6.883492 | 6.075706 | 5.752314 | 6.460669 | 75.322860 | 0.0 | 4.409684 | 308.870450 | ... | -0.343134 | 0.0 | 1.0 | 2.462149e+00 | 53 | 0.185709 | -1.0 | 0.656460 | -0.975886 | NaN |
2022-01-27 00:30:00 | -4.966819 | 0.346958 | 1.410606 | 5.505546 | 5.795523 | 7.158379 | 100.000000 | 0.0 | 1.002757 | 18.365236 | ... | 5.716230 | 0.0 | 1.0 | -1.110223e-15 | 27 | 0.202556 | -1.0 | 0.670867 | -0.438759 | NaN |
2023-01-22 00:30:00 | -34.673756 | 0.000000 | 4.101948 | 5.547119 | 5.923764 | 7.107648 | 96.597960 | 0.0 | 1.953380 | 331.442750 | ... | 18.144798 | 0.0 | 1.0 | 2.798234e-01 | 22 | 0.491008 | -1.0 | 0.837090 | -0.784649 | NaN |
2023-03-24 23:30:00 | -5.295106 | 0.005812 | 3.700751 | 8.030537 | 8.247041 | 7.808859 | 100.000000 | 0.0 | 1.226768 | 59.598522 | ... | -36.799400 | 0.0 | 1.0 | 0.000000e+00 | 83 | 0.924235 | 1.0 | 0.980495 | -0.735831 | NaN |
71 rows × 23 columns
Make Scenarios
def Configure_Run(run,name,df,Base='',H=[10],N_scale=1):
print(df.shape)
'config']['Name']=name
run['config']['Base']=Base
run[
# Nodes / layer = the number of inputs times a multiplier
# Defaults to a scaled function of the number of inputs
= [h*int(len(run['config']['inputs'])**N_scale) for h in H]
Nodes 'config']['Nodes']=Nodes
run['config']['Activation']=['relu' for h in H]
run['config']['epochs']=250
run[
# Drop_Thresh = RI'^(Prune_scale[0])+Prune_scale[1]
# RI' is the sum of RI for the random scalar inputs
'config']['Prune_scale']=[1,0]
run[
'config']['Norm']=True
run['config']['RF_comp']=False
run['config']['validation_split']=.2
run['config']['patience']=2
run['config']['N_models']=10
run[
'Training'] = {}
run[# run['Evaluation'] = {}
= Model_Runs[key]['config']['target']
target = Model_Runs[key]['config']['inputs']
inputs # Scale to inputs before dropping all NaN rows
'config']['mean'] = df[inputs].mean().values
Model_Runs[key]['config']['variance'] = df[inputs].std().values**2
Model_Runs[key][# print(df[target+inputs].shape)
# print('!!!')
= df[target+inputs].dropna().copy()
df
'Training']['X'] = df[inputs].values
Model_Runs[key]['Training']['Y'] = df[target].values.flatten()
Model_Runs[key][# print(df[target].isna())
# print(df.loc[df[target].isna().values])
# Larger batch > runs faster > but won't fit as well
= np.ceil(Model_Runs[key]['Training']['Y'].shape[0]*.02)
batch if batch<32:
=32
batch'config']['batch_size']=int(batch)
run[
return(run)
= {
Model_Runs 'Full_Model_FCH4':{
'config':{
'target':['FCH4_Clean'],
'inputs':Full_inputs,
'Rand_Scalars':Rand_Scalars,
},
},'Full_Model_FCO2':{
'config':{
'target':['FCO2_Clean'],
'inputs':Full_inputs,
'Rand_Scalars':Rand_Scalars,
},
},
}
for key in Model_Runs.keys():
print(key)
= key.split('_')[-1]
Base = Configure_Run(Model_Runs[key],key,Data.copy(),Base=Base)
Model_Runs[key] 'config'] Model_Runs[key][
Full_Model_FCH4
(48159, 24)
Full_Model_FCO2
(48159, 24)
{'target': ['FCO2_Clean'],
'inputs': ['Rn',
'PPFD',
'T air',
'T soil 5cm',
'T soil 10cm',
'T soil 50cm',
'RH',
'Wind speed',
'Wind dir',
'Water table',
'u*',
'Site',
'VPD',
'DOY',
'Rand',
'Rand_Binary',
'Rand_Skew'],
'Rand_Scalars': ['Rand', 'Rand_Binary', 'Rand_Skew'],
'Name': 'Full_Model_FCO2',
'Base': 'FCO2',
'Nodes': [170],
'Activation': ['relu'],
'epochs': 250,
'Prune_scale': [1, 0],
'Norm': True,
'RF_comp': False,
'validation_split': 0.2,
'patience': 2,
'N_models': 10,
'mean': array([ 1.76824452e+02, 5.02972458e+02, 1.36348069e+01, 1.31667047e+01,
1.25963842e+01, 1.20687096e+01, 7.43447710e+01, 2.37326404e+00,
1.71828349e+02, -8.07328006e+01, 2.17614683e-01, 1.37731265e+00,
5.02962089e+00, 1.72169190e+02, 4.97900904e-01, -6.16707158e-03,
7.98814945e-01]),
'variance': array([4.53543913e+04, 2.67568226e+05, 5.04487360e+01, 2.76159388e+01,
2.12257250e+01, 1.42717735e+01, 3.05131902e+02, 1.99271070e+00,
6.64751506e+03, 2.40256658e+02, 1.53389998e-02, 2.34952693e-01,
2.39933935e+01, 6.56642152e+03, 8.32717641e-02, 9.99982731e-01,
2.68468931e-02]),
'batch_size': 455}
print(Data.shape)
print(Data[['FCH4_Clean']+Full_inputs].dropna().shape)
(48159, 24)
(17231, 18)
Build and train model
reload(NNetFuncs)
importlib.
def Build_Train_Eval(Run,print_sum=False):
= Run['config']
config = Run['Training']
Training =print_sum)
NNetFuncs.make_Dense_model(config,print_sum=NNetFuncs.train_model(config,Training)
Eval=NNetFuncs.run_Model(config,Eval)
_
for Run in Model_Runs.keys():
print(Run)
=True)
Build_Train_Eval(Model_Runs[Run],print_sumprint('\n\n')
Full_Model_FCH4
Model: "Full_Model_FCH4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 17)] 0
normalization (Normalizatio (None, 17) 0
n)
dense (Dense) (None, 170) 3060
dense_1 (Dense) (None, 1) 171
=================================================================
Total params: 3,231
Trainable params: 3,231
Non-trainable params: 0
_________________________________________________________________
None
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
(12923,) 0
Training Time:
102.1 Seconds
(4308,)
(10, 4308, 1)
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:179: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'dy_d{xi}_norm_CI95']=full_out['dy_dx_norm'].std(axis=0)[:,i]/(N)**.5*t_score
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:177: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'dy_d{xi}_norm']=full_out['dy_dx_norm'].mean(axis=0)[:,i]
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:178: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'{xi}_norm']=full_out['X_norm'].mean(axis=0)[:,i]
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:179: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'dy_d{xi}_norm_CI95']=full_out['dy_dx_norm'].std(axis=0)[:,i]/(N)**.5*t_score
NN Model
Validation metrics (ensemble mean):
r2 = 0.5698
RMSE = 9.52423
Run Time:
1.03 Seconds
10 models
Mean epochs/model: 64.0
Full_Model_FCO2
Model: "Full_Model_FCO2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 17)] 0
normalization (Normalizatio (None, 17) 0
n)
dense (Dense) (None, 170) 3060
dense_1 (Dense) (None, 1) 171
=================================================================
Total params: 3,231
Trainable params: 3,231
Non-trainable params: 0
_________________________________________________________________
None
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
(17059,) 0
Training Time:
56.89 Seconds
(5687,)
(10, 5687, 1)
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:179: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'dy_d{xi}_norm_CI95']=full_out['dy_dx_norm'].std(axis=0)[:,i]/(N)**.5*t_score
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:177: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'dy_d{xi}_norm']=full_out['dy_dx_norm'].mean(axis=0)[:,i]
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:178: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'{xi}_norm']=full_out['X_norm'].mean(axis=0)[:,i]
/home/jskeeter/NN_Applications/Scripts/NNetFuncs.py:179: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
Mean_Output[f'dy_d{xi}_norm_CI95']=full_out['dy_dx_norm'].std(axis=0)[:,i]/(N)**.5*t_score
NN Model
Validation metrics (ensemble mean):
r2 = 0.88828
RMSE = 0.46135
Run Time:
1.53 Seconds
10 models
Mean epochs/model: 39.2
Evaluate and prune the model
reload(NNetFuncs)
importlib.
for key in ['Full_Model_FCH4','Full_Model_FCO2']:
= 1
Continue_Prune = time.time()
T1 while Continue_Prune > 0:
= Model_Runs[key]['config']
config = NNetFuncs.Prune(config,Verbose=True)
RI = list(RI.loc[RI['Drop']==0].index)
Pruned_Inputs print('Inputs: ',Pruned_Inputs,'\n\n')
if len(Pruned_Inputs)<len(config['inputs'])-len(Rand_Scalars) and len([i for i in Rand_Scalars if i in list(RI.index)]):
= f"Pruned_Model_{config['Base']}"
key if len(Pruned_Inputs)<len(Rand_Scalars):
= RI.loc[RI.index.isin(Rand_Scalars)].sort_values(by='RI_bar').index[0]
drop_rand 'Rand_Scalars'].remove(drop_rand)
config[for r in config['Rand_Scalars']:
if r not in Pruned_Inputs:
Pruned_Inputs.append(r)
={
Model_Runs[key]'config':{
'target':config['target'],
'inputs':Pruned_Inputs,
'Rand_Scalars':Rand_Scalars,
},
}
= Configure_Run(Model_Runs[key],key,Data.copy(),config['Base'])
Model_Runs[key] print('Re-training on ',Model_Runs[key]['config']['inputs'])
Build_Train_Eval(Model_Runs[key])print()
else:
= 0
Continue_Prune = f"Final_Model_{config['Base']}"
key ={
Model_Runs[key]'config':{
'target':config['target'],
'inputs':Pruned_Inputs
},
}
print('Final inputs: ',Model_Runs[key]['config']['inputs'])#[:-1],'\n\n')
= Configure_Run(Model_Runs[key],key,Data.copy(),config['Base'])
Model_Runs[key] 'config']['N_models']=30
Model_Runs[key]['config']['patience']=2
Model_Runs[key][# Build_Train_Eval(Model_Runs[key],print_sum=True)
# shutil.rmtree(f'Models/{Base}/Pruned_Model')
= time.time()
T2
print("Pruning Time: ", T2-T1)
Rn T air Wind speed Rand_Binary Wind dir T soil 5cm u* \
RI_bar 1.05 1.17 1.18 1.35 1.81 1.84 2.00
lower_bound 0.93 1.03 0.98 1.23 1.68 1.71 1.71
Drop 7.76 7.76 7.76 7.76 7.76 7.76 7.76
Rand Rand_Skew VPD PPFD Water table RH T soil 10cm \
RI_bar 3.05 3.36 3.88 3.89 4.38 4.79 8.68
lower_bound 2.75 3.03 3.51 3.44 2.77 4.56 7.50
Drop 7.76 7.76 7.76 7.76 7.76 7.76 7.76
Site DOY T soil 50cm
RI_bar 9.05 18.03 30.49
lower_bound 8.29 17.30 27.38
Drop 0.00 0.00 0.00
Inputs: ['Site', 'DOY', 'T soil 50cm']
(48159, 24)
Re-training on ['Site', 'DOY', 'T soil 50cm', 'Rand', 'Rand_Binary', 'Rand_Skew']
Training Time:
143.61 Seconds
(4308,)
(10, 4308, 1)
NN Model
Validation metrics (ensemble mean):
r2 = 0.52758
RMSE = 9.9806
Run Time:
0.49 Seconds
10 models
Mean epochs/model: 97.2
Rand_Skew Rand Rand_Binary Site DOY T soil 50cm
RI_bar 1.96 1.98 2.16 15.82 16.18 61.90
lower_bound 1.53 1.63 1.94 14.18 15.58 59.81
Drop 6.10 6.10 6.10 0.00 0.00 0.00
Inputs: ['Site', 'DOY', 'T soil 50cm']
Final inputs: ['Site', 'DOY', 'T soil 50cm']
(48159, 24)
Pruning Time: 144.25185298919678
Wind dir Wind speed u* T air DOY Water table RH \
RI_bar 0.28 0.85 0.92 0.98 1.23 1.27 1.45
lower_bound 0.26 0.76 0.83 0.79 1.12 1.24 1.29
Drop 6.00 6.00 6.00 6.00 6.00 6.00 6.00
Rand_Binary Rand T soil 5cm Rand_Skew VPD Site \
RI_bar 1.57 1.82 2.26 2.61 3.60 3.89
lower_bound 1.41 1.56 1.84 2.24 3.12 3.37
Drop 6.00 6.00 6.00 6.00 6.00 6.00
T soil 50cm T soil 10cm Rn PPFD
RI_bar 6.28 7.00 15.79 48.18
lower_bound 5.91 5.97 14.88 43.79
Drop 6.00 6.00 0.00 0.00
Inputs: ['Rn', 'PPFD']
(48159, 24)
Re-training on ['Rn', 'PPFD', 'Rand', 'Rand_Skew']
Training Time:
76.58 Seconds
(5687,)
(10, 5687, 1)
NN Model
Validation metrics (ensemble mean):
r2 = 0.64158
RMSE = 0.82636
Run Time:
0.64 Seconds
10 models
Mean epochs/model: 36.7
Rand Rand_Skew Rn PPFD
RI_bar 8.82 13.27 20.25 57.66
lower_bound 6.46 9.30 17.78 51.92
Drop 22.08 22.08 22.08 0.00
Inputs: ['PPFD']
(48159, 24)
Re-training on ['PPFD', 'Rand_Skew']
Training Time:
95.23 Seconds
(5687,)
(10, 5687, 1)
NN Model
Validation metrics (ensemble mean):
r2 = 0.63319
RMSE = 0.83598
Run Time:
0.65 Seconds
10 models
Mean epochs/model: 47.0
Rand_Skew PPFD
RI_bar 0.19 99.81
lower_bound 0.14 98.10
Drop 0.19 0.00
Inputs: ['PPFD']
Final inputs: ['PPFD']
(48159, 24)
Pruning Time: 173.2687132358551
for key in ['Final_Model_FCH4','Final_Model_FCO2']:
'config']['N_models']=30
Model_Runs[key]['config']['patience']=2
Model_Runs[key][=True)
Build_Train_Eval(Model_Runs[key],print_sum Model_Runs.keys()
Model: "Final_Model_FCH4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 3)] 0
normalization (Normalizatio (None, 3) 0
n)
dense (Dense) (None, 30) 120
dense_1 (Dense) (None, 1) 31
=================================================================
Total params: 151
Trainable params: 151
Non-trainable params: 0
_________________________________________________________________
None
Training Time:
609.65 Seconds
(4308,)
(30, 4308, 1)
NN Model
Validation metrics (ensemble mean):
r2 = 0.52796
RMSE = 9.72222
Run Time:
1.34 Seconds
30 models
Mean epochs/model: 113.83333333333333
Model: "Final_Model_FCO2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 1)] 0
normalization (Normalizatio (None, 1) 0
n)
dense (Dense) (None, 10) 20
dense_1 (Dense) (None, 1) 11
=================================================================
Total params: 31
Trainable params: 31
Non-trainable params: 0
_________________________________________________________________
None
Training Time:
274.08 Seconds
(5687,)
(30, 5687, 1)
NN Model
Validation metrics (ensemble mean):
r2 = 0.63214
RMSE = 0.85985
Run Time:
1.05 Seconds
30 models
Mean epochs/model: 64.76666666666667
dict_keys(['Full_Model_FCH4', 'Full_Model_FCO2', 'Pruned_Model_FCH4', 'Final_Model_FCH4', 'Pruned_Model_FCO2', 'Final_Model_FCO2'])
Inspect pruned model
reload(PlotHelpers)
importlib.
= 'Final_Model_FCO2'
key
= Model_Runs[key]['config']
config = Model_Runs[key]['Training']
Eval =NNetFuncs.run_Model(config,Eval)
full_out# print('\n\n')
= Model_Runs[key]['config']['Base']
Base = Model_Runs[key]['config']['Name']
Name
# run = 'Feature_Detection'
if config['Base']=='FCH4':
= '$nmol m^{-2} s^{-1}$'
unit
else:
= '$\mu mol m^{-2} s^{-1}$'
unit
= pd.read_csv(f'Models/{Base}/{Name}/model_output.csv',index_col=[0])
df
=plt.subplots()
fig,ax= PlotHelpers.make1_1_Plot(ax,df,'target','y_bar',unit=unit)
ax 'NN Estimate')
ax.set_ylabel('EC Observation')
ax.set_xlabel(
= pd.read_csv(f'Models/{Base}/{Name}/model_RI.csv',index_col=[0])
RI
= RI.sort_values(by=f'RI_bar',ascending=True)
RI =plt.subplots()
fig,ax
PlotHelpers.makeRI_plot(ax,RI)
print(RI.round(2))
(22746,)
(30, 22746, 1)
NN Model
Validation metrics (ensemble mean):
r2 = 0.63968
RMSE = 0.84153
Run Time:
1.96 Seconds
30 models
Mean epochs/model: 64.76666666666667
RI_bar RI_CI95
PPFD 100.0 2.9
reload(PlotHelpers)
importlib.reload(MiscFuncs)
importlib.
=6
Max= list(RI.sort_values(by=f'RI_bar',ascending=False).index)
Top
= ['HR']
exclude for ex in exclude:
try:
Top.remove(ex)except:
pass
= Top[:Max]
Top
= 2
cols =len(RI.index)
npi= int(np.ceil(len(Top)/2))
rows
=plt.subplots(rows,cols,sharey=True,
fig,axes=(6,6))
figsize
= axes.flatten()
axes
= ''
mod
for i,xi in enumerate(Top):
print(xi)
# ax.axvspan(0,0,edgecolor='k',linewidth=1.5)
= MiscFuncs.byInterval(df,f'{xi}{mod}',[f'dy_d{xi}{mod}'],bins=25)
df_int f'dy_d{xi}{mod}')
PlotHelpers.CI_Plot(axes[i],df_int,
plt.tight_layout()
axes[i].get_ylabel()
for ax in axes:
= ax.get_ylabel().split('_norm')[0]
l '_',' / ').replace('y',config['Base']))
ax.set_ylabel(l.replace('_d')[-1]+' '+mod.split('_')[-1])
ax.set_xlabel(l.split( plt.tight_layout()
T soil 50cm
Site
DOY
# df = pd.read_csv(f'Models/{run}/random_forest_output.csv',index_col=[0])
# unit = ''
# fig,ax=plt.subplots()
# ax.scatter(df['target'],df['y_bar'])
# ax.plot(df['target'],df['target'],color='k')
# r2 = np.round(metrics.r2_score(df['target'],df['y_bar']),2)
# RMSE = np.round(metrics.mean_squared_error(df['target'],df['y_bar'])**.5,2)
# ax = PlotHelpers.make1_1_Plot(ax,df,'target','y_bar',unit=unit)
# ax.set_ylabel('NN Estimate')
# ax.set_xlabel('EC Observation')
# RF_RI = pd.read_csv(f'Models/{run}/random_forest_RI.csv',index_col=[0])
# mod = ''
# RF_RI = RF_RI.sort_values(by = f'RI{mod}_bar')
# fig,ax=plt.subplots()
# ax.barh(RF_RI.index,RF_RI[f'RI{mod}_bar'],xerr=RF_RI[f'RI{mod}_CI95'])
# ax.grid()
# ax.set_xscale('log')
# RF_RI
# NN_RI = pd.read_csv(f'{run}/model_RI.csv',index_col=[0])
# mod = '_norm'
# NN_RI = NN_RI.sort_values(by=f'RI{mod}_bar',ascending=True)
# fig,ax=plt.subplots()
# ax.barh(NN_RI.index,NN_RI[f'RI{mod}_bar'],yerr=NN_RI[f'RI{mod}_CI95'])
# ax.grid()
# RF_Output = pd.read_csv(f'{run}/random_forest_output.csv',index_col=[0])
# RF_Output.head()