Skip to content

CC BY-SA 4.0

EasyHybrid Example: Synthetic Data Analysis

This example demonstrates how to use EasyHybrid to train a hybrid model on synthetic data for respiration modeling with Q10 temperature sensitivity.

julia
using EasyHybrid

Data Loading and Preprocessing

Load synthetic dataset from GitHub into DataFrame

julia
df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc");

Select a subset of data for faster execution

julia
df = df[1:20000, :];
first(df, 5)
5×6 DataFrame
 Row │ time                 sw_pot    dsw_pot   ta        reco      rb
     │ DateTime             Float64?  Float64?  Float64?  Float64?  Float64?
─────┼────────────────────────────────────────────────────────────────────
   1 │ 2003-01-01T00:15:00  109.817   115.595   2.1       0.844741  1.42522
   2 │ 2003-01-01T00:45:00  109.817   115.595   1.98      0.840641  1.42522
   3 │ 2003-01-01T01:15:00  109.817   115.595   1.89      0.837579  1.42522
   4 │ 2003-01-01T01:45:00  109.817   115.595   2.06      0.843372  1.42522
   5 │ 2003-01-01T02:15:00  109.817   115.595   2.09      0.844399  1.42522

KeyedArray

KeyedArray from AxisKeys.jl works, but cannot handle DateTime type

julia
dfnot = Float32.(df[!, Not(:time)]);

ka = to_keyedArray(dfnot);

DimensionalData

julia
using DimensionalData
mat = Array(Matrix(dfnot)')
da = DimArray(mat, (Dim{:variable}(Symbol.(names(dfnot))), Dim{:batch_size}(1:size(dfnot, 1))));

Define the Physical Model

RbQ10 model: Respiration model with Q10 temperature sensitivity

Parameters:

  • ta: air temperature [°C]

  • Q10: temperature sensitivity factor [-]

  • rb: basal respiration rate [μmol/m²/s]

  • tref: reference temperature [°C] (default: 15.0)

julia
"""
    RbQ10(; ta, Q10, rb, tref = 15.0f0)

Respiration model with Q10 temperature sensitivity.

# Keywords
- `ta`: air temperature [°C] (scalar or array; all operations broadcast)
- `Q10`: temperature sensitivity factor [-]
- `rb`: basal respiration rate [μmol/m²/s]
- `tref`: reference temperature [°C], defaults to `15.0f0`

# Returns
A `NamedTuple` `(; reco, Q10, rb)` where `reco = rb * Q10^(0.1 * (ta - tref))`
is the modeled ecosystem respiration; `Q10` and `rb` are passed through so
downstream code can monitor the parameter values.
"""
function RbQ10(; ta, Q10, rb, tref = 15.0f0)
    # Q10 scaling: respiration multiplies by Q10 for every 10 °C above tref.
    # Float32 literals keep the computation in single precision for the NN.
    temperature_factor = Q10 .^ (0.1f0 .* (ta .- tref))
    reco = rb .* temperature_factor
    return (; reco, Q10, rb)
end
RbQ10 (generic function with 1 method)

Define Model Parameters

Parameter specification: (default, lower_bound, upper_bound)

julia
parameters = (
    # Parameter name | Default | Lower | Upper      | Description
    rb = (3.0f0, 0.0f0, 13.0f0),  # Basal respiration [μmol/m²/s]
    Q10 = (2.0f0, 1.0f0, 4.0f0),   # Temperature sensitivity factor [-]
)
(rb = (3.0f0, 0.0f0, 13.0f0), Q10 = (2.0f0, 1.0f0, 4.0f0))

Configure Hybrid Model Components

Define input variables

julia
forcing = [:ta]                    # Forcing variables (temperature)
1-element Vector{Symbol}:
 :ta

Target variable

julia
target = [:reco]                   # Target variable (respiration)
1-element Vector{Symbol}:
 :reco

Parameter classification

julia
global_param_names = [:Q10]        # Global parameters (same for all samples)
neural_param_names = [:rb]         # Neural network predicted parameters
1-element Vector{Symbol}:
 :rb

Single NN Hybrid Model Training

Create a single-NN hybrid model with the unified constructor (load `using WGLMakie` first if you want interactive plots).

julia
predictors_single_nn = [:sw_pot, :dsw_pot]   # Predictor variables (solar radiation, and its derivative)

single_nn_hybrid_model = constructHybridModel(
    predictors_single_nn,              # Input features
    forcing,                 # Forcing variables
    target,                  # Target variables
    RbQ10,                  # Process-based model function
    parameters,              # Parameter definitions
    neural_param_names,      # NN-predicted parameters
    global_param_names,      # Global parameters
    hidden_layers = [16, 16], # Neural network architecture
    activation = sigmoid,      # Activation function
    scale_nn_outputs = true, # Scale neural network outputs
    input_batchnorm = true   # Apply batch normalization to inputs
)
Hybrid Model (Single NN)
Neural Network: 
  Chain(
      layer_1 = BatchNorm(2, affine=false, track_stats=true),
      layer_2 = Dense(2 => 16, σ),                  # 48 parameters
      layer_3 = Dense(16 => 16, σ),                 # 272 parameters
      layer_4 = Dense(16 => 1),                     # 17 parameters
  )         # Total: 337 parameters,
            #        plus 5 states.
Configuration:
  predictors = [:sw_pot, :dsw_pot]
  forcing = [:ta]
  targets = [:reco]
  mechanistic_model = RbQ10
  neural_param_names = [:rb]
  global_param_names = [:Q10]
  fixed_param_names = Symbol[]
  scale_nn_outputs = true
  start_from_default = true
  config = (; hidden_layers = [16, 16], activation = σ, scale_nn_outputs = true, input_batchnorm = true, start_from_default = true,)

Parameters:
  Hybrid Parameters
    ┌─────┬─────────┬───────┬───────┐
    │     │ default │ lower │ upper │
    ├─────┼─────────┼───────┼───────┤
    │  rb │     3.0 │   0.0 │  13.0 │
    │ Q10 │     2.0 │   1.0 │   4.0 │
    └─────┴─────────┴───────┴───────┘

train on DataFrame

julia
# Extra penalty term added to the training loss: pulls the NN-predicted
# basal respiration `rb` toward 5.0 (squared-error penalty).
#
# # Arguments
# - `ŷ`: model output, a NamedTuple/struct exposing the predicted `rb` values
#   (scalar or array; the expression broadcasts either way).
#
# # Returns
# A NamedTuple `(; a)` where `a = Σ (5.0 - ŷ.rb)²`, matching the
# `extra_loss (a, sum)` entry reported in the training history.
extra_loss = function (ŷ)
    # Original source was garbled as `(5.0 .-.rb)`; the field access on the
    # prediction `ŷ` was lost — restored here as `ŷ.rb`.
    return (; a = sum((5.0 .- ŷ.rb) .^ 2))
end
#3 (generic function with 1 method)

Train the hybrid model

julia
single_nn_out = train(
    single_nn_hybrid_model,
    df,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    shuffleobs = true,
    loss_types = [:mse, :nse],
    show_progress = false,
    extra_loss = extra_loss
)
  train_history: (11,)
    mse         (reco, sum)
    nse         (reco, sum)
    extra_loss  (a, sum)
  val_history: (11,)
    mse         (reco, sum)
    nse         (reco, sum)
    extra_loss  (a, sum)
  ps_history: (11,)
    ϕ        ()
    monitor  (train, val)
  train_obs_pred: 16000×3 DataFrame
    reco, index, reco_pred
  val_obs_pred: 4000×3 DataFrame
    reco, index, reco_pred
  train_diffs: 
    Q10         (1,)
    rb          (16000,)
    parameters  (rb, Q10)
  val_diffs: 
    Q10         (1,)
    rb          (4000,)
    parameters  (rb, Q10)
  ps: (338,)
  st: 
    st_nn  (layer_1, layer_2, layer_3, layer_4)
    fixed  ()
  best_epoch: 1
  best_loss: 2.1385992

train on KeyedArray

julia
single_nn_out = train(
    single_nn_hybrid_model,
    ka,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    show_progress = false,
    shuffleobs = true
)
  train_history: (11,)
    mse  (reco, sum)
    r2   (reco, sum)
  val_history: (11,)
    mse  (reco, sum)
    r2   (reco, sum)
  ps_history: (11,)
    ϕ        ()
    monitor  (train, val)
  train_obs_pred: 16000×3 DataFrame
    reco, index, reco_pred
  val_obs_pred: 4000×3 DataFrame
    reco, index, reco_pred
  train_diffs: 
    Q10         (1,)
    rb          (16000,)
    parameters  (rb, Q10)
  val_diffs: 
    Q10         (1,)
    rb          (4000,)
    parameters  (rb, Q10)
  ps: (338,)
  st: 
    st_nn  (layer_1, layer_2, layer_3, layer_4)
    fixed  ()
  best_epoch: 8
  best_loss: 0.0037768923

train on DimensionalData

julia
single_nn_out = train(
    single_nn_hybrid_model,
    da,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    show_progress = false,
    shuffleobs = true
)

LuxCore.testmode(single_nn_out.st)
mean(df.dsw_pot)
mean(df.sw_pot)
281.9084998377153

Multi NN Hybrid Model Training

julia
predictors_multi_nn = (rb = [:sw_pot, :dsw_pot],)

multi_nn_hybrid_model = constructHybridModel(
    predictors_multi_nn,              # Input features
    forcing,                 # Forcing variables
    target,                  # Target variables
    RbQ10,                  # Process-based model function
    parameters,              # Parameter definitions
    global_param_names,      # Global parameters
    hidden_layers = [16, 16], # Neural network architecture
    activation = sigmoid,      # Activation function
    scale_nn_outputs = true, # Scale neural network outputs
    input_batchnorm = true   # Apply batch normalization to inputs
)

multi_nn_out = train(
    multi_nn_hybrid_model,
    ka,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    show_progress = false,
    shuffleobs = true
)

LuxCore.testmode(multi_nn_out.st)
mean(df.dsw_pot)
mean(df.sw_pot)
281.9084998377153