Skip to content

CC BY-SA 4.0

EasyHybrid Example: Synthetic Data Analysis

This example demonstrates how to use EasyHybrid to train a hybrid model on synthetic data for respiration modeling with Q10 temperature sensitivity.

julia
using EasyHybrid

Data Loading and Preprocessing

Load synthetic dataset from GitHub into DataFrame

julia
df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc");

Select a subset of data for faster execution

julia
df = df[1:20000, :];
first(df, 5)
5×6 DataFrame
 Row │ time                 sw_pot    dsw_pot   ta        reco      rb
     │ DateTime             Float64?  Float64?  Float64?  Float64?  Float64?
─────┼────────────────────────────────────────────────────────────────────
   1 │ 2003-01-01T00:15:00  109.817   115.595   2.1       0.844741  1.42522
   2 │ 2003-01-01T00:45:00  109.817   115.595   1.98      0.840641  1.42522
   3 │ 2003-01-01T01:15:00  109.817   115.595   1.89      0.837579  1.42522
   4 │ 2003-01-01T01:45:00  109.817   115.595   2.06      0.843372  1.42522
   5 │ 2003-01-01T02:15:00  109.817   115.595   2.09      0.844399  1.42522

KeyedArray

KeyedArray from AxisKeys.jl works, but cannot handle DateTime type

julia
dfnot = Float32.(df[!, Not(:time)]);

ka = to_keyedArray(dfnot);

DimensionalData

julia
using DimensionalData
mat = Array(Matrix(dfnot)')
da = DimArray(mat, (Dim{:variable}(Symbol.(names(dfnot))), Dim{:batch_size}(1:size(dfnot, 1))));

Define the Physical Model

RbQ10 model: Respiration model with Q10 temperature sensitivity

Parameters:

  • ta: air temperature [°C]

  • Q10: temperature sensitivity factor [-]

  • rb: basal respiration rate [μmol/m²/s]

  • tref: reference temperature [°C] (default: 15.0)

julia
"""
    RbQ10(; ta, Q10, rb, tref = 15.0f0)

Respiration model with Q10 temperature sensitivity.

# Keywords
- `ta`: air temperature [°C] (scalar or array; all operations broadcast)
- `Q10`: temperature sensitivity factor [-]
- `rb`: basal respiration rate [μmol/m²/s]
- `tref`: reference temperature [°C], defaults to `15.0f0`

# Returns
A `NamedTuple` `(; reco, Q10, rb)` where `reco = rb * Q10^(0.1 * (ta - tref))`
is the modeled ecosystem respiration; `Q10` and `rb` are passed through so
downstream code can monitor the parameter values.
"""
function RbQ10(; ta, Q10, rb, tref = 15.0f0)
    # Q10 scaling: respiration multiplies by Q10 for every 10 °C above tref.
    # Float32 literals keep the computation in single precision for the NN.
    temperature_factor = Q10 .^ (0.1f0 .* (ta .- tref))
    reco = rb .* temperature_factor
    return (; reco, Q10, rb)
end
RbQ10 (generic function with 1 method)

Define Model Parameters

Parameter specification: (default, lower_bound, upper_bound)

julia
parameters = (
    # Parameter name | Default | Lower | Upper      | Description
    rb = (3.0f0, 0.0f0, 13.0f0),  # Basal respiration [μmol/m²/s]
    Q10 = (2.0f0, 1.0f0, 4.0f0),   # Temperature sensitivity factor [-]
)
(rb = (3.0f0, 0.0f0, 13.0f0), Q10 = (2.0f0, 1.0f0, 4.0f0))

Configure Hybrid Model Components

Define input variables

julia
forcing = [:ta]                    # Forcing variables (temperature)
1-element Vector{Symbol}:
 :ta

Target variable

julia
target = [:reco]                   # Target variable (respiration)
1-element Vector{Symbol}:
 :reco

Parameter classification

julia
global_param_names = [:Q10]        # Global parameters (same for all samples)
neural_param_names = [:rb]         # Neural network predicted parameters
1-element Vector{Symbol}:
 :rb

Single NN Hybrid Model Training

Create a single-NN hybrid model with the unified constructor (load `using WGLMakie` first if you want interactive plots).

julia
predictors_single_nn = [:sw_pot, :dsw_pot]   # Predictor variables (solar radiation, and its derivative)

single_nn_hybrid_model = constructHybridModel(
    predictors_single_nn,              # Input features
    forcing,                 # Forcing variables
    target,                  # Target variables
    RbQ10,                  # Process-based model function
    parameters,              # Parameter definitions
    neural_param_names,      # NN-predicted parameters
    global_param_names,      # Global parameters
    hidden_layers = [16, 16], # Neural network architecture
    activation = sigmoid,      # Activation function
    scale_nn_outputs = true, # Scale neural network outputs
    input_batchnorm = true   # Apply batch normalization to inputs
)
Hybrid Model (Single NN)
Neural Network: 
  Chain(
      layer_1 = BatchNorm(2, affine=false, track_stats=true),
      layer_2 = Dense(2 => 16, σ),                  # 48 parameters
      layer_3 = Dense(16 => 16, σ),                 # 272 parameters
      layer_4 = Dense(16 => 1),                     # 17 parameters
  )         # Total: 337 parameters,
            #        plus 5 states.
Configuration:
  predictors = [:sw_pot, :dsw_pot]
  forcing = [:ta]
  targets = [:reco]
  mechanistic_model = RbQ10
  neural_param_names = [:rb]
  global_param_names = [:Q10]
  fixed_param_names = Symbol[]
  scale_nn_outputs = true
  start_from_default = true
  config = (; hidden_layers = [16, 16], activation = σ, scale_nn_outputs = true, input_batchnorm = true, start_from_default = true,)

Parameters:
  Hybrid Parameters
    ┌─────┬─────────┬───────┬───────┐
    │     │ default │ lower │ upper │
    ├─────┼─────────┼───────┼───────┤
    │  rb │     3.0 │   0.0 │  13.0 │
    │ Q10 │     2.0 │   1.0 │   4.0 │
    └─────┴─────────┴───────┴───────┘

train on DataFrame

julia
# Extra penalty term added to the training loss: pulls the NN-predicted
# basal respiration `rb` toward 5.0 (squared-error penalty).
#
# # Arguments
# - `ŷ`: model output, a NamedTuple/struct exposing the predicted `rb` values
#   (scalar or array; the expression broadcasts either way).
#
# # Returns
# A NamedTuple `(; a)` where `a = Σ (5.0 - ŷ.rb)²`, matching the
# `extra_loss (a, sum)` entry reported in the training history.
extra_loss = function (ŷ)
    # Original source was garbled as `(5.0 .-.rb)`; the field access on the
    # prediction `ŷ` was lost — restored here as `ŷ.rb`.
    return (; a = sum((5.0 .- ŷ.rb) .^ 2))
end
#3 (generic function with 1 method)

Train the hybrid model

julia
single_nn_out = train(
    single_nn_hybrid_model,
    df,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    shuffleobs = true,
    loss_types = [:mse, :nse],
    show_progress = false,
    extra_loss = extra_loss
)
  train_history: (11,)
    mse         (reco, sum)
    nse         (reco, sum)
    extra_loss  (a, sum)
  val_history: (11,)
    mse         (reco, sum)
    nse         (reco, sum)
    extra_loss  (a, sum)
  ps_history: (11,)
    ϕ        ()
    monitor  (train, val)
  train_obs_pred: 16000×3 DataFrame
    reco, index, reco_pred
  val_obs_pred: 4000×3 DataFrame
    reco, index, reco_pred
  train_diffs: 
    Q10         (1,)
    rb          (16000,)
    parameters  (rb, Q10)
  val_diffs: 
    Q10         (1,)
    rb          (4000,)
    parameters  (rb, Q10)
  ps: (338,)
  st: 
    st_nn  (layer_1, layer_2, layer_3, layer_4)
    fixed  ()
  best_epoch: 1
  best_loss: 2.1385992

train on KeyedArray

julia
single_nn_out = train(
    single_nn_hybrid_model,
    ka,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    show_progress = false,
    shuffleobs = true
)
  train_history: (11,)
    mse  (reco, sum)
    r2   (reco, sum)
  val_history: (11,)
    mse  (reco, sum)
    r2   (reco, sum)
  ps_history: (11,)
    ϕ        ()
    monitor  (train, val)
  train_obs_pred: 16000×3 DataFrame
    reco, index, reco_pred
  val_obs_pred: 4000×3 DataFrame
    reco, index, reco_pred
  train_diffs: 
    Q10         (1,)
    rb          (16000,)
    parameters  (rb, Q10)
  val_diffs: 
    Q10         (1,)
    rb          (4000,)
    parameters  (rb, Q10)
  ps: (338,)
  st: 
    st_nn  (layer_1, layer_2, layer_3, layer_4)
    fixed  ()
  best_epoch: 8
  best_loss: 0.0037768923

train on DimensionalData

julia
single_nn_out = train(
    single_nn_hybrid_model,
    da,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    show_progress = false,
    shuffleobs = true
)

LuxCore.testmode(single_nn_out.st)
mean(df.dsw_pot)
mean(df.sw_pot)
281.9084998377153

Multi NN Hybrid Model Training

julia
predictors_multi_nn = (rb = [:sw_pot, :dsw_pot],)

multi_nn_hybrid_model = constructHybridModel(
    predictors_multi_nn,              # Input features
    forcing,                 # Forcing variables
    target,                  # Target variables
    RbQ10,                  # Process-based model function
    parameters,              # Parameter definitions
    global_param_names,      # Global parameters
    hidden_layers = [16, 16], # Neural network architecture
    activation = sigmoid,      # Activation function
    scale_nn_outputs = true, # Scale neural network outputs
    input_batchnorm = true   # Apply batch normalization to inputs
)

multi_nn_out = train(
    multi_nn_hybrid_model,
    ka,
    ();
    nepochs = 10,           # Number of training epochs
    batchsize = 512,         # Batch size for training
    opt = AdamW(0.1),   # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,       # Scaling for outputs
    show_progress = false,
    shuffleobs = true
)

LuxCore.testmode(multi_nn_out.st)
mean(df.dsw_pot)
mean(df.sw_pot)
281.9084998377153