Train Individual Models

train_models(
  run_info,
  run_global_models = FALSE,
  run_local_models = TRUE,
  global_model_recipes = c("R1"),
  feature_selection = FALSE,
  negative_forecast = FALSE,
  parallel_processing = NULL,
  inner_parallel = FALSE,
  num_cores = NULL,
  seed = 123
)

Arguments

run_info: Run info created using the set_run_info() function.
run_global_models: If TRUE, run multivariate models on the entire data set (across all time series) as a global model. Can be overridden by models_not_to_run. A value of NULL runs global models for all date types except week and day; the default shown in the usage above is FALSE.
run_local_models: If TRUE, run models by individual time series as local models.
global_model_recipes: Recipes to use in global models.
feature_selection: Implement feature selection before model training
negative_forecast: If TRUE, allow forecasts to dip below zero.
parallel_processing: Default of NULL runs no parallel processing and forecasts each individual time series one after another. 'local_machine' leverages all cores on the machine Finn is running on. 'spark' runs time series in parallel on a Spark cluster in Azure Databricks or Azure Synapse.
inner_parallel: Run components of forecast process inside a specific time series in parallel. Can only be used if parallel_processing is set to NULL or 'spark'.
num_cores: Number of cores to use when parallel processing is set up. Used when running parallel computations on the local machine or within Azure. Default of NULL uses the total number of cores on the machine minus one. Can't be greater than the number of cores on the machine minus one.
seed: Set seed for random number generator. Numeric value.

Value

Trained model outputs are written to disk.

Examples

# \donttest{
data_tbl <- timetk::m4_monthly %>%
  dplyr::rename(Date = date) %>%
  dplyr::mutate(id = as.character(id)) %>%
  dplyr::filter(
    Date >= "2013-01-01",
    Date <= "2015-06-01"
  )

run_info <- set_run_info()
#> Finn Submission Info
#> • Experiment Name: finn_fcst
#> • Run Name: finn_fcst-20241029T144929Z
#> 

prep_data(run_info,
  input_data = data_tbl,
  combo_variables = c("id"),
  target_variable = "value",
  date_type = "month",
  forecast_horizon = 3
)
#> ℹ Prepping Data
#> ✔ Prepping Data [3.5s]
#> 

prep_models(run_info,
  models_to_run = c("arima", "glmnet"),
  num_hyperparameters = 2,
  back_test_scenarios = 6,
  run_ensemble_models = FALSE
)
#> ℹ Creating Model Workflows
#> ✔ Creating Model Workflows [204ms]
#> 
#> ℹ Creating Model Hyperparameters
#> ✔ Creating Model Hyperparameters [221ms]
#> 
#> ℹ Creating Train Test Splits
#> ✔ Creating Train Test Splits [304ms]
#> 

train_models(run_info)
#> ℹ Training Individual Models
#> ✔ Training Individual Models [31.6s]
#> 
# }