MLP model for time series forecasting

torchts_mlp(
  formula,
  data,
  learn_rate = 0.001,
  hidden_units,
  dropout = FALSE,
  timesteps = 20,
  horizon = 1,
  jump = horizon,
  optim = optim_adam(),
  validation = NULL,
  stateful = FALSE,
  batch_size = 1,
  epochs = 10,
  shuffle = TRUE,
  scale = TRUE,
  sample_frac = 0.5,
  loss_fn = nnf_mae,
  device = NULL
)

Arguments

formula

(formula) A formula describing how to use the data.

data

(data.frame) An input data.frame.

learn_rate

(numeric) Learning rate.

hidden_units

(integer) Number of hidden units.

timesteps

(integer) Number of timesteps used to produce a forecast.

horizon

(integer) Forecast horizon.

jump

(integer) Input window shift.

optim

(function) A function returning a torch optimizer (like optim_adam) or an R expression like optim_adam(amsgrad = TRUE). Such an expression will be handled and fed with the params and lr arguments.

validation

(data.frame or numeric) Validation dataset or percent of TODO.

batch_size

(integer) Batch size.

epochs

(integer) Number of epochs to train the network.

shuffle

(logical) A dataloader argument - shuffle rows or not?

scale

(logical or list)

sample_frac

(numeric) A fraction of time series to be sampled.

loss_fn

(function) A torch loss function.

device

(character) A torch device.

Examples

# Packages used by this example: dplyr for data wrangling, torch and torchts
# for the model, timetk for the diagnostics and the time-based split.
library(dplyr, warn.conflicts = FALSE)
library(torch)
library(torchts)
library(timetk)

# Preparing a dataset
# Keep a single item/store series from `tiny_m5` and coerce `value` to numeric.
tiny_m5_sample <-
  tiny_m5 %>%
  filter(item_id == "FOODS_3_586", store_id == "CA_1") %>%
  mutate(value = as.numeric(value))

# Quick look at the series: the diagnostics below report 1913 daily
# observations from 2011-01-29 to 2016-04-24.
tk_summary_diagnostics(tiny_m5_sample)
#> tk_augment_timeseries_signature(): Using the following .date_var variable: date
#> # A tibble: 1 × 12
#>   n.obs start      end        units scale tzone diff.minimum diff.q1 diff.median
#>   <int> <date>     <date>     <chr> <chr> <chr>        <dbl>   <dbl>       <dbl>
#> 1  1913 2011-01-29 2016-04-24 days  day   UTC          86400   86400       86400
#> # … with 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>
glimpse(tiny_m5_sample)
#> Rows: 1,913
#> Columns: 18
#> $ item_id      <chr> "FOODS_3_586", "FOODS_3_586", "FOODS_3_586", "FOODS_3_586…
#> $ dept_id      <chr> "FOODS_3", "FOODS_3", "FOODS_3", "FOODS_3", "FOODS_3", "F…
#> $ cat_id       <chr> "FOODS", "FOODS", "FOODS", "FOODS", "FOODS", "FOODS", "FO…
#> $ store_id     <chr> "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "…
#> $ state_id     <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA…
#> $ value        <dbl> 42, 36, 30, 23, 27, 34, 30, 59, 54, 37, 22, 38, 33, 38, 5…
#> $ date         <date> 2011-01-29, 2011-01-30, 2011-01-31, 2011-02-01, 2011-02-…
#> $ wm_yr_wk     <int> 11101, 11101, 11101, 11101, 11101, 11101, 11101, 11102, 1…
#> $ weekday      <chr> "Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "…
#> $ wday         <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, …
#> $ month        <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
#> $ year         <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 201…
#> $ event_name_1 <chr> "", "", "", "", "", "", "", "", "SuperBowl", "", "", "", …
#> $ event_type_1 <chr> "", "", "", "", "", "", "", "", "Sporting", "", "", "", "…
#> $ event_name_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ event_type_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ snap         <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
#> $ sell_price   <dbl> 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.4…

# Number of past timesteps fed to the model; the same value is passed as the
# split `lag` and as the model's `timesteps` below so the two stay in sync.
TIMESTEPS <- 20

# Time-based split on `date`: a 4-year initial (training) window and a 1-year
# assessment window, with `lag = TIMESTEPS`.
# NOTE(review): presumably the lag prepends the last TIMESTEPS training rows to
# the assessment set so the first forecast has a full input window — confirm
# against the time_series_split() documentation.
data_split <-
  time_series_split(
    tiny_m5_sample, date,
    initial = "4 years",
    assess  = "1 year",
    lag     = TIMESTEPS
  )

# Training
# Fit the MLP on the training part of the split. `value` appears on both sides
# of the formula (outcome and predictor). The log below shows ten `train:`
# lines, matching `epochs = 10`.
mlp_model <-
  torchts_mlp(
    value ~ date + value + sell_price + wday,
    data = training(data_split),
    hidden_units = 10,
    timesteps = TIMESTEPS,
    horizon   = 1,
    epochs = 10,
    batch_size = 32
  )
#> Categorical variables found (1): wday
#> 
#> Training started
#>  | train: 0.79929  
#>  | train: 0.79051  
#>  | train: 0.77501  
#>  | train: 0.77450  
#>  | train: 0.77773  
#>  | train: 0.77323  
#>  | train: 0.77331  
#>  | train: 0.76293  
#>  | train: 0.76901  
#>  | train: 0.76394  

# Prediction
# Prepare the test part of the split for forecasting.
# NOTE(review): clear_outcome() presumably blanks the `value` outcome beyond
# the first TIMESTEPS rows (indexed by `date`) so that future values are not
# leaked into the inputs — confirm against the torchts documentation.
cleared_new_data <-
  testing(data_split) %>%
  clear_outcome(date, value, TIMESTEPS)

# Forecast with the MLP fitted in the training step (`mlp_model`).
forecast <-
  predict(mlp_model, cleared_new_data)