MLP model for time series forecasting
torchts_mlp(
formula,
data,
learn_rate = 0.001,
hidden_units,
dropout = FALSE,
timesteps = 20,
horizon = 1,
jump = horizon,
optim = optim_adam(),
validation = NULL,
stateful = FALSE,
batch_size = 1,
epochs = 10,
shuffle = TRUE,
scale = TRUE,
sample_frac = 0.5,
loss_fn = nnf_mae,
device = NULL
)
(formula) A formula describing how to use the data.
(data.frame) An input data.frame.
(numeric) Learning rate.
(integer) Number of hidden units.
(integer) Number of timesteps used to produce a forecast.
(integer) Forecast horizon.
(integer) Input window shift.
(function) A function returning a torch optimizer (like optim_adam)
or an R expression like optim_adam(amsgrad = TRUE). Such an expression will be handled and fed with
the params and lr arguments.
(data.frame or numeric) Validation dataset, or the percentage of the input data to hold out for validation (TODO: confirm the exact meaning of the numeric form).
(integer) Batch size.
(integer) Number of epochs to train the network.
(logical) A dataloader argument - shuffle rows or not?
(logical or list)
(numeric) A fraction of time series to be sampled.
(function) A torch loss function.
(character) A torch device.
library(dplyr, warn.conflicts = FALSE)
library(torch)
library(torchts)
library(timetk)
# Preparing a dataset: keep the single item/store series used in this example
# and store the outcome column as a double.
tiny_m5_sample <- tiny_m5 %>%
  filter(item_id == "FOODS_3_586" & store_id == "CA_1") %>%
  mutate(value = as.numeric(value))

# Summary diagnostics of the filtered series.
tk_summary_diagnostics(tiny_m5_sample)
#> tk_augment_timeseries_signature(): Using the following .date_var variable: date
#> # A tibble: 1 × 12
#> n.obs start end units scale tzone diff.minimum diff.q1 diff.median
#> <int> <date> <date> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 1913 2011-01-29 2016-04-24 days day UTC 86400 86400 86400
#> # … with 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>

# Column-wise overview of the filtered data.
glimpse(tiny_m5_sample)
#> Rows: 1,913
#> Columns: 18
#> $ item_id <chr> "FOODS_3_586", "FOODS_3_586", "FOODS_3_586", "FOODS_3_586…
#> $ dept_id <chr> "FOODS_3", "FOODS_3", "FOODS_3", "FOODS_3", "FOODS_3", "F…
#> $ cat_id <chr> "FOODS", "FOODS", "FOODS", "FOODS", "FOODS", "FOODS", "FO…
#> $ store_id <chr> "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "…
#> $ state_id <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA…
#> $ value <dbl> 42, 36, 30, 23, 27, 34, 30, 59, 54, 37, 22, 38, 33, 38, 5…
#> $ date <date> 2011-01-29, 2011-01-30, 2011-01-31, 2011-02-01, 2011-02-…
#> $ wm_yr_wk <int> 11101, 11101, 11101, 11101, 11101, 11101, 11101, 11102, 1…
#> $ weekday <chr> "Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "…
#> $ wday <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, …
#> $ month <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
#> $ year <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 201…
#> $ event_name_1 <chr> "", "", "", "", "", "", "", "", "SuperBowl", "", "", "", …
#> $ event_type_1 <chr> "", "", "", "", "", "", "", "", "Sporting", "", "", "", "…
#> $ event_name_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ event_type_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ snap <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
#> $ sell_price <dbl> 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.48, 1.4…
# Length of the input window; reused below for the split lag, the model's
# `timesteps` argument and the preparation of the prediction data.
TIMESTEPS <- 20

# Time-ordered train/test split on the `date` column: 4 years for training,
# 1 year for assessment, with a lag of TIMESTEPS observations.
data_split <- tiny_m5_sample %>%
  time_series_split(
    date,
    initial = "4 years",
    assess = "1 year",
    lag = TIMESTEPS
  )
# Training
# Fit the MLP on the training part of the split. Arguments not listed here
# keep the defaults shown in the usage section.
mlp_model <- torchts_mlp(
  formula = value ~ date + value + sell_price + wday,
  data = training(data_split),
  timesteps = TIMESTEPS,
  horizon = 1,
  hidden_units = 10,
  batch_size = 32,
  epochs = 10
)
#> Categorical variables found (1): wday
#>
#> Training started
#> | train: 0.79929
#> | train: 0.79051
#> | train: 0.77501
#> | train: 0.77450
#> | train: 0.77773
#> | train: 0.77323
#> | train: 0.77331
#> | train: 0.76293
#> | train: 0.76901
#> | train: 0.76394
# Prediction
# Prepare the hold-out data for forecasting. clear_outcome() presumably blanks
# the `value` column beyond the first TIMESTEPS rows, which serve as the
# initial input window -- TODO confirm against the clear_outcome() docs.
cleared_new_data <-
  testing(data_split) %>%
  clear_outcome(date, value, TIMESTEPS)

# Forecast with the model fitted above. The object is `mlp_model`; the
# previous `rnn_model` reference did not exist and raised
# "object 'rnn_model' not found".
forecast <-
  predict(mlp_model, cleared_new_data)