Prepare input/target arrays for time series models

Usage

make_arrays(
  data,
  key,
  index,
  lookback,
  horizon,
  stride = 1,
  target,
  numeric = NULL,
  categorical = NULL,
  static = NULL,
  past = NULL,
  future = NULL,
  shuffle = TRUE,
  sample_frac = 1,
  y_past_sep = FALSE,
  ...
)

Arguments

data: A data.table (see data.table::data.table())
lookback: The length of the context from the past
horizon: The forecast length
stride: Stride of the moving window
target: Target variable(s)
numeric: Numeric variables
categorical: Categorical variables
static: Static variables
shuffle: Shuffle samples. Set FALSE for the test dataset.
y_past_sep: Return past values of the target variable as a separate array. Typically, they are returned as the first feature of the x_past_num array. However, for some models (such as NBEATS), it may be easier for further processing to keep these values in a separate array.

Value

A list of arrays. At most, the list contains the following eight arrays:

(y_past)
x_past_num
x_past_cat
y_fut
x_fut_num
x_fut_cat
x_static_num
x_static_cat

y_past is optional: it is returned only if y_past_sep = TRUE; otherwise those values are part of the x_past_num array. Some arrays may be missing, depending on the specified variables.

Examples

library(m5)
library(recipes, warn.conflicts=FALSE)
#> Loading required package: dplyr
#> 
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union
library(zeallot)
library(dplyr, warn.conflicts=FALSE)
library(data.table, warn.conflicts=FALSE)

# ==========================================================================
#                          PREPARING THE DATA
# ==========================================================================
# Chronological split: rows dated before 2016-01-01 form the training set,
# the remaining rows form the test set.
train <- tiny_m5[date < '2016-01-01']
test  <- tiny_m5[date >= '2016-01-01']

# The recipe is fitted (prep) on the training set only and later applied
# (bake) to both sets, so both share the same integer encodings.
#  * step_mutate  - copies item_id/store_id into *_idx columns, so the
#                   original columns remain available as the series key
#  * step_integer - integer-encodes the listed columns, starting from zero
#  * step_naomit  - removes rows with missing values in the predictors
m5_recipe <-
   recipe(value ~ ., data=train) %>%
   step_mutate(item_id_idx=item_id, store_id_idx=store_id) %>%
   step_integer(item_id_idx, store_id_idx,
                wday, month,
                event_name_1, event_type_1,
                event_name_2, event_type_2,
                zero_based=TRUE) %>%
   step_naomit(all_predictors()) %>%
   prep()

train <- bake(m5_recipe, train)
test  <- bake(m5_recipe, test)

# Column roles passed to make_arrays()
TARGET      <- 'value'
STATIC      <- c('item_id_idx', 'store_id_idx')
CATEGORICAL <- c('event_name_1', 'event_type_1', STATIC)
# Each numeric feature is listed once (a repeated name would request the
# same column twice).
NUMERIC     <- 'sell_price'
KEY         <- c('item_id', 'store_id')
INDEX       <- 'date'
# Window configuration
LOOKBACK    <- 28
HORIZON     <- 14
# The stride of the moving window equals the lookback length
STRIDE      <- LOOKBACK
# ==========================================================================
#                           CREATING ARRAYS
# ==========================================================================
# Build the training arrays. Every argument is passed by name, so the
# order in which they are listed does not matter.
train_arrays <- make_arrays(
  data        = train,
  target      = TARGET,
  numeric     = NUMERIC,
  categorical = CATEGORICAL,
  static      = STATIC,
  key         = KEY,
  index       = INDEX,
  lookback    = LOOKBACK,
  horizon     = HORIZON,
  stride      = STRIDE
)

# Inspect which arrays were returned
train_arrays %>% names() %>% print()
#> NULL
# Inspect the dimensions of the numeric past-features array
train_arrays$x_past_num %>% dim() %>% print()
#> NULL

# Build the test arrays from the held-out `test` set (not from `train`).
# Shuffling is disabled, as recommended for the test dataset, so that the
# samples keep their original order.
test_arrays <-
   make_arrays(
       data        = test,
       key         = KEY,
       index       = INDEX,
       lookback    = LOOKBACK,
       horizon     = HORIZON,
       stride      = STRIDE,
       target      = TARGET,
       static      = STATIC,
       categorical = CATEGORICAL,
       numeric     = NUMERIC,
       shuffle     = FALSE
   )

print(names(test_arrays))
#> NULL
print(dim(test_arrays$x_past_num))
#> NULL