Skip to contents

Prepare input/target arrays for time series models

Usage

make_arrays(
  data,
  key,
  index,
  lookback,
  horizon,
  stride = 1,
  target,
  numeric = NULL,
  categorical = NULL,
  static = NULL,
  past = NULL,
  future = NULL,
  shuffle = TRUE,
  sample_frac = 1,
  y_past_sep = FALSE,
  ...
)

Arguments

data

A [data.table::data.table()] instance

lookback

The length of the context from the past

horizon

The forecast length

stride

Stride of the moving window

target

Target variable(s)

numeric

Numeric variables

categorical

Categorical variables

static

Static variables

shuffle

Shuffle samples. Set FALSE for the test dataset.

y_past_sep

Return past values of the target variable as a separate array. Typically, they are returned as the first feature of the x_past_num array. However, for some models (such as NBEATS) it may be easier for further processing to keep these values as a separate array.

Value

A list of arrays. At most, it contains eight arrays:

  • (y_past)

  • x_past_num

  • x_past_cat

  • y_fut

  • x_fut_num

  • x_fut_cat

  • x_static_num

  • x_static_cat

y_past is returned only if y_past_sep = TRUE; otherwise those values are part of the x_past_num array. Some arrays may be missing, depending on the specified variables.

Examples

library(m5)
library(recipes, warn.conflicts=FALSE)
#> Loading required package: dplyr
#> 
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union
library(zeallot)
library(dplyr, warn.conflicts=FALSE)
library(data.table, warn.conflicts=FALSE)

# ==========================================================================
#                          PREPARING THE DATA
# ==========================================================================
# Split by date: observations before 2016 form the training set,
# the remaining ones form the test set.
train <- tiny_m5[date < '2016-01-01']
test  <- tiny_m5[date >= '2016-01-01']

# Preprocessing recipe, estimated on the training split only:
#  * copy the key columns into *_idx columns, so the original keys
#    (used below as KEY) are left untouched,
#  * integer-encode (zero-based) the *_idx copies and the calendar/event
#    variables,
#  * apply step_naomit() to all predictors.
m5_recipe <-
   recipe(value ~ ., data = train) %>%
   step_mutate(item_id_idx = item_id, store_id_idx = store_id) %>%
   step_integer(item_id_idx, store_id_idx,
                wday, month,
                event_name_1, event_type_1,
                event_name_2, event_type_2,
                zero_based = TRUE) %>%
   step_naomit(all_predictors()) %>%
   prep()

# bake() returns a tibble, while make_arrays() documents `data` as
# a data.table, so convert the baked splits back explicitly.
train <- as.data.table(bake(m5_recipe, train))
test  <- as.data.table(bake(m5_recipe, test))

# Column roles and window settings passed to make_arrays()
TARGET      <- 'value'
STATIC      <- c('item_id_idx', 'store_id_idx')
CATEGORICAL <- c('event_name_1', 'event_type_1', STATIC)
# Each numeric feature is listed once; repeating a name would duplicate it
NUMERIC     <- 'sell_price'
KEY         <- c('item_id', 'store_id')
INDEX       <- 'date'
LOOKBACK    <- 28
HORIZON     <- 14
# The moving window is shifted by a full lookback length at each step
STRIDE      <- LOOKBACK
# ==========================================================================
#                           CREATING ARRAYS
# ==========================================================================
# Build the training arrays from the baked training split.
# All arguments are passed by name: first the data and its identifiers,
# then the variable roles, and finally the window settings.
train_arrays <- make_arrays(
   data        = train,
   key         = KEY,
   index       = INDEX,
   target      = TARGET,
   numeric     = NUMERIC,
   categorical = CATEGORICAL,
   static      = STATIC,
   lookback    = LOOKBACK,
   horizon     = HORIZON,
   stride      = STRIDE
)

# Inspect the names of the returned arrays and the dimensions of
# the numeric past-input array
print(names(train_arrays))
#> NULL
print(dim(train_arrays$x_past_num))
#> NULL

# Build the test arrays from the held-out `test` split (not from `train`).
# Shuffling is disabled, as recommended for the test dataset, so that
# the samples keep their original order.
test_arrays <-
   make_arrays(
       data        = test,
       key         = KEY,
       index       = INDEX,
       lookback    = LOOKBACK,
       horizon     = HORIZON,
       stride      = STRIDE,
       target      = TARGET,
       static      = STATIC,
       categorical = CATEGORICAL,
       numeric     = NUMERIC,
       shuffle     = FALSE
   )

print(names(test_arrays))
#> NULL
print(dim(test_arrays$x_past_num))
#> NULL