Skip to contents

The advantage of the generator over explicitly creating the arrays with make_arrays up front is the lower amount of RAM needed for this kind of operation. When full arrays are created, we have to allocate space for all the examples selected from the passed data.frame. If we use ts_generator instead, consecutive examples are delivered as batch sub-arrays that are created on the fly. This means that we don't have to store all the examples in RAM at the same time.

Usage

ts_generator(
  data,
  key,
  index,
  lookback,
  horizon,
  stride = 1,
  target,
  numeric = NULL,
  categorical = NULL,
  static = NULL,
  past = NULL,
  future = NULL,
  shuffle = TRUE,
  sample_frac = 1,
  y_past_sep = FALSE,
  batch_size = 1,
  ...
)

Arguments

data

A `data.table::data.table()` instance

lookback

The length of the context from the past

horizon

The forecast length

stride

Stride of the moving window

target

Target variable(s)

numeric

Numeric variables

categorical

Categorical variables

static

Static variables

shuffle

Shuffle samples. Set FALSE for the test dataset.

y_past_sep

Return past values of the target variable as a separate array. Typically, they are returned as the first feature of the x_past_num array. However, for some models (such as NBEATS) it may be easier for further processing to keep these values as a separate array.

batch_size

Batch size

Examples

library(m5)
library(recipes, warn.conflicts=FALSE)
library(zeallot)
library(dplyr, warn.conflicts=FALSE)
library(data.table, warn.conflicts=FALSE)

# ==========================================================================
#                          PREPARING THE DATA
# ==========================================================================
# Chronological split: observations before 2016 are used for training,
# everything from 2016-01-01 onwards is held out for testing.
train <- tiny_m5[date < "2016-01-01"]
test  <- tiny_m5[date >= "2016-01-01"]

# Preprocessing recipe:
#  * copy the id columns to `*_idx` so the originals stay available as the
#    series key while the copies are integer-encoded,
#  * integer-encode the categorical columns with zero-based codes,
#  * drop rows with missing predictor values.
m5_recipe <-
  recipe(value ~ ., data = train) %>%
  step_mutate(item_id_idx = item_id, store_id_idx = store_id) %>%
  step_integer(
    item_id_idx, store_id_idx,
    wday, month,
    event_name_1, event_type_1,
    event_name_2, event_type_2,
    zero_based = TRUE
  ) %>%
  step_naomit(all_predictors()) %>%
  prep()

# Apply the recipe fitted on the training data to both splits.
train <- bake(m5_recipe, train)
test  <- bake(m5_recipe, test)

TARGET      <- "value"
STATIC      <- c("item_id_idx", "store_id_idx")
CATEGORICAL <- c("event_name_1", "event_type_1", STATIC)
# Each numeric feature is listed once; the original example repeated
# 'sell_price', which would only duplicate the same column.
NUMERIC     <- "sell_price"
KEY         <- c("item_id", "store_id")
INDEX       <- "date"
LOOKBACK    <- 28
HORIZON     <- 14
# The moving window advances by a full lookback length.
STRIDE      <- LOOKBACK
BATCH_SIZE  <- 32
# ==========================================================================
#                          CREATING GENERATOR
# ==========================================================================
# ts_generator() returns two elements, unpacked here with zeallot's `%<-%`:
# the generator function and a second value bound to `train_steps`
# (presumably the number of steps per epoch -- confirm against the package).
c(train_generator, train_steps) %<-%
  ts_generator(
    data = train,
    key = KEY,
    index = INDEX,
    lookback = LOOKBACK,
    horizon = HORIZON,
    stride = STRIDE,
    target = TARGET,
    static = STATIC,
    categorical = CATEGORICAL,
    numeric = NUMERIC,
    batch_size = BATCH_SIZE
  )

# Each call to the generator yields the next batch; inspect the names of
# the arrays held in its first element.
batch <- train_generator()
print(names(batch[[1]]))
#> [1] "X_past_num"   "X_past_cat"   "X_fut_num"    "X_fut_cat"    NA            
#> [6] "x_static_cat"