Return the dictionary size (number of distinct values) of each categorical variable in a data.frame
dict_size(data)
data: (data.frame) A data.frame containing categorical variables.
The function automatically detects categorical variables
by internally calling the is_categorical() function.
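Named numeric vector: one element per categorical variable, named after the column and holding its number of distinct values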
glimpse(tiny_m5)
#> Rows: 535,640
#> Columns: 18
#> $ item_id <chr> "HOBBIES_1_330", "HOBBIES_1_330", "HOBBIES_1_330", "HOBBI…
#> $ dept_id <chr> "HOBBIES_1", "HOBBIES_1", "HOBBIES_1", "HOBBIES_1", "HOBB…
#> $ cat_id <chr> "HOBBIES", "HOBBIES", "HOBBIES", "HOBBIES", "HOBBIES", "H…
#> $ store_id <chr> "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "…
#> $ state_id <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA…
#> $ value <int> 0, 0, 0, 0, 2, 4, 3, 5, 4, 2, 0, 0, 4, 2, 5, 6, 1, 0, 3, …
#> $ date <date> 2011-01-29, 2011-01-30, 2011-01-31, 2011-02-01, 2011-02-…
#> $ wm_yr_wk <int> 11101, 11101, 11101, 11101, 11101, 11101, 11101, 11102, 1…
#> $ weekday <chr> "Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "…
#> $ wday <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, …
#> $ month <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
#> $ year <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 201…
#> $ event_name_1 <chr> "", "", "", "", "", "", "", "", "SuperBowl", "", "", "", …
#> $ event_type_1 <chr> "", "", "", "", "", "", "", "", "Sporting", "", "", "", "…
#> $ event_name_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ event_type_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ snap <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
#> $ sell_price <dbl> 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.4…
dict_size(tiny_m5)
#> item_id dept_id cat_id store_id state_id value
#> 28 7 3 10 3 223
#> wm_yr_wk weekday wday month year event_name_1
#> 274 7 7 12 6 31
#> event_type_1 event_name_2 event_type_2 snap
#> 5 5 3 2
# We can choose only the features we want - otherwise it automatically
# selects logical, factor, character or integer vectors
tiny_m5 %>%
select(store_id, event_name_1) %>%
dict_size()
#> store_id event_name_1
#> 10 31