Return the size (number of distinct values) of each categorical variable in a data.frame

dict_size(data)

Arguments

data

(data.frame) A data.frame containing categorical variables. The function automatically finds the categorical variables by calling the is_categorical function internally.

Value

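Named numeric vector: one element per categorical variable, named after the variable and holding its number of distinct values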

Examples

glimpse(tiny_m5)
#> Rows: 535,640
#> Columns: 18
#> $ item_id      <chr> "HOBBIES_1_330", "HOBBIES_1_330", "HOBBIES_1_330", "HOBBI…
#> $ dept_id      <chr> "HOBBIES_1", "HOBBIES_1", "HOBBIES_1", "HOBBIES_1", "HOBB…
#> $ cat_id       <chr> "HOBBIES", "HOBBIES", "HOBBIES", "HOBBIES", "HOBBIES", "H…
#> $ store_id     <chr> "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "CA_1", "…
#> $ state_id     <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA…
#> $ value        <int> 0, 0, 0, 0, 2, 4, 3, 5, 4, 2, 0, 0, 4, 2, 5, 6, 1, 0, 3, …
#> $ date         <date> 2011-01-29, 2011-01-30, 2011-01-31, 2011-02-01, 2011-02-…
#> $ wm_yr_wk     <int> 11101, 11101, 11101, 11101, 11101, 11101, 11101, 11102, 1…
#> $ weekday      <chr> "Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "…
#> $ wday         <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, …
#> $ month        <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
#> $ year         <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 201…
#> $ event_name_1 <chr> "", "", "", "", "", "", "", "", "SuperBowl", "", "", "", …
#> $ event_type_1 <chr> "", "", "", "", "", "", "", "", "Sporting", "", "", "", "…
#> $ event_name_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ event_type_2 <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "…
#> $ snap         <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
#> $ sell_price   <dbl> 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.44, 7.4…
dict_size(tiny_m5)
#>      item_id      dept_id       cat_id     store_id     state_id        value 
#>           28            7            3           10            3          223 
#>     wm_yr_wk      weekday         wday        month         year event_name_1 
#>          274            7            7           12            6           31 
#> event_type_1 event_name_2 event_type_2         snap 
#>            5            5            3            2 

# We can choose only the features we want - otherwise it automatically
# selects logical, factor, character or integer vectors

tiny_m5 %>%
  select(store_id, event_name_1) %>%
  dict_size()
#>     store_id event_name_1 
#>           10           31