Wraps base::cut.default to make it convenient to use different binning methods (see get_breaks) and custom functions for labels and number formats.

discretize(
  x,
  n = 5,
  method = break_methods(),
  labels_formatter = label_breaks_value,
  number_formatter = scales::number_format(big.mark = ","),
  ordered_result = TRUE
)

Arguments

x

a numeric vector

n

integer indicating the number of intervals/bins desired

method

a character vector of length 1, indicating the binning method to use. Possible values are "jenks", "isowidth" and "quantile".

labels_formatter

a function to produce the labels to pass to cut

number_formatter

a function to format numbers before building labels

ordered_result

logical: should the result be an ordered factor?

Value

a vector as returned by cut: a factor (ordered when ordered_result = TRUE), unless cut receives labels = FALSE, which results in an integer vector of level codes.

Examples

set.seed(1234567)

# quick example
set.seed(1234567)
discretize(runif(100))
#>   [1] 0.63 0.83 1.00 0.23 0.83 0.63 0.23 1.00 0.63 1.00 0.63 0.43 0.23 0.63 0.83
#>  [16] 0.63 0.23 1.00 0.63 0.83 1.00 0.43 1.00 0.43 0.43 0.43 0.43 0.23 0.43 0.83
#>  [31] 0.83 1.00 0.23 1.00 0.63 0.23 0.63 0.83 0.63 1.00 0.23 0.43 0.83 0.83 0.23
#>  [46] 1.00 0.83 0.23 0.43 0.23 0.63 1.00 0.83 0.43 0.63 0.23 0.23 0.63 0.83 0.43
#>  [61] 0.63 1.00 0.43 0.63 0.43 0.83 0.83 1.00 0.43 0.63 0.23 0.43 0.23 0.63 0.43
#>  [76] 0.43 0.43 0.63 0.43 0.63 0.43 0.23 0.63 0.83 0.43 0.63 1.00 0.43 0.43 0.23
#>  [91] 0.63 0.83 0.23 0.23 0.63 0.83 1.00 0.63 0.83 0.63
#> Levels: 0.23 < 0.43 < 0.63 < 0.83 < 1.00

# customize binning method
set.seed(1234567)
discretize(runif(100), method = "quantile")
#>   [1] 0.57 0.80 1.00 0.26 0.80 0.57 0.26 1.00 0.57 1.00 0.80 0.26 0.26 0.57 1.00
#>  [16] 0.57 0.26 1.00 0.57 0.80 1.00 0.42 1.00 0.57 0.42 0.57 0.42 0.26 0.42 1.00
#>  [31] 0.80 1.00 0.26 1.00 0.57 0.26 0.57 0.80 0.57 1.00 0.26 0.42 0.80 1.00 0.26
#>  [46] 1.00 0.80 0.26 0.42 0.26 0.57 1.00 0.80 0.42 0.80 0.26 0.26 0.57 0.80 0.42
#>  [61] 0.80 1.00 0.42 0.57 0.42 0.80 0.80 1.00 0.42 0.57 0.26 0.42 0.26 0.80 0.42
#>  [76] 0.42 0.42 0.80 0.42 0.57 0.42 0.26 0.80 1.00 0.42 0.80 1.00 0.42 0.42 0.26
#>  [91] 0.57 0.80 0.26 0.26 0.57 0.80 1.00 0.57 1.00 0.57
#> Levels: 0.26 < 0.42 < 0.57 < 0.80 < 1.00

# customize number format
set.seed(1234567)
discretize(runif(100), number_formatter = scales::percent)
#>   [1] 63%  83%  100% 23%  83%  63%  23%  100% 63%  100% 63%  43%  23%  63%  83% 
#>  [16] 63%  23%  100% 63%  83%  100% 43%  100% 43%  43%  43%  43%  23%  43%  83% 
#>  [31] 83%  100% 23%  100% 63%  23%  63%  83%  63%  100% 23%  43%  83%  83%  23% 
#>  [46] 100% 83%  23%  43%  23%  63%  100% 83%  43%  63%  23%  23%  63%  83%  43% 
#>  [61] 63%  100% 43%  63%  43%  83%  83%  100% 43%  63%  23%  43%  23%  63%  43% 
#>  [76] 43%  43%  63%  43%  63%  43%  23%  63%  83%  43%  63%  100% 43%  43%  23% 
#>  [91] 63%  83%  23%  23%  63%  83%  100% 63%  83%  63% 
#> Levels: 23% < 43% < 63% < 83% < 100%

# customize labels
set.seed(1234567)
discretize(runif(100), labels_formatter = label_breaks_interval)
#>   [1] 0.43-0.63 0.63-0.83 0.83-1.00 0.01-0.23 0.63-0.83 0.43-0.63 0.01-0.23
#>   [8] 0.83-1.00 0.43-0.63 0.83-1.00 0.43-0.63 0.23-0.43 0.01-0.23 0.43-0.63
#>  [15] 0.63-0.83 0.43-0.63 0.01-0.23 0.83-1.00 0.43-0.63 0.63-0.83 0.83-1.00
#>  [22] 0.23-0.43 0.83-1.00 0.23-0.43 0.23-0.43 0.23-0.43 0.23-0.43 0.01-0.23
#>  [29] 0.23-0.43 0.63-0.83 0.63-0.83 0.83-1.00 0.01-0.23 0.83-1.00 0.43-0.63
#>  [36] 0.01-0.23 0.43-0.63 0.63-0.83 0.43-0.63 0.83-1.00 0.01-0.23 0.23-0.43
#>  [43] 0.63-0.83 0.63-0.83 0.01-0.23 0.83-1.00 0.63-0.83 0.01-0.23 0.23-0.43
#>  [50] 0.01-0.23 0.43-0.63 0.83-1.00 0.63-0.83 0.23-0.43 0.43-0.63 0.01-0.23
#>  [57] 0.01-0.23 0.43-0.63 0.63-0.83 0.23-0.43 0.43-0.63 0.83-1.00 0.23-0.43
#>  [64] 0.43-0.63 0.23-0.43 0.63-0.83 0.63-0.83 0.83-1.00 0.23-0.43 0.43-0.63
#>  [71] 0.01-0.23 0.23-0.43 0.01-0.23 0.43-0.63 0.23-0.43 0.23-0.43 0.23-0.43
#>  [78] 0.43-0.63 0.23-0.43 0.43-0.63 0.23-0.43 0.01-0.23 0.43-0.63 0.63-0.83
#>  [85] 0.23-0.43 0.43-0.63 0.83-1.00 0.23-0.43 0.23-0.43 0.01-0.23 0.43-0.63
#>  [92] 0.63-0.83 0.01-0.23 0.01-0.23 0.43-0.63 0.63-0.83 0.83-1.00 0.43-0.63
#>  [99] 0.63-0.83 0.43-0.63
#> Levels: 0.01-0.23 < 0.23-0.43 < 0.43-0.63 < 0.63-0.83 < 0.83-1.00

set.seed(1234567)
discretize(runif(100), labels_formatter = label_breaks_cut)
#>   [1] (0.43, 0.63] (0.63, 0.83] (0.83, 1.00] [0.01, 0.23] (0.63, 0.83]
#>   [6] (0.43, 0.63] [0.01, 0.23] (0.83, 1.00] (0.43, 0.63] (0.83, 1.00]
#>  [11] (0.43, 0.63] (0.23, 0.43] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83]
#>  [16] (0.43, 0.63] [0.01, 0.23] (0.83, 1.00] (0.43, 0.63] (0.63, 0.83]
#>  [21] (0.83, 1.00] (0.23, 0.43] (0.83, 1.00] (0.23, 0.43] (0.23, 0.43]
#>  [26] (0.23, 0.43] (0.23, 0.43] [0.01, 0.23] (0.23, 0.43] (0.63, 0.83]
#>  [31] (0.63, 0.83] (0.83, 1.00] [0.01, 0.23] (0.83, 1.00] (0.43, 0.63]
#>  [36] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83] (0.43, 0.63] (0.83, 1.00]
#>  [41] [0.01, 0.23] (0.23, 0.43] (0.63, 0.83] (0.63, 0.83] [0.01, 0.23]
#>  [46] (0.83, 1.00] (0.63, 0.83] [0.01, 0.23] (0.23, 0.43] [0.01, 0.23]
#>  [51] (0.43, 0.63] (0.83, 1.00] (0.63, 0.83] (0.23, 0.43] (0.43, 0.63]
#>  [56] [0.01, 0.23] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83] (0.23, 0.43]
#>  [61] (0.43, 0.63] (0.83, 1.00] (0.23, 0.43] (0.43, 0.63] (0.23, 0.43]
#>  [66] (0.63, 0.83] (0.63, 0.83] (0.83, 1.00] (0.23, 0.43] (0.43, 0.63]
#>  [71] [0.01, 0.23] (0.23, 0.43] [0.01, 0.23] (0.43, 0.63] (0.23, 0.43]
#>  [76] (0.23, 0.43] (0.23, 0.43] (0.43, 0.63] (0.23, 0.43] (0.43, 0.63]
#>  [81] (0.23, 0.43] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83] (0.23, 0.43]
#>  [86] (0.43, 0.63] (0.83, 1.00] (0.23, 0.43] (0.23, 0.43] [0.01, 0.23]
#>  [91] (0.43, 0.63] (0.63, 0.83] [0.01, 0.23] [0.01, 0.23] (0.43, 0.63]
#>  [96] (0.63, 0.83] (0.83, 1.00] (0.43, 0.63] (0.63, 0.83] (0.43, 0.63]
#> 5 Levels: [0.01, 0.23] < (0.23, 0.43] < (0.43, 0.63] < ... < (0.83, 1.00]