Wraps base::cut.default to make it convenient to use different binning methods (see get_breaks) and custom functions for labels and number formats.
discretize(
x,
n = 5,
method = break_methods(),
labels_formatter = label_breaks_value,
number_formatter = scales::number_format(big.mark = ","),
ordered_result = TRUE
)
x: a numeric vector
n: integer indicating the number of intervals/bins desired
method: a character string of length 1, indicating the method to use. Possible values are c("jenks", "isowidth", "quantile")
labels_formatter: a function to produce the labels to pass to cut
number_formatter: a function to format numbers, before building labels
ordered_result: logical: should the result be an ordered factor?
A vector as returned by cut: a factor, unless labels = FALSE, in which case the result is an integer vector of level codes.
set.seed(1234567)
# quick example
set.seed(1234567)
discretize(runif(100))
#> [1] 0.63 0.83 1.00 0.23 0.83 0.63 0.23 1.00 0.63 1.00 0.63 0.43 0.23 0.63 0.83
#> [16] 0.63 0.23 1.00 0.63 0.83 1.00 0.43 1.00 0.43 0.43 0.43 0.43 0.23 0.43 0.83
#> [31] 0.83 1.00 0.23 1.00 0.63 0.23 0.63 0.83 0.63 1.00 0.23 0.43 0.83 0.83 0.23
#> [46] 1.00 0.83 0.23 0.43 0.23 0.63 1.00 0.83 0.43 0.63 0.23 0.23 0.63 0.83 0.43
#> [61] 0.63 1.00 0.43 0.63 0.43 0.83 0.83 1.00 0.43 0.63 0.23 0.43 0.23 0.63 0.43
#> [76] 0.43 0.43 0.63 0.43 0.63 0.43 0.23 0.63 0.83 0.43 0.63 1.00 0.43 0.43 0.23
#> [91] 0.63 0.83 0.23 0.23 0.63 0.83 1.00 0.63 0.83 0.63
#> Levels: 0.23 < 0.43 < 0.63 < 0.83 < 1.00
# customize binning method
set.seed(1234567)
discretize(runif(100), method = "quantile")
#> [1] 0.57 0.80 1.00 0.26 0.80 0.57 0.26 1.00 0.57 1.00 0.80 0.26 0.26 0.57 1.00
#> [16] 0.57 0.26 1.00 0.57 0.80 1.00 0.42 1.00 0.57 0.42 0.57 0.42 0.26 0.42 1.00
#> [31] 0.80 1.00 0.26 1.00 0.57 0.26 0.57 0.80 0.57 1.00 0.26 0.42 0.80 1.00 0.26
#> [46] 1.00 0.80 0.26 0.42 0.26 0.57 1.00 0.80 0.42 0.80 0.26 0.26 0.57 0.80 0.42
#> [61] 0.80 1.00 0.42 0.57 0.42 0.80 0.80 1.00 0.42 0.57 0.26 0.42 0.26 0.80 0.42
#> [76] 0.42 0.42 0.80 0.42 0.57 0.42 0.26 0.80 1.00 0.42 0.80 1.00 0.42 0.42 0.26
#> [91] 0.57 0.80 0.26 0.26 0.57 0.80 1.00 0.57 1.00 0.57
#> Levels: 0.26 < 0.42 < 0.57 < 0.80 < 1.00
# customize number format
set.seed(1234567)
discretize(runif(100), number_formatter = scales::percent)
#> [1] 63% 83% 100% 23% 83% 63% 23% 100% 63% 100% 63% 43% 23% 63% 83%
#> [16] 63% 23% 100% 63% 83% 100% 43% 100% 43% 43% 43% 43% 23% 43% 83%
#> [31] 83% 100% 23% 100% 63% 23% 63% 83% 63% 100% 23% 43% 83% 83% 23%
#> [46] 100% 83% 23% 43% 23% 63% 100% 83% 43% 63% 23% 23% 63% 83% 43%
#> [61] 63% 100% 43% 63% 43% 83% 83% 100% 43% 63% 23% 43% 23% 63% 43%
#> [76] 43% 43% 63% 43% 63% 43% 23% 63% 83% 43% 63% 100% 43% 43% 23%
#> [91] 63% 83% 23% 23% 63% 83% 100% 63% 83% 63%
#> Levels: 23% < 43% < 63% < 83% < 100%
# customize labels
set.seed(1234567)
discretize(runif(100), labels_formatter = label_breaks_interval)
#> [1] 0.43-0.63 0.63-0.83 0.83-1.00 0.01-0.23 0.63-0.83 0.43-0.63 0.01-0.23
#> [8] 0.83-1.00 0.43-0.63 0.83-1.00 0.43-0.63 0.23-0.43 0.01-0.23 0.43-0.63
#> [15] 0.63-0.83 0.43-0.63 0.01-0.23 0.83-1.00 0.43-0.63 0.63-0.83 0.83-1.00
#> [22] 0.23-0.43 0.83-1.00 0.23-0.43 0.23-0.43 0.23-0.43 0.23-0.43 0.01-0.23
#> [29] 0.23-0.43 0.63-0.83 0.63-0.83 0.83-1.00 0.01-0.23 0.83-1.00 0.43-0.63
#> [36] 0.01-0.23 0.43-0.63 0.63-0.83 0.43-0.63 0.83-1.00 0.01-0.23 0.23-0.43
#> [43] 0.63-0.83 0.63-0.83 0.01-0.23 0.83-1.00 0.63-0.83 0.01-0.23 0.23-0.43
#> [50] 0.01-0.23 0.43-0.63 0.83-1.00 0.63-0.83 0.23-0.43 0.43-0.63 0.01-0.23
#> [57] 0.01-0.23 0.43-0.63 0.63-0.83 0.23-0.43 0.43-0.63 0.83-1.00 0.23-0.43
#> [64] 0.43-0.63 0.23-0.43 0.63-0.83 0.63-0.83 0.83-1.00 0.23-0.43 0.43-0.63
#> [71] 0.01-0.23 0.23-0.43 0.01-0.23 0.43-0.63 0.23-0.43 0.23-0.43 0.23-0.43
#> [78] 0.43-0.63 0.23-0.43 0.43-0.63 0.23-0.43 0.01-0.23 0.43-0.63 0.63-0.83
#> [85] 0.23-0.43 0.43-0.63 0.83-1.00 0.23-0.43 0.23-0.43 0.01-0.23 0.43-0.63
#> [92] 0.63-0.83 0.01-0.23 0.01-0.23 0.43-0.63 0.63-0.83 0.83-1.00 0.43-0.63
#> [99] 0.63-0.83 0.43-0.63
#> Levels: 0.01-0.23 < 0.23-0.43 < 0.43-0.63 < 0.63-0.83 < 0.83-1.00
set.seed(1234567)
discretize(runif(100), labels_formatter = label_breaks_cut)
#> [1] (0.43, 0.63] (0.63, 0.83] (0.83, 1.00] [0.01, 0.23] (0.63, 0.83]
#> [6] (0.43, 0.63] [0.01, 0.23] (0.83, 1.00] (0.43, 0.63] (0.83, 1.00]
#> [11] (0.43, 0.63] (0.23, 0.43] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83]
#> [16] (0.43, 0.63] [0.01, 0.23] (0.83, 1.00] (0.43, 0.63] (0.63, 0.83]
#> [21] (0.83, 1.00] (0.23, 0.43] (0.83, 1.00] (0.23, 0.43] (0.23, 0.43]
#> [26] (0.23, 0.43] (0.23, 0.43] [0.01, 0.23] (0.23, 0.43] (0.63, 0.83]
#> [31] (0.63, 0.83] (0.83, 1.00] [0.01, 0.23] (0.83, 1.00] (0.43, 0.63]
#> [36] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83] (0.43, 0.63] (0.83, 1.00]
#> [41] [0.01, 0.23] (0.23, 0.43] (0.63, 0.83] (0.63, 0.83] [0.01, 0.23]
#> [46] (0.83, 1.00] (0.63, 0.83] [0.01, 0.23] (0.23, 0.43] [0.01, 0.23]
#> [51] (0.43, 0.63] (0.83, 1.00] (0.63, 0.83] (0.23, 0.43] (0.43, 0.63]
#> [56] [0.01, 0.23] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83] (0.23, 0.43]
#> [61] (0.43, 0.63] (0.83, 1.00] (0.23, 0.43] (0.43, 0.63] (0.23, 0.43]
#> [66] (0.63, 0.83] (0.63, 0.83] (0.83, 1.00] (0.23, 0.43] (0.43, 0.63]
#> [71] [0.01, 0.23] (0.23, 0.43] [0.01, 0.23] (0.43, 0.63] (0.23, 0.43]
#> [76] (0.23, 0.43] (0.23, 0.43] (0.43, 0.63] (0.23, 0.43] (0.43, 0.63]
#> [81] (0.23, 0.43] [0.01, 0.23] (0.43, 0.63] (0.63, 0.83] (0.23, 0.43]
#> [86] (0.43, 0.63] (0.83, 1.00] (0.23, 0.43] (0.23, 0.43] [0.01, 0.23]
#> [91] (0.43, 0.63] (0.63, 0.83] [0.01, 0.23] [0.01, 0.23] (0.43, 0.63]
#> [96] (0.63, 0.83] (0.83, 1.00] (0.43, 0.63] (0.63, 0.83] (0.43, 0.63]
#> 5 Levels: [0.01, 0.23] < (0.23, 0.43] < (0.43, 0.63] < ... < (0.83, 1.00]