Categorize continuous variables

filter_categorize(
  sam_table,
  sample_condition,
  new_label,
  nbins = NULL,
  bin_breaks = c(),
  bin_labels = c()
)

Arguments

sam_table

A data frame of sample metadata, with samples as rows and conditions (sample variables) as columns

sample_condition

Name of the continuous variable (a column of sam_table) to categorize

new_label

Column name for the new categorized variable added to sam_table

nbins

Number of bins/categories; the bin ranges are selected automatically

bin_breaks

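Break points that manually define the ranges of the bins/categories (n + 1 breaks define n bins). As the example below shows, a value equal to a break point falls in the lower bin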

bin_labels

Labels for the manually defined bins/categories, one label per bin

Value

A list containing the updated sample table (sam_table) and plots of the variable before and after binning (plot.unbinned and plot.binned)

Examples

library(SummarizedExperiment)
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: ‘MatrixGenerics’
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: ‘BiocGenerics’
#> The following objects are masked from ‘package:stats’:
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
#>     tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#> 
#>     findMatches
#> The following objects are masked from ‘package:base’:
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: ‘Biobase’
#> The following object is masked from ‘package:MatrixGenerics’:
#> 
#>     rowMedians
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     anyMissing, rowMedians
data_dir <- system.file("extdata/MAE.rds", package = "animalcules")
toy_data <- readRDS(data_dir)
microbe <- MultiAssayExperiment::experiments(toy_data)[[1]]
samples <- as.data.frame(colData(microbe))
result <- filter_categorize(samples,
  sample_condition = "AGE",
  new_label = "AGE_GROUP",
  bin_breaks = c(0, 55, 75, 100),
  bin_labels = c("Young", "Adult", "Elderly")
)
result$sam_table
#>            AGE    SEX  DISEASE GROUP AGE_GROUP
#> subject_1   34 Female positive     A     Young
#> subject_2   61   Male positive     A     Adult
#> subject_3   62   Male positive     A     Adult
#> subject_4   95 Female positive     B   Elderly
#> subject_5   30 Female positive     A     Young
#> subject_6   80 Female positive     B   Elderly
#> subject_7   59   Male positive     B     Adult
#> subject_8   60   Male positive     C     Adult
#> subject_9   55   Male positive     B     Young
#> subject_10  60   Male positive     B     Adult
#> subject_11  71 Female negative     C     Adult
#> subject_12  91   Male positive     A   Elderly
#> subject_13   8 Female positive     B     Young
#> subject_14  60   Male negative     A     Adult
#> subject_15   1 Female negative     B     Young
#> subject_16  40 Female positive     A     Young
#> subject_17  48   Male negative     B     Young
#> subject_18  21   Male negative     A     Young
#> subject_19  66   Male positive     B     Adult
#> subject_20  20 Female negative     B     Young
#> subject_21   6 Female negative     A     Young
#> subject_22  19   Male negative     A     Young
#> subject_23  75   Male negative     C     Adult
#> subject_24  99   Male negative     C   Elderly
#> subject_25  30 Female negative     C     Young
#> subject_26  77 Female negative     B   Elderly
#> subject_27  36 Female negative     B     Young
#> subject_28  63 Female negative     A     Adult
#> subject_29  91   Male negative     A   Elderly
#> subject_30  62 Female positive     B     Adult
#> subject_31  24 Female positive     B     Young
#> subject_32  84   Male positive     B   Elderly
#> subject_33  77   Male positive     B   Elderly
#> subject_34  13 Female positive     A     Young
#> subject_35  60   Male negative     A     Adult
#> subject_36  66   Male positive     B     Adult
#> subject_37  89   Male negative     C   Elderly
#> subject_38  98   Male positive     A   Elderly
#> subject_39  37   Male positive     A     Young
#> subject_40  48   Male positive     A     Young
#> subject_41  35   Male positive     B     Young
#> subject_42  23   Male positive     C     Young
#> subject_43  56   Male negative     B     Adult
#> subject_44  78   Male negative     A   Elderly
#> subject_45  29 Female positive     C     Young
#> subject_46  53   Male negative     A     Young
#> subject_47  78   Male positive     B   Elderly
#> subject_48  35 Female positive     A     Young
#> subject_49  92 Female negative     C   Elderly
#> subject_50  36 Female negative     C     Young
result$plot.unbinned
result$plot.binned