Categorize continuous variables
A sample-by-condition data frame (one row per sample, one column per condition)
Continuous variable to categorize
Column name for categorized variable
Auto select ranges for n bins/categories
Manually select ranges for bins/categories
Manually label bins/categories
A list with an updated sample table and before/after plots
library(SummarizedExperiment)
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: ‘MatrixGenerics’
#> The following objects are masked from ‘package:matrixStats’:
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#>
#> Attaching package: ‘BiocGenerics’
#> The following objects are masked from ‘package:stats’:
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#>
#> Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#> as.data.frame, basename, cbind, colnames, dirname, do.call,
#> duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#> lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#> pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
#> tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#>
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#>
#> findMatches
#> The following objects are masked from ‘package:base’:
#>
#> I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: ‘Biobase’
#> The following object is masked from ‘package:MatrixGenerics’:
#>
#> rowMedians
#> The following objects are masked from ‘package:matrixStats’:
#>
#> anyMissing, rowMedians
# Locate the example MultiAssayExperiment file shipped with animalcules
# and read it into memory.
mae_path <- system.file("extdata/MAE.rds", package = "animalcules")
mae <- readRDS(mae_path)

# Take the first experiment and convert its column data (the per-sample
# annotations) into a plain data frame.
first_experiment <- MultiAssayExperiment::experiments(mae)[[1]]
sample_table <- as.data.frame(colData(first_experiment))

# Manually chosen bin boundaries for AGE and one label per resulting bin.
age_breaks <- c(0, 55, 75, 100)
age_labels <- c("Young", "Adult", "Elderly")

# Bin the continuous AGE column into a new categorical AGE_GROUP column.
result <- filter_categorize(
  sample_table,
  sample_condition = "AGE",
  new_label = "AGE_GROUP",
  bin_breaks = age_breaks,
  bin_labels = age_labels
)

# Show the updated sample table, including the new AGE_GROUP column.
result$sam_table
#> AGE SEX DISEASE GROUP AGE_GROUP
#> subject_1 34 Female positive A Young
#> subject_2 61 Male positive A Adult
#> subject_3 62 Male positive A Adult
#> subject_4 95 Female positive B Elderly
#> subject_5 30 Female positive A Young
#> subject_6 80 Female positive B Elderly
#> subject_7 59 Male positive B Adult
#> subject_8 60 Male positive C Adult
#> subject_9 55 Male positive B Young
#> subject_10 60 Male positive B Adult
#> subject_11 71 Female negative C Adult
#> subject_12 91 Male positive A Elderly
#> subject_13 8 Female positive B Young
#> subject_14 60 Male negative A Adult
#> subject_15 1 Female negative B Young
#> subject_16 40 Female positive A Young
#> subject_17 48 Male negative B Young
#> subject_18 21 Male negative A Young
#> subject_19 66 Male positive B Adult
#> subject_20 20 Female negative B Young
#> subject_21 6 Female negative A Young
#> subject_22 19 Male negative A Young
#> subject_23 75 Male negative C Adult
#> subject_24 99 Male negative C Elderly
#> subject_25 30 Female negative C Young
#> subject_26 77 Female negative B Elderly
#> subject_27 36 Female negative B Young
#> subject_28 63 Female negative A Adult
#> subject_29 91 Male negative A Elderly
#> subject_30 62 Female positive B Adult
#> subject_31 24 Female positive B Young
#> subject_32 84 Male positive B Elderly
#> subject_33 77 Male positive B Elderly
#> subject_34 13 Female positive A Young
#> subject_35 60 Male negative A Adult
#> subject_36 66 Male positive B Adult
#> subject_37 89 Male negative C Elderly
#> subject_38 98 Male positive A Elderly
#> subject_39 37 Male positive A Young
#> subject_40 48 Male positive A Young
#> subject_41 35 Male positive B Young
#> subject_42 23 Male positive C Young
#> subject_43 56 Male negative B Adult
#> subject_44 78 Male negative A Elderly
#> subject_45 29 Female positive C Young
#> subject_46 53 Male negative A Young
#> subject_47 78 Male positive B Elderly
#> subject_48 35 Female positive A Young
#> subject_49 92 Female negative C Elderly
#> subject_50 36 Female negative C Young
# Display the plot of the variable before binning (the "before" plot
# from the returned list).
result$plot.unbinned
# Display the plot of the variable after binning (the "after" plot
# from the returned list).
result$plot.binned