Create a multi-assay experiment from MetaScope output for use with animalcules

Upon completion of the MetaScope pipeline, users can analyze and visualize abundances in their samples using the animalcules package. This function allows interoperability of metascope_id output with both animalcules and QIIME. After running this function, the user should save the returned MAE to an RDS file using a function like saveRDS to upload the output into the animalcules package.

Usage

convert_animalcules(
  meta_counts,
  annot_path,
  which_annot_col,
  end_string = ".metascope_id.csv",
  qiime_biom_out = FALSE,
  path_to_write = ".",
  accession_path = NULL
)

Arguments

meta_counts: A vector of filepaths to the counts ID CSVs output by metascope_id().
annot_path: The filepath to the CSV annotation file for the samples. This CSV metadata/annotation file should contain at least two columns. One column, the one specified in which_annot_col, must hold the names of all samples WITHOUT the extension listed in end_string; e.g., for output file "sample_x76.metascope_id.csv", that column should contain the entry "sample_x76". Sample names containing the characters "_", "-", and "." are fine; however, sample names beginning with numbers should be renamed to have a prefix, e.g. "777897sample" should be renamed to "X777897sample" in both the output file name and the annotation file.
which_annot_col: The name of the column of the annotation file containing the sample IDs. These should be the same as the meta_counts root filenames.
end_string: The end string used at the end of the metascope_id files. Default is ".metascope_id.csv".
qiime_biom_out: Logical; should a QIIME-compatible biom file also be output? If TRUE, two files will be saved: one is a biom file of the counts table, and the other is a specifically formatted mapping file of metadata information. Default is FALSE.
path_to_write: If qiime_biom_out = TRUE, where should output QIIME files be written? Should be a character string of the folder path. Default is '.', i.e. the current working directory.
accession_path: (character) Path to taxonomizr accessions. See taxonomizr::prepareDatabase().

Value

Returns a MultiAssayExperiment object of combined sample counts data; if qiime_biom_out = TRUE, a biom file and a mapping file for analysis with QIIME are also written to path_to_write. The MultiAssayExperiment will have a counts assay ("MGX").

Examples

# Scratch directory for the example; it is removed again at the end
tempfolder <- tempfile()
dir.create(tempfolder)

# Create three different samples. Each file name is the sample ID followed by
# the suffix that is later passed as `end_string`, so stripping that suffix
# recovers exactly the IDs listed in the annotation file.
samp_names <- c("X123", "X456", "X789")
all_files <- file.path(tempfolder,
                       paste0(samp_names, ".metascope_id.csv"))

# Write a small simulated metascope_id-style CSV to `out_file`.
#
# The table holds three fixed taxa (two Staphylococcus aureus strains and
# Measles virus) with randomly drawn read counts, sorted by decreasing read
# count. The message "Done!" is emitted once the file has been written.
#
# Args:
#   out_file: Path of the CSV file to create.
# Returns:
#   `out_file`, unchanged, so the function can be mapped over file paths.
create_IDcsv <- function(out_file) {
  final_taxids <- c("273036", "418127", "11234")
  final_genomes <- c(
    "Staphylococcus aureus RF122, complete sequence",
    "Staphylococcus aureus subsp. aureus Mu3, complete sequence",
    "Measles virus, complete genome")
  n_taxa <- length(final_taxids)
  # Sampling without replacement, so the read counts are distinct
  best_hit <- sample(seq(100, 1050), n_taxa)
  # `|>` binds tighter than `/`: `x / sum(x) |> round(2)` would round only
  # the denominator. Round the whole ratio explicitly instead.
  proportion <- round(best_hit / sum(best_hit), 2)
  EMreads <- best_hit + round(stats::runif(n_taxa), 1)
  EMprop <- proportion + 0.003
  dplyr::tibble(TaxonomyID = final_taxids,
                Genome = final_genomes,
                read_count = best_hit, Proportion = proportion,
                EMreads = EMreads, EMProportion = EMprop) |>
    dplyr::arrange(dplyr::desc(.data$read_count)) |>
    utils::write.csv(file = out_file, row.names = FALSE)
  message("Done!")
  out_file
}
# Write one simulated ID file per sample. The result is a character vector of
# the written paths (one string per input path).
out_files <- vapply(
  X = all_files,
  FUN = create_IDcsv,
  FUN.VALUE = character(1)
)
#> Done!
#> Done!
#> Done!

# Write the sample annotation table (one row per sample) to a CSV file
annot_dat <- file.path(tempfolder, "annot.csv")
utils::write.csv(
  dplyr::tibble(
    Sample = samp_names,
    RSV = c("pos", "neg", "pos"),
    month = c("March", "July", "Aug"),
    yrsold = c(0.5, 0.6, 0.2)
  ),
  file = annot_dat,
  row.names = FALSE
)

# Locate the example taxonomizr accession database installed with MetaScope
tmp_accession <- system.file(
  "extdata", "example_accessions.sql",
  package = "MetaScope"
)

# Combine the per-sample ID files and the annotation table into an MAE
outMAE <- convert_animalcules(
  meta_counts = out_files,
  annot_path = annot_dat,
  which_annot_col = "Sample",
  end_string = ".metascope_id.csv",
  qiime_biom_out = FALSE,
  accession_path = tmp_accession
)
#> Looking up taxon UIDs in NCBI database

# Remove the scratch directory and everything written into it
unlink(tempfolder, recursive = TRUE)