diff --git a/DESCRIPTION b/DESCRIPTION index ebd2f98..f761884 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -77,4 +77,4 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index 682dd6f..ea1a1fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,8 @@ export(log_snakemake) export(parse_tax) export(paste_oxford_list) export(pool_taxon_counts) -export(read_dist) +export(read_lt_as_tbl) +export(read_lt_mat) export(read_tax) export(set_knitr_opts) export(theme_lucas) diff --git a/R/read_dist.R b/R/read_dist.R index c086531..e19357c 100644 --- a/R/read_dist.R +++ b/R/read_dist.R @@ -1,8 +1,15 @@ -#' Read in lower left triangular matrix from file +#' @name read_dist +#' @title Read lower triangular distance files as tibbles and matrices +NULL + +#' @describeIn read_dist Read in lower left triangular matrix as a long tibble #' -#' @param dist_filename filename of lower left triangular matrix (.dist) +#' Assumes the file is a phylip-formatted lower left triangular matrix +#' as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} #' -#' @return distance matrix as a tibble +#' @param filename file name of a lower left triangular matrix (`.dist`) +#' +#' @return matrix as a `tibble` in long format #' @export #' @author Nick Lesniak, \email{nlesniak@@umich.edu} #' @@ -13,7 +20,7 @@ #' ) #' dist_tbl <- read_dist(dist_filepath) #' head(dist_tbl) -read_dist <- function(dist_filename) { +read_lt_as_tbl <- function(filename) { distances <- rows <- NULL # TODO: input validation - make sure file has expected format & throw errors if it doesn't # read in the first row to determine the matrix dimensions @@ -39,3 +46,44 @@ read_dist <- function(dist_filename) { dplyr::filter(!is.na(distances)) ) } + +#' @describeIn read_dist Read in lower left triangular matrix +#' +#' +#' @return matrix +#' @export +#' @author Pat Schloss, \email{pschloss@@umich.edu} 
+read_lt_mat <- function(filename){ + + file <- scan(filename, + what=character(), + quiet=TRUE, + sep="\n") + + n_samples <- as.numeric(file[1]) + file <- file[-1] + + file_split <- strsplit(file, "\t") + + fill_in <- function(x, length){ + c(x, rep("0", length - length(x))) + } + + filled <- lapply(file_split, fill_in, length=n_samples + 1) + + samples_distance_matrix <- do.call(rbind, filled) + + samples <- samples_distance_matrix[,1] + + dist_matrix <- samples_distance_matrix[,-1] + dist_matrix <- matrix(as.numeric(dist_matrix), nrow=n_samples) + + if(sum(dist_matrix[upper.tri(dist_matrix)]) == 0){ + dist_matrix <- dist_matrix+t(dist_matrix) + } + + rownames(dist_matrix) <- samples + colnames(dist_matrix) <- samples + + return(dist_matrix) +} diff --git a/man/read_dist.Rd b/man/read_dist.Rd index 3f76f6e..3d38120 100644 --- a/man/read_dist.Rd +++ b/man/read_dist.Rd @@ -2,19 +2,35 @@ % Please edit documentation in R/read_dist.R \name{read_dist} \alias{read_dist} -\title{Read in lower left triangular matrix from file} +\alias{read_lt_as_tbl} +\alias{read_lt_mat} +\title{Read lower triangular distance files as tibbles and matrices} \usage{ -read_dist(dist_filename) +read_lt_as_tbl(filename) + +read_lt_mat(filename) } \arguments{ -\item{dist_filename}{filename of lower left triangular matrix (.dist)} +\item{filename}{file name of a lower left triangular matrix (\code{.dist})} } \value{ -distance matrix as a tibble +matrix as a \code{tibble} in long format + +matrix } \description{ -Read in lower left triangular matrix from file +Read lower triangular distance files as tibbles and matrices } +\section{Functions}{ +\itemize{ +\item \code{read_lt_as_tbl()}: Read in lower left triangular matrix as a long tibble + +Assumes the file is a phylip-formatted lower left triangular matrix +as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} + +\item \code{read_lt_mat()}: Read in lower left triangular matrix + +}} \examples{ dist_filepath <- 
system.file("extdata", "sample.final.thetayc.0.03.lt.ave.dist", @@ -25,4 +41,6 @@ head(dist_tbl) } \author{ Nick Lesniak, \email{nlesniak@umich.edu} + +Pat Schloss, \email{pschloss@umich.edu} } diff --git a/tests/testthat/test-read_dist.R b/tests/testthat/test-read_dist.R index efe7728..05c9569 100644 --- a/tests/testthat/test-read_dist.R +++ b/tests/testthat/test-read_dist.R @@ -20,7 +20,7 @@ test_that("read_dist works on example file", { row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame") ) - dist_out <- read_dist(system.file("extdata", + dist_out <- read_lt_as_tbl(system.file("extdata", "sample.final.thetayc.0.03.lt.ave.dist", package = "schtools" ))