From 33b23840f91fad127a448a0da3c9847d0313c4aa Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 31 Mar 2023 00:05:25 -0400 Subject: [PATCH 1/6] Rename read_dist() to read_dist_lt_as_tbl() --- R/read_dist.R | 2 +- tests/testthat/test-read_dist.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/read_dist.R b/R/read_dist.R index c086531..01ab568 100644 --- a/R/read_dist.R +++ b/R/read_dist.R @@ -13,7 +13,7 @@ #' ) #' dist_tbl <- read_dist(dist_filepath) #' head(dist_tbl) -read_dist <- function(dist_filename) { +read_dist_lt_as_tbl <- function(dist_filename) { distances <- rows <- NULL # TODO: input validation - make sure file has expected format & throw errors if it doesn't # read in the first row to determine the matrix dimensions diff --git a/tests/testthat/test-read_dist.R b/tests/testthat/test-read_dist.R index efe7728..05c9569 100644 --- a/tests/testthat/test-read_dist.R +++ b/tests/testthat/test-read_dist.R @@ -20,7 +20,7 @@ test_that("read_dist works on example file", { row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame") ) - dist_out <- read_dist(system.file("extdata", + dist_out <- read_dist_lt_as_tbl(system.file("extdata", "sample.final.thetayc.0.03.lt.ave.dist", package = "schtools" )) From 4cb1d7c5d9bb2c489dd07cf1ff62ec35bb5c2036 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 31 Mar 2023 00:08:26 -0400 Subject: [PATCH 2/6] Improve doc, add url --- R/read_dist.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/read_dist.R b/R/read_dist.R index 01ab568..c5dd232 100644 --- a/R/read_dist.R +++ b/R/read_dist.R @@ -1,8 +1,11 @@ #' Read in lower left triangular matrix from file #' -#' @param dist_filename filename of lower left triangular matrix (.dist) +#' Assumes the distance file is a phylip-formatted lower left triangular matrix +#' as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} #' -#' @return distance matrix as a tibble +#' @param dist_filename file name of a lower left triangular matrix (`.dist`) +#' +#' @return distance matrix as a `tibble` in long format #' @export #' @author Nick Lesniak, \email{nlesniak@@umich.edu} #' From 532dd5055942fde4b778b994eed7282c005aadea Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 31 Mar 2023 00:09:11 -0400 Subject: [PATCH 3/6] Improve arg --- R/read_dist.R | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/R/read_dist.R b/R/read_dist.R index c5dd232..73c9268 100644 --- a/R/read_dist.R +++ b/R/read_dist.R @@ -3,7 +3,7 @@ #' Assumes the distance file is a phylip-formatted lower left triangular matrix #' as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} #' -#' @param dist_filename file name of a lower left triangular matrix (`.dist`) +#' @param filename file name of a lower left triangular matrix (`.dist`) #' #' @return distance matrix as a `tibble` in long format #' @export @@ -16,7 +16,7 @@ #' ) #' dist_tbl <- read_dist(dist_filepath) #' head(dist_tbl) -read_dist_lt_as_tbl <- function(dist_filename) { +read_dist_lt_as_tbl <- function(filename) { distances <- rows <- NULL # TODO: input validation - make sure file has expected format & throw errors if it doesn't # read in the first row to determine the matrix dimensions @@ -42,3 +42,38 @@ read_dist_lt_as_tbl <- function(dist_filename) { dplyr::filter(!is.na(distances)) ) } + +read_matrix <- function(file_name){ + + file <- scan(file_name, + what=character(), + quiet=TRUE, + sep="\n") + + n_samples <- as.numeric(file[1]) + file <- file[-1] + + file_split <- strsplit(file, "\t") + + fill_in <- function(x, length){ + c(x, rep("0", length - length(x))) + } + + filled <- lapply(file_split, fill_in, length=n_samples + 1) + + samples_distance_matrix <- do.call(rbind, filled) + + samples <- samples_distance_matrix[,1] + + dist_matrix <- samples_distance_matrix[,-1] + dist_matrix <- matrix(as.numeric(dist_matrix), nrow=n_samples) + + if(sum(dist_matrix[upper.tri(dist_matrix)]) == 0){ + dist_matrix <- dist_matrix+t(dist_matrix) + } + + rownames(dist_matrix) <- samples + colnames(dist_matrix) <- samples + + return(dist_matrix) +} From 70086fdf61bbe0f504076f5fbeaf8fbf566457af Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 31 Mar 2023 00:11:44 -0400 Subject: [PATCH 4/6] Add Pat's function to read dist matrix From https://github.com/riffomonas/distances/blob/f5cb11b7d8c5a900249c5e676269699411f0092a/code/read_matrix.R Co-authored-by: Pat Schloss --- R/read_dist.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/read_dist.R b/R/read_dist.R index 73c9268..8e64deb 100644 --- a/R/read_dist.R +++ b/R/read_dist.R @@ -1,4 +1,4 @@ -#' Read in lower left triangular matrix from file +#' Read in lower left triangular matrix as a long tibble #' #' Assumes the distance file is a phylip-formatted lower left triangular matrix #' as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} @@ -43,7 +43,14 @@ read_dist_lt_as_tbl <- function(filename) { ) } -read_matrix <- function(file_name){ +#' Read in lower left triangular matrix +#' +#' @inheritParams read_dist_lt_as_tbl +#' +#' @return distance matrix +#' @export +#' @author Pat Schloss, \email{pschloss@@umich.edu} +read_dist_lt_as_mat <- function(filename){ file <- scan(file_name, what=character(), From c27e6d3396c91a12bed47553327a71fa88314580 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 31 Mar 2023 00:18:07 -0400 Subject: [PATCH 5/6] Bump roxygen --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ebd2f98..f761884 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -77,4 +77,4 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 From 3fdde905d9bd214ae671efed3a04e53a17323662 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 31 Mar 2023 00:18:23 -0400 Subject: [PATCH 6/6] Combine read_dist docs into one --- NAMESPACE | 3 ++- R/read_dist.R | 19 +++++++++++-------- man/read_dist.Rd | 28 +++++++++++++++++++++++----- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 682dd6f..ea1a1fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,8 @@ export(log_snakemake) export(parse_tax) export(paste_oxford_list) export(pool_taxon_counts) -export(read_dist) +export(read_lt_as_tbl) +export(read_lt_mat) export(read_tax) export(set_knitr_opts) export(theme_lucas) diff --git a/R/read_dist.R b/R/read_dist.R index 8e64deb..e19357c 100644 --- a/R/read_dist.R +++ b/R/read_dist.R @@ -1,11 +1,15 @@ -#' Read in lower left triangular matrix as a long tibble +#' @name read_dist +#' @title Read lower triangular distance files as tibbles and matrices +NULL + +#' @describeIn read_dist Read in lower left triangular matrix as a long tibble #' -#' Assumes the distance file is a phylip-formatted lower left triangular matrix +#' Assumes the file is a phylip-formatted lower left triangular matrix #' as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} #' #' @param filename file name of a lower left triangular matrix (`.dist`) #' -#' @return distance matrix as a `tibble` in long format +#' @return matrix as a `tibble` in long format #' @export #' @author Nick Lesniak, \email{nlesniak@@umich.edu} #' @@ -16,7 +20,7 @@ #' ) #' dist_tbl <- read_dist(dist_filepath) #' head(dist_tbl) -read_dist_lt_as_tbl <- function(filename) { +read_lt_as_tbl <- function(filename) { distances <- rows <- NULL # TODO: input validation - make sure file has expected format & throw errors if it doesn't # read in the first row to determine the matrix dimensions @@ -43,14 +47,13 @@ read_dist_lt_as_tbl <- function(filename) { ) } -#' Read in lower left triangular matrix +#' @describeIn read_dist Read in lower left triangular matrix #' -#' @inheritParams read_dist_lt_as_tbl #' -#' @return distance matrix +#' @return matrix #' @export #' @author Pat Schloss, \email{pschloss@@umich.edu} -read_dist_lt_as_mat <- function(filename){ +read_lt_mat <- function(filename){ file <- scan(file_name, what=character(), diff --git a/man/read_dist.Rd b/man/read_dist.Rd index 3f76f6e..3d38120 100644 --- a/man/read_dist.Rd +++ b/man/read_dist.Rd @@ -2,19 +2,35 @@ % Please edit documentation in R/read_dist.R \name{read_dist} \alias{read_dist} -\title{Read in lower left triangular matrix from file} +\alias{read_lt_as_tbl} +\alias{read_lt_mat} +\title{Read lower triangular distance files as tibbles and matrices} \usage{ -read_dist(dist_filename) +read_lt_as_tbl(filename) + +read_lt_mat(filename) } \arguments{ -\item{dist_filename}{filename of lower left triangular matrix (.dist)} +\item{filename}{file name of a lower left triangular matrix (\code{.dist})} } \value{ -distance matrix as a tibble +matrix as a \code{tibble} in long format + +matrix } \description{ -Read in lower left triangular matrix from file +Read lower triangular distance files as tibbles and matrices } +\section{Functions}{ +\itemize{ +\item \code{read_lt_as_tbl()}: Read in lower left triangular matrix as a long tibble + +Assumes the file is a phylip-formatted lower left triangular matrix +as described in \url{https://mothur.org/wiki/phylip-formatted_distance_matrix/} + +\item \code{read_lt_mat()}: Read in lower left triangular matrix + +}} \examples{ dist_filepath <- system.file("extdata", "sample.final.thetayc.0.03.lt.ave.dist", @@ -25,4 +41,6 @@ head(dist_tbl) } \author{ Nick Lesniak, \email{nlesniak@umich.edu} + +Pat Schloss, \email{pschloss@umich.edu} }