Browse Source

dplyr 0.8.0 support, fixes #7

v1.8.2
parent
commit
0b8084871d
  1. 4
      DESCRIPTION
  2. 12
      NAMESPACE
  3. 23
      NEWS.md
  4. 32
      R/age.R
  5. 51
      R/deprecated.R
  6. 24
      R/eucast_rules.R
  7. 225
      R/first_isolate.R
  8. 236
      R/freq.R
  9. 42
      R/key_antibiotics.R
  10. 22
      R/mdro.R
  11. 10
      R/misc.R
  12. 4
      R/mo.R
  13. 17
      R/resistance_predict.R
  14. 2
      R/rsi_calc.R
  15. 9
      man/AMR-deprecated.Rd
  16. 8
      man/age.Rd
  17. 6
      man/age_groups.Rd
  18. 4
      man/eucast_rules.Rd
  19. 80
      man/first_isolate.Rd
  20. 14
      man/freq.Rd
  21. 18
      man/key_antibiotics.Rd
  22. 5
      man/mdro.Rd
  23. 8
      man/resistance_predict.Rd
  24. 8
      tests/testthat/test-age.R
  25. 14
      tests/testthat/test-deprecated.R
  26. 31
      tests/testthat/test-first_isolate.R
  27. 4
      tests/testthat/test-mdro.R
  28. 50
      tests/testthat/test-portion.R
  29. 71
      tests/testthat/test-resistance_predict.R

4
DESCRIPTION

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
Package: AMR
Version: 0.5.0.9005
Date: 2018-12-15
Version: 0.5.0.9007
Date: 2018-12-22
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

12
NAMESPACE

@ -1,7 +1,6 @@ @@ -1,7 +1,6 @@
# Generated by roxygen2: do not edit by hand
S3method(as.data.frame,atc)
S3method(as.data.frame,bactid)
S3method(as.data.frame,frequency_tbl)
S3method(as.data.frame,mo)
S3method(as.double,mic)
@ -21,13 +20,11 @@ S3method(plot,frequency_tbl) @@ -21,13 +20,11 @@ S3method(plot,frequency_tbl)
S3method(plot,mic)
S3method(plot,rsi)
S3method(print,atc)
S3method(print,bactid)
S3method(print,frequency_tbl)
S3method(print,mic)
S3method(print,mo)
S3method(print,rsi)
S3method(pull,atc)
S3method(pull,bactid)
S3method(pull,mo)
S3method(skewness,data.frame)
S3method(skewness,default)
@ -50,7 +47,6 @@ export(age) @@ -50,7 +47,6 @@ export(age)
export(age_groups)
export(anti_join_microorganisms)
export(as.atc)
export(as.bactid)
export(as.mic)
export(as.mo)
export(as.rsi)
@ -68,6 +64,8 @@ export(count_df) @@ -68,6 +64,8 @@ export(count_df)
export(eucast_exceptional_phenotypes)
export(eucast_rules)
export(facet_rsi)
export(filter_first_isolate)
export(filter_first_weighted_isolate)
export(first_isolate)
export(freq)
export(frequency_tbl)
@ -77,12 +75,10 @@ export(geom_rsi) @@ -77,12 +75,10 @@ export(geom_rsi)
export(get_locale)
export(ggplot_rsi)
export(guess_atc)
export(guess_bactid)
export(guess_mo)
export(inner_join_microorganisms)
export(interpretive_reading)
export(is.atc)
export(is.bactid)
export(is.mic)
export(is.mo)
export(is.rsi)
@ -137,7 +133,6 @@ export(skewness) @@ -137,7 +133,6 @@ export(skewness)
export(theme_rsi)
export(top_freq)
exportMethods(as.data.frame.atc)
exportMethods(as.data.frame.bactid)
exportMethods(as.data.frame.frequency_tbl)
exportMethods(as.data.frame.mo)
exportMethods(as.double.mic)
@ -158,13 +153,11 @@ exportMethods(plot.frequency_tbl) @@ -158,13 +153,11 @@ exportMethods(plot.frequency_tbl)
exportMethods(plot.mic)
exportMethods(plot.rsi)
exportMethods(print.atc)
exportMethods(print.bactid)
exportMethods(print.frequency_tbl)
exportMethods(print.mic)
exportMethods(print.mo)
exportMethods(print.rsi)
exportMethods(pull.atc)
exportMethods(pull.bactid)
exportMethods(pull.mo)
exportMethods(skewness)
exportMethods(skewness.data.frame)
@ -214,6 +207,7 @@ importFrom(dplyr,left_join) @@ -214,6 +207,7 @@ importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_all)
importFrom(dplyr,mutate_at)
importFrom(dplyr,n)
importFrom(dplyr,n_distinct)
importFrom(dplyr,progress_estimated)
importFrom(dplyr,pull)

23
NEWS.md

@ -1,10 +1,25 @@ @@ -1,10 +1,25 @@
# 0.5.0.90xx (latest development version)
#### New
* **BREAKING**: removed deprecated functions, parameters and references to 'bactid'. Use `as.mo` to identify an MO code.
* Support for `dplyr` version 0.8.0
* Function `mo_failures` to review values that could not be coerced to a valid MO code, using `as.mo`. This latter function will now only show a maximum of 25 uncoerced values.
* Function `mo_renamed` to get a list of all returned values from `as.mo` that have had taxonomic renaming
* Function `age` to calculate the (patients) age in years
* Function `age_groups` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis (per age group).
* Function `age_groups` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group.
* Functions `filter_first_isolate` and `filter_first_weighted_isolate()` to shorten and fasten filtering on data sets with antimicrobial results, e.g.:
```r
septic_patients %>% filter_first_isolate()
# or
filter_first_isolate(septic_patients)
```
is the same as:
```r
septic_patients %>%
mutate(only_firsts = first_isolate(septic_patients, ...)) %>%
filter(only_firsts == TRUE) %>%
select(-only_firsts)
```
#### Changed
* Improvements for `as.mo`:
@ -18,6 +33,8 @@ @@ -18,6 +33,8 @@
* Function `first_isolate`:
* Will now use a column named like "patid" for the patient ID (parameter `col_patientid`), when this parameter was left blank
* Will now use a column named like "key(...)ab" or "key(...)antibiotics" for the key antibiotics (parameter `col_keyantibiotics`), when this parameter was left blank
* Removed parameter `output_logical`, the function will now always return a logical value
* Renamed parameter `filter_specimen` to `specimen_group`, although using `filter_specimen` will still work
* A note to the manual pages of the `portion` functions, that low counts can influence the outcome and that the `portion` functions may camouflage this, since they only return the portion (albeit being dependent on the `minimum` parameter)
* Function `mo_taxonomy` now contains the kingdom too
* Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank
@ -29,7 +46,11 @@ @@ -29,7 +46,11 @@
* Now honours the `decimal.mark` setting, which just like `format` defaults to `getOption("OutDec")`
* The new `big.mark` parameter will at default be `","` when `decimal.mark = "."` and `"."` otherwise
* Fix for header text where all observations are `NA`
* New parameter `droplevels` to exclude empty factor levels when input is a factor
* Factor levels will be in header when present
* Function `scale_y_percent` now has the `limits` parameter
* Automatic parameter filling for `mdro`, `key_antibiotics` and `eucast_rules`
* Updated examples for resistance prediction (`resistance_predict` function)
#### Other
* Updated licence text to emphasise GPL 2.0 and that this is an R package.

32
R/age.R

@ -19,28 +19,28 @@ @@ -19,28 +19,28 @@
#' Age in years of individuals
#'
#' Calculates age in years based on a reference date, which is the sytem time at default.
#' @param x date(s) - will be coerced with \code{\link{as.POSIXlt}}
#' @param y reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}}
#' @param x date(s), will be coerced with \code{\link{as.POSIXlt}}
#' @param reference reference date(s) (defaults to today), will be coerced with \code{\link{as.POSIXlt}}
#' @return Integer (no decimals)
#' @seealso age_groups
#' @seealso \code{\link{age_groups}} to splits age into groups
#' @importFrom dplyr if_else
#' @export
age <- function(x, y = Sys.Date()) {
if (length(x) != length(y)) {
if (length(y) == 1) {
y <- rep(y, length(x))
age <- function(x, reference = Sys.Date()) {
if (length(x) != length(reference)) {
if (length(reference) == 1) {
reference <- rep(reference, length(x))
} else {
stop("`x` and `y` must be of same length, or `y` must be of length 1.")
stop("`x` and `reference` must be of same length, or `reference` must be of length 1.")
}
}
x <- base::as.POSIXlt(x)
y <- base::as.POSIXlt(y)
if (any(y < x)) {
stop("`y` cannot be lower (older) than `x`.")
reference <- base::as.POSIXlt(reference)
if (any(reference < x)) {
stop("`reference` cannot be lower (older) than `x`.")
}
years_gap <- y$year - x$year
years_gap <- reference$year - x$year
# from https://stackoverflow.com/a/25450756/4575331
ages <- if_else(y$mon < x$mon | (y$mon == x$mon & y$mday < x$mday),
ages <- if_else(reference$mon < x$mon | (reference$mon == x$mon & reference$mday < x$mday),
as.integer(years_gap - 1),
as.integer(years_gap))
if (any(ages > 120)) {
@ -51,9 +51,9 @@ age <- function(x, y = Sys.Date()) { @@ -51,9 +51,9 @@ age <- function(x, y = Sys.Date()) {
#' Split ages into age groups
#'
#' Splits ages into groups defined by the \code{split} parameter.
#' Split ages into age groups defined by the \code{split} parameter. This allows for easier demographic (antimicrobial resistance) analysis.
#' @param x age, e.g. calculated with \code{\link{age}}
#' @param split_at values to split \code{x}, defaults to 0-11, 12-24, 26-54, 55-74 and 75+. See Details.
#' @param split_at values to split \code{x} at, defaults to age groups 0-11, 12-24, 26-54, 55-74 and 75+. See Details.
#' @details To split ages, the input can be:
#' \itemize{
#' \item{A numeric vector. A vector of \code{c(10, 20)} will split on 0-9, 10-19 and 20+. A value of only \code{50} will split on 0-49 and 50+.
@ -68,7 +68,7 @@ age <- function(x, y = Sys.Date()) { @@ -68,7 +68,7 @@ age <- function(x, y = Sys.Date()) {
#' }
#' @keywords age_group age
#' @return Ordered \code{\link{factor}}
#' @seealso age
#' @seealso \code{\link{age}} to determine ages based on one or more reference dates
#' @export
#' @examples
#' ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21)

51
R/deprecated.R

@ -23,57 +23,6 @@ @@ -23,57 +23,6 @@
#' @keywords internal
#' @name AMR-deprecated
#' @rdname AMR-deprecated
as.bactid <- function(...) {
.Deprecated("as.mo", package = "AMR")
as.mo(...)
}
#' @rdname AMR-deprecated
#' @export
is.bactid <- function(...) {
.Deprecated(new = "is.mo", package = "AMR")
is.mo(...)
}
#' @rdname AMR-deprecated
#' @export
guess_bactid <- function(...) {
.Deprecated(new = "guess_mo", package = "AMR")
guess_mo(...)
}
#' @exportMethod print.bactid
#' @export
#' @noRd
print.bactid <- function(x, ...) {
cat("Class 'bactid'\n")
print.default(as.character(x), quote = FALSE)
}
#' @exportMethod as.data.frame.bactid
#' @export
#' @noRd
as.data.frame.bactid <- function (x, ...) {
# same as as.data.frame.character but with removed stringsAsFactors
nm <- paste(deparse(substitute(x), width.cutoff = 500L),
collapse = " ")
if (!"nm" %in% names(list(...))) {
as.data.frame.vector(x, ..., nm = nm)
} else {
as.data.frame.vector(x, ...)
}
}
#' @exportMethod pull.bactid
#' @export
#' @importFrom dplyr pull
#' @noRd
pull.bactid <- function(.data, ...) {
pull(as.data.frame(.data), ...)
}
#' @rdname AMR-deprecated
#' @export
ratio <- function(x, ratio) {
.Deprecated(package = "AMR")

24
R/eucast_rules.R

@ -24,7 +24,6 @@ @@ -24,7 +24,6 @@
#' @param rules a character vector that specifies which rules should be applied - one or more of \code{c("breakpoints", "expert", "other", "all")}
#' @param verbose a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected
#' @param amcl,amik,amox,ampi,azit,azlo,aztr,cefa,cfep,cfot,cfox,cfra,cfta,cftr,cfur,chlo,cipr,clar,clin,clox,coli,czol,dapt,doxy,erta,eryt,fosf,fusi,gent,imip,kana,levo,linc,line,mero,mezl,mino,moxi,nali,neom,neti,nitr,norf,novo,oflo,oxac,peni,pipe,pita,poly,pris,qida,rifa,roxi,siso,teic,tetr,tica,tige,tobr,trim,trsu,vanc column name of an antibiotic, see Antibiotics
#' @param col_bactid deprecated, use \code{col_mo} instead.
#' @param ... parameters that are passed on to \code{eucast_rules}
#' @inheritParams first_isolate
#' @section Antibiotics:
@ -217,8 +216,7 @@ eucast_rules <- function(tbl, @@ -217,8 +216,7 @@ eucast_rules <- function(tbl,
tobr = 'tobr',
trim = 'trim',
trsu = 'trsu',
vanc = 'vanc',
col_bactid = NULL) {
vanc = 'vanc') {
EUCAST_VERSION_BREAKPOINTS <- "8.1, 2018"
EUCAST_VERSION_EXPERT_RULES <- "3.1, 2016"
@ -229,12 +227,12 @@ eucast_rules <- function(tbl, @@ -229,12 +227,12 @@ eucast_rules <- function(tbl,
# try to find columns based on type
# -- mo
if (!is.null(col_bactid)) {
col_mo <- col_bactid
warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.")
} else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) {
col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"]
message("NOTE: Using column `", col_mo, "` as input for `col_mo`.")
if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) {
col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"][1]
message(blue(paste0("NOTE: Using column `", bold(col_mo), "` as input for `col_mo`.")))
}
if (is.null(col_mo)) {
stop("`col_mo` must be set.", call. = FALSE)
}
if (!all(rules %in% c("breakpoints", "expert", "other", "all"))) {
@ -1731,12 +1729,14 @@ eucast_rules <- function(tbl, @@ -1731,12 +1729,14 @@ eucast_rules <- function(tbl,
} else {
colour <- blue
}
decimal.mark <- getOption("OutDec")
big.mark <- ifelse(decimal.mark != ",", ",", ".")
cat(bold(paste('\n=> EUCAST rules', paste0(wouldve, 'affected'),
amount_affected_rows %>% length() %>% format(big.mark = ","),
'out of', nrow(tbl_original) %>% format(big.mark = ","),
amount_affected_rows %>% length() %>% format(big.mark = big.mark, decimal.mark = decimal.mark),
'out of', nrow(tbl_original) %>% format(big.mark = big.mark, decimal.mark = decimal.mark),
'rows ->',
colour(paste0(wouldve, 'changed'),
amount_changed %>% format(big.mark = ","), 'test results.\n\n'))))
amount_changed %>% format(big.mark = big.mark, decimal.mark = decimal.mark), 'test results.\n\n'))))
}
if (verbose == TRUE) {

225
R/first_isolate.R

@ -29,18 +29,32 @@ @@ -29,18 +29,32 @@
#' @param col_keyantibiotics column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this.
#' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again
#' @param testcodes_exclude character vector with test codes that should be excluded (case-insensitive)
#' @param icu_exclude logical whether ICU isolates should be excluded
#' @param filter_specimen specimen group or type that should be excluded
#' @param output_logical return output as \code{logical} (will else be the values \code{0} or \code{1})
#' @param icu_exclude logical whether ICU isolates should be excluded (rows with value \code{TRUE} in column \code{col_icu})
#' @param specimen_group value in column \code{col_specimen} to filter on
#' @param type type to determine weighed isolates; can be \code{"keyantibiotics"} or \code{"points"}, see Details
#' @param ignore_I logical to determine whether antibiotic interpretations with \code{"I"} will be ignored when \code{type = "keyantibiotics"}, see Details
#' @param points_threshold points until the comparison of key antibiotics will lead to inclusion of an isolate when \code{type = "points"}, see Details
#' @param info print progress
#' @param col_bactid (deprecated, use \code{col_mo} instead)
#' @param col_genus (deprecated, use \code{col_mo} instead) column name of the genus of the microorganisms
#' @param col_species (deprecated, use \code{col_mo} instead) column name of the species of the microorganisms
#' @param ... parameters passed on to the \code{first_isolate} function
#' @details \strong{WHY THIS IS SO IMPORTANT} \cr
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
#' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
#'
#' The function \code{filter_first_isolate} is essentially equal to:
#' \preformatted{
#' tbl \%>\%
#' mutate(only_firsts = first_isolate(tbl, ...)) \%>\%
#' filter(only_firsts == TRUE) \%>\%
#' select(-only_firsts)
#' }
#' The function \code{filter_first_weighted_isolate} is essentially equal to:
#' \preformatted{
#' tbl \%>\%
#' mutate(keyab = key_antibiotics(.)) \%>\%
#' mutate(only_weighted_firsts = first_isolate(tbl,
#' col_keyantibiotics = "keyab", ...)) \%>\%
#' filter(only_weighted_firsts == TRUE) \%>\%
#' select(-only_weighted_firsts)
#' }
#' @section Key antibiotics:
#' There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr
#'
@ -49,31 +63,42 @@ @@ -49,31 +63,42 @@
#'
#' \strong{2. Using} \code{type = "points"} \strong{and parameter} \code{points_threshold} \cr
#' A difference from I to S|R (or vice versa) means 0.5 points, a difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate.
#' @rdname first_isolate
#' @keywords isolate isolates first
#' @seealso \code{\link{key_antibiotics}}
#' @export
#' @importFrom dplyr arrange_at lag between row_number filter mutate arrange
#' @return A vector to add to table, see Examples.
#' @importFrom crayon blue bold silver
#' @return Logical vector
#' @source Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
#' @examples
#' # septic_patients is a dataset available in the AMR package. It is true, genuine data.
#' ?septic_patients
#'
#' library(dplyr)
#' my_patients <- septic_patients %>%
#' # Filter on first isolates:
#' septic_patients %>%
#' mutate(first_isolate = first_isolate(.,
#' col_date = "date",
#' col_patient_id = "patient_id",
#' col_mo = "mo"))
#' col_mo = "mo")) %>%
#' filter(first_isolate == TRUE)
#'
#' # Which can be shortened to:
#' septic_patients %>%
#' filter_first_isolate()
#' # or for first weighted isolates:
#' septic_patients %>%
#' filter_first_weighted_isolate()
#'
#' # Now let's see if first isolates matter:
#' A <- my_patients %>%
#' A <- septic_patients %>%
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(gent), # gentamicin availability
#' resistance = portion_IR(gent)) # gentamicin resistance
#'
#' B <- my_patients %>%
#' filter(first_isolate == TRUE) %>% # the 1st isolate filter
#' B <- septic_patients %>%
#' filter_first_weighted_isolate() %>% # the 1st isolate filter
#' group_by(hospital_id) %>%
#' summarise(count = n_rsi(gent), # gentamicin availability
#' resistance = portion_IR(gent)) # gentamicin resistance
@ -83,6 +108,7 @@ @@ -83,6 +108,7 @@
#' # Gentamicin resitance in hospital D appears to be 5.4% higher than
#' # when you (erroneously) would have used all isolates!
#'
#'
#' ## OTHER EXAMPLES:
#'
#' \dontrun{
@ -99,29 +125,29 @@ @@ -99,29 +125,29 @@
#'
#' tbl$first_blood_isolate <-
#' first_isolate(tbl,
#' filter_specimen = 'Blood')
#' specimen_group = 'Blood')
#'
#' tbl$first_blood_isolate_weighed <-
#' first_isolate(tbl,
#' filter_specimen = 'Blood',
#' specimen_group = 'Blood',
#' col_keyantibiotics = 'keyab')
#'
#' tbl$first_urine_isolate <-
#' first_isolate(tbl,
#' filter_specimen = 'Urine')
#' specimen_group = 'Urine')
#'
#' tbl$first_urine_isolate_weighed <-
#' first_isolate(tbl,
#' filter_specimen = 'Urine',
#' specimen_group = 'Urine',
#' col_keyantibiotics = 'keyab')
#'
#' tbl$first_resp_isolate <-
#' first_isolate(tbl,
#' filter_specimen = 'Respiratory')
#' specimen_group = 'Respiratory')
#'
#' tbl$first_resp_isolate_weighed <-
#' first_isolate(tbl,
#' filter_specimen = 'Respiratory',
#' specimen_group = 'Respiratory',
#' col_keyantibiotics = 'keyab')
#' }
first_isolate <- function(tbl,
@ -135,28 +161,34 @@ first_isolate <- function(tbl, @@ -135,28 +161,34 @@ first_isolate <- function(tbl,
episode_days = 365,
testcodes_exclude = NULL,
icu_exclude = FALSE,
filter_specimen = NULL,
output_logical = TRUE,
specimen_group = NULL,
type = "keyantibiotics",
ignore_I = TRUE,
points_threshold = 2,
info = TRUE,
col_bactid = NULL,
col_genus = NULL,
col_species = NULL) {
...) {
if (!is.data.frame(tbl)) {
stop("`tbl` must be a data frame.", call. = FALSE)
stop("`tbl` must be a data.frame.", call. = FALSE)
}
dots <- unlist(list(...))
if (length(dots) != 0) {
# backwards compatibility with old parameters
dots.names <- dots %>% names()
if ('filter_specimen' %in% dots.names) {
specimen_group <- dots[which(dots.names == 'filter_specimen')]
}
}
# try to find columns based on type
# -- mo
if (!is.null(col_bactid)) {
col_mo <- col_bactid
warning("Use of `col_bactid` is deprecated. Use `col_mo` instead.")
} else if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) {
if (is.null(col_mo) & "mo" %in% lapply(tbl, class)) {
col_mo <- colnames(tbl)[lapply(tbl, class) == "mo"][1]
message("NOTE: Using column `", col_mo, "` as input for `col_mo`.")
message(blue(paste0("NOTE: Using column `", bold(col_mo), "` as input for `col_mo`.")))
}
if (is.null(col_mo)) {
stop("`col_mo` must be set.", call. = FALSE)
}
# -- date
@ -164,7 +196,7 @@ first_isolate <- function(tbl, @@ -164,7 +196,7 @@ first_isolate <- function(tbl,
for (i in 1:ncol(tbl)) {
if ("Date" %in% class(tbl %>% pull(i)) | "POSIXct" %in% class(tbl %>% pull(i))) {
col_date <- colnames(tbl)[i]
message("NOTE: Using column `", col_date, "` as input for `col_date`.")
message(blue(paste0("NOTE: Using column `", bold(col_date), "` as input for `col_date`.")))
break
}
}
@ -178,7 +210,7 @@ first_isolate <- function(tbl, @@ -178,7 +210,7 @@ first_isolate <- function(tbl,
# -- patient id
if (is.null(col_patient_id) & any(colnames(tbl) %like% "^(patient|patid)")) {
col_patient_id <- colnames(tbl)[colnames(tbl) %like% "^(patient|patid)"][1]
message("NOTE: Using column `", col_patient_id, "` as input for `col_patient_id`.")
message(blue(paste0("NOTE: Using column `", bold(col_patient_id), "` as input for `col_patient_id`.")))
}
if (is.null(col_patient_id)) {
stop("`col_patient_id` must be set.", call. = FALSE)
@ -187,18 +219,12 @@ first_isolate <- function(tbl, @@ -187,18 +219,12 @@ first_isolate <- function(tbl,
# -- key antibiotics
if (is.null(col_keyantibiotics) & any(colnames(tbl) %like% "^key.*(ab|antibiotics)")) {
col_keyantibiotics <- colnames(tbl)[colnames(tbl) %like% "^key.*(ab|antibiotics)"][1]
message("NOTE: Using column `", col_keyantibiotics, "` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.")
message(blue(paste0("NOTE: Using column `", bold(col_keyantibiotics), "` as input for `col_keyantibiotics`. Use ", bold("col_keyantibiotics = FALSE"), " to prevent this.")))
}
if (isFALSE(col_keyantibiotics)) {
col_keyantibiotics <- NULL
}
# col_mo OR col_genus+col_species must be available
if (is.null(col_mo) & (is.null(col_genus) | is.null(col_species))) {
stop("`col_mo` or both `col_genus` and `col_species` must be set.", call. = FALSE)
}
# check if columns exist
check_columns_existance <- function(column, tblname = tbl) {
if (NROW(tblname) <= 1 | NCOL(tblname) <= 1) {
@ -215,27 +241,23 @@ first_isolate <- function(tbl, @@ -215,27 +241,23 @@ first_isolate <- function(tbl,
check_columns_existance(col_date)
check_columns_existance(col_patient_id)
check_columns_existance(col_mo)
check_columns_existance(col_genus)
check_columns_existance(col_species)
check_columns_existance(col_testcode)
check_columns_existance(col_icu)
check_columns_existance(col_keyantibiotics)
if (!is.null(col_mo)) {
# join to microorganisms data set
tbl <- tbl %>%
mutate_at(vars(col_mo), as.mo) %>%
left_join_microorganisms(by = col_mo)
col_genus <- "genus"
col_species <- "species"
}
# join to microorganisms data set
tbl <- tbl %>%
mutate_at(vars(col_mo), as.mo) %>%
left_join_microorganisms(by = col_mo)
col_genus <- "genus"
col_species <- "species"
if (is.null(col_testcode)) {
testcodes_exclude <- NULL
}
# remove testcodes
if (!is.null(testcodes_exclude) & info == TRUE) {
cat('[Criteria] Excluded test codes:\n', toString(testcodes_exclude), '\n')
cat('[Criterion] Excluded test codes:\n', toString(testcodes_exclude), '\n')
}
if (is.null(col_icu)) {
@ -246,14 +268,14 @@ first_isolate <- function(tbl, @@ -246,14 +268,14 @@ first_isolate <- function(tbl,
}
if (is.null(col_specimen)) {
filter_specimen <- NULL
specimen_group <- NULL
}
# filter on specimen group and keyantibiotics when they are filled in
if (!is.null(filter_specimen)) {
if (!is.null(specimen_group)) {
check_columns_existance(col_specimen, tbl)
if (info == TRUE) {
cat('[Criteria] Excluded other than specimen group \'', filter_specimen, '\'\n', sep = '')
cat('[Criterion] Excluded other than specimen group \'', specimen_group, '\'\n', sep = '')
}
}
if (!is.null(col_keyantibiotics)) {
@ -274,11 +296,11 @@ first_isolate <- function(tbl, @@ -274,11 +296,11 @@ first_isolate <- function(tbl,
mutate(species = if_else(is.na(species) | species == "(no MO)", "", species),
genus = if_else(is.na(genus) | genus == "(no MO)", "", genus))
if (is.null(filter_specimen)) {
if (is.null(specimen_group)) {
# not filtering on specimen
if (icu_exclude == FALSE) {
if (info == TRUE & !is.null(col_icu)) {
cat('[Criteria] Included isolates from ICU.\n')
cat('[Criterion] Included isolates from ICU.\n')
}
tbl <- tbl %>%
arrange_at(c(col_patient_id,
@ -289,7 +311,7 @@ first_isolate <- function(tbl, @@ -289,7 +311,7 @@ first_isolate <- function(tbl,
row.end <- nrow(tbl)
} else {
if (info == TRUE) {
cat('[Criteria] Excluded isolates from ICU.\n')
cat('[Criterion] Excluded isolates from ICU.\n')
}
tbl <- tbl %>%
arrange_at(c(col_icu,
@ -310,7 +332,7 @@ first_isolate <- function(tbl, @@ -310,7 +332,7 @@ first_isolate <- function(tbl,
# filtering on specimen and only analyse these row to save time
if (icu_exclude == FALSE) {
if (info == TRUE & !is.null(col_icu)) {
cat('[Criteria] Included isolates from ICU.\n')
cat('[Criterion] Included isolates from ICU.\n')
}
tbl <- tbl %>%
arrange_at(c(col_specimen,
@ -319,14 +341,14 @@ first_isolate <- function(tbl, @@ -319,14 +341,14 @@ first_isolate <- function(tbl,
col_species,
col_date))
suppressWarnings(
row.start <- which(tbl %>% pull(col_specimen) == filter_specimen) %>% min(na.rm = TRUE)
row.start <- which(tbl %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE)
)
suppressWarnings(
row.end <- which(tbl %>% pull(col_specimen) == filter_specimen) %>% max(na.rm = TRUE)
row.end <- which(tbl %>% pull(col_specimen) == specimen_group) %>% max(na.rm = TRUE)
)
} else {
if (info == TRUE) {
cat('[Criteria] Excluded isolates from ICU.\n')
cat('[Criterion] Excluded isolates from ICU.\n')
}
tbl <- tbl %>%
arrange_at(c(col_icu,
@ -336,11 +358,11 @@ first_isolate <- function(tbl, @@ -336,11 +358,11 @@ first_isolate <- function(tbl,
col_species,
col_date))
suppressWarnings(
row.start <- which(tbl %>% pull(col_specimen) == filter_specimen
row.start <- which(tbl %>% pull(col_specimen) == specimen_group
& tbl %>% pull(col_icu) == FALSE) %>% min(na.rm = TRUE)
)
suppressWarnings(
row.end <- which(tbl %>% pull(col_specimen) == filter_specimen
row.end <- which(tbl %>% pull(col_specimen) == specimen_group
& tbl %>% pull(col_icu) == FALSE) %>% max(na.rm = TRUE)
)
}
@ -352,12 +374,10 @@ first_isolate <- function(tbl, @@ -352,12 +374,10 @@ first_isolate <- function(tbl,
message('No isolates found.')
}
# NAs where genus is unavailable
tbl <- tbl %>%
mutate(real_first_isolate = if_else(genus == '', NA, FALSE))
if (output_logical == FALSE) {
tbl$real_first_isolate <- tbl %>% pull(real_first_isolate) %>% as.integer()
}
return(tbl %>% pull(real_first_isolate))
return(tbl %>%
mutate(real_first_isolate = if_else(genus == '', NA, FALSE)) %>%
pull(real_first_isolate)
)
}
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
@ -388,14 +408,14 @@ first_isolate <- function(tbl, @@ -388,14 +408,14 @@ first_isolate <- function(tbl,
weighted.notice <- 'weighted '
if (info == TRUE) {
if (type == 'keyantibiotics') {
cat('[Criteria] Inclusion based on key antibiotics, ')
cat('[Criterion] Inclusion based on key antibiotics, ')
if (ignore_I == FALSE) {
cat('not ')
}
cat('ignoring I.\n')
}
if (type == 'points') {
cat(paste0('[Criteria] Inclusion based on key antibiotics, using points threshold of '
cat(paste0('[Criterion] Inclusion based on key antibiotics, using points threshold of '
, points_threshold, '.\n'))
}
}
@ -458,19 +478,62 @@ first_isolate <- function(tbl, @@ -458,19 +478,62 @@ first_isolate <- function(tbl,
pull(real_first_isolate)
if (info == TRUE) {
message(paste0('Found ',
all_first %>% sum(na.rm = TRUE),
' first ', weighted.notice, 'isolates (',
(all_first %>% sum(na.rm = TRUE) / scope.size) %>% percent(),
' of isolates in scope [where genus was not empty] and ',
(all_first %>% sum(na.rm = TRUE) / tbl %>% nrow()) %>% percent(),
' of total)'))
}
if (output_logical == FALSE) {
all_first <- all_first %>% as.integer()
decimal.mark <- getOption("OutDec")
big.mark <- ifelse(decimal.mark != ",", ",", ".")
n_found <- base::sum(all_first, na.rm = TRUE)
p_found_total <- percent(n_found / nrow(tbl), force_zero = TRUE)
p_found_scope <- percent(n_found / scope.size, force_zero = TRUE)
# mark up number of found
n_found <- base::format(n_found, big.mark = big.mark, decimal.mark = decimal.mark)
if (p_found_total != p_found_scope) {
msg_txt <- paste0("=> Found ",
bold(paste0(n_found, " first ", weighted.notice, "isolates")),
" (", p_found_scope, " within scope and ", p_found_total, " of total)")
} else {
msg_txt <- paste0("=> Found ",
bold(paste0(n_found, " first ", weighted.notice, "isolates")),
" (", p_found_total, " of total)")
}
base::message(msg_txt)
}
all_first
}
#' @rdname first_isolate
#' @importFrom dplyr filter
#' @export
filter_first_isolate <- function(tbl,
col_date = NULL,
col_patient_id = NULL,
col_mo = NULL,
...) {
filter(tbl, first_isolate(tbl = tbl,
col_date = col_date,
col_patient_id = col_patient_id,
col_mo = col_mo,
...))
}
#' @rdname first_isolate
#' @importFrom dplyr %>% mutate filter
#' @export
filter_first_weighted_isolate <- function(tbl,
col_date = NULL,
col_patient_id = NULL,
col_mo = NULL,
col_keyantibiotics = NULL,
...) {
tbl_keyab <- tbl %>%
mutate(keyab = suppressMessages(key_antibiotics(.,
col_mo = col_mo,
...))) %>%
mutate(firsts = first_isolate(.,
col_date = col_date,
col_patient_id = col_patient_id,
col_mo = col_mo,
col_keyantibiotics = "keyab",
...))
tbl[which(tbl_keyab$firsts == TRUE),]
}

236
R/freq.R

@ -31,6 +31,7 @@ @@ -31,6 +31,7 @@
#' @param header a logical value indicating whether an informative header should be printed
#' @param title text to show above frequency table, at default to tries to coerce from the variables passed to \code{x}
#' @param na a character string to should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})
#' @param droplevels a logical value indicating whether in factors empty levels should be dropped
#' @param sep a character string to separate the terms when selecting multiple columns
#' @inheritParams base::format
#' @param f a frequency table
@ -56,11 +57,12 @@ @@ -56,11 +57,12 @@
#' \item{Median, using \code{\link[stats]{median}}, with percentage since oldest}
#' }
#'
#' In factors, all factor levels that are not existing in the input data will be dropped.
#'
#' The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties.
#' @importFrom stats fivenum sd mad
#' @importFrom grDevices boxplot.stats
#' @importFrom dplyr %>% arrange arrange_at desc filter_at funs group_by mutate mutate_at n_distinct pull select summarise tibble ungroup vars all_vars
#' @importFrom dplyr %>% arrange arrange_at desc filter_at funs group_by mutate mutate_at n n_distinct pull select summarise tibble ungroup vars all_vars
#' @importFrom utils browseVignettes
#' @importFrom hms is.hms
#' @importFrom crayon red green silver
@ -183,6 +185,7 @@ frequency_tbl <- function(x, @@ -183,6 +185,7 @@ frequency_tbl <- function(x,
header = !markdown,
title = NULL,
na = "<NA>",
droplevels = TRUE,
sep = " ",
decimal.mark = getOption("OutDec"),
big.mark = ifelse(decimal.mark != ",", ",", ".")) {
@ -190,23 +193,23 @@ frequency_tbl <- function(x, @@ -190,23 +193,23 @@ frequency_tbl <- function(x,
mult.columns <- 0
x.group = character(0)
df <- NULL
# x_haslevels <- !is.null(levels(x))
x.name <- NULL
cols <- NULL
if (any(class(x) == 'list')) {
if (any(class(x) == "list")) {
cols <- names(x)
x <- as.data.frame(x, stringsAsFactors = FALSE)
x.name <- "a list"
} else if (any(class(x) == 'matrix')) {
} else if (any(class(x) == "matrix")) {
x <- as.data.frame(x, stringsAsFactors = FALSE)
x.name <- "a matrix"
cols <- colnames(x)
if (all(cols %like% 'V[0-9]')) {
if (all(cols %like% "V[0-9]")) {
cols <- NULL
}
}
if (any(class(x) == 'data.frame')) {
if (any(class(x) == "data.frame")) {
x.group <- group_vars(x)
if (length(x.group) > 1) {
x.group <- x.group[1L]
@ -225,13 +228,18 @@ frequency_tbl <- function(x, @@ -225,13 +228,18 @@ frequency_tbl <- function(x,
if (ndots < 10) {
cols <- as.character(dots)
if (!all(cols %in% colnames(x))) {
stop("one or more columns not found: `", paste(cols, collapse = "`, `"), '`', call. = FALSE)
stop("one or more columns not found: `", paste(cols, collapse = "`, `"), "`", call. = FALSE)
}
if (length(x.group) > 0) {
x.group_cols <- c(x.group, cols)
df <- x %>%
group_by_at(vars(x.group_cols)) %>%
summarise(count = n())
# if (droplevels == TRUE) {
# x <- x %>% mutate_at(vars(x.group_cols), droplevels)
# }
suppressWarnings(
df <- x %>%
group_by_at(vars(x.group_cols)) %>%
summarise(count = n())
)
if (na.rm == TRUE) {
df <- df %>% filter_at(vars(cols), all_vars(!is.na(.)))
}
@ -250,16 +258,21 @@ frequency_tbl <- function(x, @@ -250,16 +258,21 @@ frequency_tbl <- function(x,
mutate_at(vars(x.group), funs(ifelse(lag(.) == ., "", .)))
df[1, 1] <- df.topleft
colnames(df)[1:2] <- c("group", "item")
if (!is.null(levels(df$item)) & droplevels == TRUE) {
# is factor
df <- df %>% filter(count != 0)
}
}
if (length(cols) > 0) {
x <- x[, cols]
}
} else if (ndots >= 10) {
stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
stop("A maximum of 9 columns can be analysed at the same time.", call. = FALSE)
} else {
cols <- NULL
}
} else if (any(class(x) == 'table')) {
} else if (any(class(x) == "table")) {
x <- as.data.frame(x, stringsAsFactors = FALSE)
# now this DF contains 3 columns: the 2 vars and a Freq column
# paste the first 2 cols and repeat them Freq times:
@ -274,18 +287,18 @@ frequency_tbl <- function(x, @@ -274,18 +287,18 @@ frequency_tbl <- function(x,
}
if (!is.null(ncol(x))) {
if (ncol(x) == 1 & any(class(x) == 'data.frame')) {
if (ncol(x) == 1 & any(class(x) == "data.frame")) {
x <- x %>% pull(1)
} else if (ncol(x) < 10) {
mult.columns <- ncol(x)
x <- do.call(paste, c(x[colnames(x)], sep = sep))
} else {
stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
stop("A maximum of 9 columns can be analysed at the same time.", call. = FALSE)
}
}
if (mult.columns > 1) {
NAs <- x[is.na(x) | x == trimws(strrep('NA ', mult.columns))]
NAs <- x[is.na(x) | x == trimws(strrep("NA ", mult.columns))]
} else {
NAs <- x[is.na(x)]
}
@ -296,91 +309,109 @@ frequency_tbl <- function(x, @@ -296,91 +309,109 @@ frequency_tbl <- function(x,
class(x) <- x_class
}
# if (sort.count == FALSE & 'factor' %in% class(x)) {
# warning("Sorting a factor sorts on factor level, not necessarily alphabetically.", call. = FALSE)
# }
header_txt <- character(0)
markdown_line <- ''
markdown_line <- ""
if (markdown == TRUE) {
markdown_line <- '\n'
markdown_line <- "\n"
}
x_align <- 'l'
x_align <- "l"
if (mult.columns > 0) {
header_txt <- header_txt %>% paste0(markdown_line, 'Columns: ', mult.columns)
header_txt <- header_txt %>% paste0(markdown_line, "Columns: ", mult.columns)
} else {
header_txt <- header_txt %>% paste0(markdown_line, 'Class: ', class(x) %>% rev() %>% paste(collapse = " > "))
header_txt <- header_txt %>% paste0(markdown_line, "Class: ", class(x) %>% rev() %>% paste(collapse = " > "))
if (!mode(x) %in% class(x)) {
header_txt <- header_txt %>% paste0(silver(paste0(" (", mode(x), ")")))
}
}
if ((length(NAs) + length(x) > 0) > 0) {
na_txt <- paste0(NAs %>% length() %>% format(decimal.mark = decimal.mark, big.mark = big.mark), ' = ',
na_txt <- paste0(NAs %>% length() %>% format(decimal.mark = decimal.mark, big.mark = big.mark), " = ",
(NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark) %>%
sub('NaN', '0', ., fixed = TRUE))
sub("NaN", "0", ., fixed = TRUE))
if (!na_txt %like% "^0 =") {
na_txt <- red(na_txt)
} else {
na_txt <- green(na_txt)
}
na_txt <- paste0('(of which NA: ', na_txt, ')')
na_txt <- paste0("(of which NA: ", na_txt, ")")
} else {
na_txt <- ""
}
header_txt <- header_txt %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(decimal.mark = decimal.mark, big.mark = big.mark),
' ', na_txt)
header_txt <- header_txt %>% paste0(markdown_line, '\nUnique: ', x %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
if (!is.null(levels(x))) {
n_levels <- x %>% levels() %>% length()
n_levels_empty <- n_levels - x %>% droplevels() %>% levels() %>% length()
n_levels_list <- levels(x)
if (n_levels > 5) {
n_levels_list <- c(n_levels_list[1:5], "...")
}
if (is.ordered(x)) {
n_levels_list <- paste0(levels(x), collapse = " < ")
} else {
n_levels_list <- paste0(levels(x), collapse = ", ")
}
header_txt <- header_txt %>% paste0(markdown_line, "\nLevels: ", n_levels_list)
# drop levels of non-existing factor values,
# since dplyr >= 0.8.0 does not do this anymore in group_by
if (droplevels == TRUE) {
x <- droplevels(x)
}
}
header_txt <- header_txt %>% paste0(markdown_line, "\nLength: ", (NAs %>% length() + x %>% length()) %>% format(decimal.mark = decimal.mark, big.mark = big.mark),
" ", na_txt)
header_txt <- header_txt %>% paste0(markdown_line, "\nUnique: ", x %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
if (NROW(x) > 0 & any(class(x) == "character")) {
header_txt <- header_txt %>% paste0('\n')
header_txt <- header_txt %>% paste0(markdown_line, '\nShortest: ', x %>% base::nchar() %>% base::min(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, '\nLongest: ', x %>% base::nchar() %>% base::max(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0("\n")
header_txt <- header_txt %>% paste0(markdown_line, "\nShortest: ", x %>% base::nchar() %>% base::min(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, "\nLongest: ", x %>% base::nchar() %>% base::max(na.rm = TRUE) %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
}
if (NROW(x) > 0 & any(class(x) == "mo")) {
header_txt <- header_txt %>% paste0('\n')
header_txt <- header_txt %>% paste0(markdown_line, '\nFamilies: ', x %>% mo_family() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, '\nGenera: ', x %>% mo_genus() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, '\nSpecies: ', x %>% mo_species() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0("\n")
header_txt <- header_txt %>% paste0(markdown_line, "\nFamilies: ", x %>% mo_family() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, "\nGenera: ", x %>% mo_genus() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, "\nSpecies: ", x %>% mo_species() %>% n_distinct() %>% format(decimal.mark = decimal.mark, big.mark = big.mark))
}
if (NROW(x) > 0 & any(class(x) == "difftime") & !is.hms(x)) {
header_txt <- header_txt %>% paste0('\n')
header_txt <- header_txt %>% paste(markdown_line, '\nUnits: ', attributes(x)$units)
header_txt <- header_txt %>% paste0("\n")
header_txt <- header_txt %>% paste(markdown_line, "\nUnits: ", attributes(x)$units)
x <- as.double(x)
# after this, the numeric header_txt continues
}
if (NROW(x) > 0 & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
if (NROW(x) > 0 & any(class(x) %in% c("double", "integer", "numeric", "raw", "single"))) {
# right align number
Tukey_five <- stats::fivenum(x, na.rm = TRUE)
x_align <- 'r'
header_txt <- header_txt %>% paste0('\n')
header_txt <- header_txt %>% paste(markdown_line, '\nMean: ', x %>% base::mean(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, '\nStd. dev.: ', x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark),
' (CV: ', x %>% cv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark),
', MAD: ', x %>% stats::mad(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ')')
header_txt <- header_txt %>% paste0(markdown_line, '\nFive-Num: ', Tukey_five %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark) %>% trimws() %>% paste(collapse = ' | '),
' (IQR: ', (Tukey_five[4] - Tukey_five[2]) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark),
', CQV: ', x %>% cqv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ')')
x_align <- "r"
header_txt <- header_txt %>% paste0("\n")
header_txt <- header_txt %>% paste(markdown_line, "\nMean: ", x %>% base::mean(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark))
header_txt <- header_txt %>% paste0(markdown_line, "\nStd. dev.: ", x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark),
" (CV: ", x %>% cv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark),
", MAD: ", x %>% stats::mad(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ")")
header_txt <- header_txt %>% paste0(markdown_line, "\nFive-Num: ", Tukey_five %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark) %>% trimws() %>% paste(collapse = " | "),
" (IQR: ", (Tukey_five[4] - Tukey_five[2]) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark),
", CQV: ", x %>% cqv(na.rm = TRUE) %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ")")
outlier_length <- length(boxplot.stats(x)$out)
header_txt <- header_txt %>% paste0(markdown_line, '\nOutliers: ', outlier_length)
header_txt <- header_txt %>% paste0(markdown_line, "\nOutliers: ", outlier_length)
if (outlier_length > 0) {
header_txt <- header_txt %>% paste0(' (unique count: ', boxplot.stats(x)$out %>% n_distinct(), ')')
header_txt <- header_txt %>% paste0(" (unique count: ", boxplot.stats(x)$out %>% n_distinct(), ")")
}
}
if (NROW(x) > 0 & any(class(x) == "rsi")) {
header_txt <- header_txt %>% paste0('\n')
header_txt <- header_txt %>% paste0("\n")
cnt_S <- sum(x == "S", na.rm = TRUE)
cnt_IR <- sum(x %in% c("I", "R"), na.rm = TRUE)
header_txt <- header_txt %>% paste(markdown_line, '\n%IR: ',
header_txt <- header_txt %>% paste(markdown_line, "\n%IR: ",
(cnt_IR / sum(!is.na(x), na.rm = TRUE)) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark),
paste0('(ratio S : IR = 1.0 : ', (cnt_IR / cnt_S) %>% format(digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark), ")"))
paste0("(ratio S : IR = 1.0 : ", (cnt_IR / cnt_S) %>% format(digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark), ")"))
if (NROW(x) < 30) {
header_txt <- header_txt %>% paste(markdown_line, red('\nToo few isolates for reliable resistance interpretation.'))
header_txt <- header_txt %>% paste(markdown_line, red("\nToo few isolates for reliable resistance interpretation."))
}
}
@ -389,29 +420,29 @@ frequency_tbl <- function(x, @@ -389,29 +420,29 @@ frequency_tbl <- function(x,
x <- x %>% as.POSIXlt()
formatdates <- "%H:%M:%S"
}
if (NROW(x) > 0 & any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
header_txt <- header_txt %>% paste0('\n')
if (NROW(x) > 0 & any(class(x) %in% c("Date", "POSIXct", "POSIXlt"))) {
header_txt <- header_txt %>% paste0("\n")
mindate <- x %>% min(na.rm = TRUE)
maxdate <- x %>% max(na.rm = TRUE)
maxdate_days <- difftime(maxdate, mindate, units = 'auto') %>% as.double()
maxdate_days <- difftime(maxdate, mindate, units = "auto") %>% as.double()
mediandate <- x %>% median(na.rm = TRUE)
median_days <- difftime(mediandate, mindate, units = 'auto') %>% as.double()
median_days <- difftime(mediandate, mindate, units = "auto") %>% as.double()
if (formatdates == "%H:%M:%S") {
# hms
header_txt <- header_txt %>% paste0(markdown_line, '\nEarliest: ', mindate %>% format(formatdates) %>% trimws())
header_txt <- header_txt %>% paste0(markdown_line, '\nLatest: ', maxdate %>% format(formatdates) %>% trimws(),
' (+', difftime(maxdate, mindate, units = 'mins') %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ' min.)')
header_txt <- header_txt %>% paste0(markdown_line, "\nEarliest: ", mindate %>% format(formatdates) %>% trimws())
header_txt <- header_txt %>% paste0(markdown_line, "\nLatest: ", maxdate %>% format(formatdates) %>% trimws(),
" (+", difftime(maxdate, mindate, units = "mins") %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), " min.)")
} else {
# other date formats
header_txt <- header_txt %>% paste0(markdown_line, '\nOldest: ', mindate %>% format(formatdates) %>% trimws())
header_txt <- header_txt %>% paste0(markdown_line, '\nNewest: ', maxdate %>% format(formatdates) %>% trimws(),
' (+', difftime(maxdate, mindate, units = 'auto') %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ')')
header_txt <- header_txt %>% paste0(markdown_line, "\nOldest: ", mindate %>% format(formatdates) %>% trimws())
header_txt <- header_txt %>% paste0(markdown_line, "\nNewest: ", maxdate %>% format(formatdates) %>% trimws(),
" (+", difftime(maxdate, mindate, units = "auto") %>% as.double() %>% format(digits = digits, decimal.mark = decimal.mark, big.mark = big.mark), ")")
}
header_txt <- header_txt %>% paste0(markdown_line, '\nMedian: ', mediandate %>% format(formatdates) %>% trimws(),
' (~', percent(median_days / maxdate_days, round = 0, decimal.mark = decimal.mark), ')')
header_txt <- header_txt %>% paste0(markdown_line, "\nMedian: ", mediandate %>% format(formatdates) %>% trimws(),
" (~", percent(median_days / maxdate_days, round = 0, decimal.mark = decimal.mark), ")")
}
if (any(class(x) == 'POSIXlt')) {
if (any(class(x) == "POSIXlt")) {
x <- x %>% format(formatdates)
}
@ -427,9 +458,9 @@ frequency_tbl <- function(x, @@ -427,9 +458,9 @@ frequency_tbl <- function(x,
nmax <- length(x)
}
column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent')
column_names_df <- c('item', 'count', 'percent', 'cum_count', 'cum_percent')
column_align <- c(x_align, 'r', 'r', 'r', 'r')
column_names <- c("Item", "Count", "Percent", "Cum. Count", "Cum. Percent")
column_names_df <- c("item", "count", "percent", "cum_count", "cum_percent")
column_align <- c(x_align, "r", "r", "r", "r")
if (is.null(df)) {
# create table with counts and percentages
@ -449,10 +480,10 @@ frequency_tbl <- function(x, @@ -449,10 +480,10 @@ frequency_tbl <- function(x,
column_align <- c("l", column_align)
}
if (df$item %>% paste(collapse = ',') %like% '\033') {
if (df$item %>% paste(collapse = ",") %like% "\033") {
# remove escape char
# see https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
df <- df %>% mutate(item = item %>% gsub('\033', ' ', ., fixed = TRUE))
df <- df %>% mutate(item = item %>% gsub("\033", " ", ., fixed = TRUE))
}
if (quote == TRUE) {
@ -475,9 +506,9 @@ frequency_tbl <- function(x, @@ -475,9 +506,9 @@ frequency_tbl <- function(x,
}
if (markdown == TRUE) {
tbl_format <- 'markdown'
tbl_format <- "markdown"
} else {
tbl_format <- 'pandoc'
tbl_format <- "pandoc"
}
if (!is.null(title)) {
@ -485,7 +516,7 @@ frequency_tbl <- function(x, @@ -485,7 +516,7 @@ frequency_tbl <- function(x,
}
structure(.Data = df,
class = c('frequency_tbl', class(df)),
class = c("frequency_tbl", class(df)),
opt = list(title = title,
data = x.name,
vars = cols,
@ -511,11 +542,11 @@ freq <- frequency_tbl @@ -511,11 +542,11 @@ freq <- frequency_tbl
#' @export
#' @importFrom dplyr top_n pull
top_freq <- function(f, n) {
if (!'frequency_tbl' %in% class(f)) {
stop('top_freq can only be applied to frequency tables', call. = FALSE)
if (!"frequency_tbl" %in% class(f)) {
stop("top_freq can only be applied to frequency tables", call. = FALSE)
}
if (!is.numeric(n) | length(n) != 1L) {
stop('For top_freq, `nmax` must be a number of length 1', call. = FALSE)
stop("For top_freq, `nmax` must be a number of length 1", call. = FALSE)
}
top <- f %>% top_n(n, count)
vect <- top %>% pull(item)
@ -562,10 +593,10 @@ diff.frequency_tbl <- function(x, y, ...) { @@ -562,10 +593,10 @@ diff.frequency_tbl <- function(x, y, ...) {
diff.percent = percent(
diff / count.x,
force_zero = TRUE)) %>%
mutate(diff = ifelse(diff %like% '^-',
mutate(diff = ifelse(diff %like% "^-",
diff,
paste0("+", diff)),
diff.percent = ifelse(diff.percent %like% '^-',
diff.percent = ifelse(diff.percent %like% "^-",
diff.percent,
paste0("+", diff.percent)))
@ -590,7 +621,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = @@ -590,7 +621,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
big.mark = ifelse(decimal.mark != ",", ",", "."),
...) {
opt <- attr(x, 'opt')
opt <- attr(x, "opt")
if (length(opt$vars) == 0) {
opt$vars <- NULL
@ -666,7 +697,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = @@ -666,7 +697,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
}
if (NROW(x) == 0) {
cat('\n\nNo observations.\n')
cat("\n\nNo observations.\n")
return(invisible())
}
@ -680,7 +711,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = @@ -680,7 +711,7 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
if (nrow(x) > opt$nmax & opt$tbl_format != "markdown") {
x.rows <- nrow(x)
x.unprinted <- base::sum(x[(opt$nmax + 1):nrow(x), 'count'], na.rm = TRUE)
x.unprinted <- base::sum(x[(opt$nmax + 1):nrow(x), "count"], na.rm = TRUE)
x.printed <- base::sum(x$count) - x.unprinted
if (opt$nmax.set == TRUE) {
@ -692,18 +723,18 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = @@ -692,18 +723,18 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
x <- x[1:nmax,]
if (opt$nmax.set == TRUE) {
footer <- paste('[ reached `nmax = ', opt$nmax, '`', sep = '')
footer <- paste("[ reached `nmax = ", opt$nmax, "`", sep = "")
} else {
footer <- '[ reached getOption("max.print.freq")'
}
footer <- paste(footer,
' -- omitted ',
" -- omitted ",
format(x.rows - opt$nmax, big.mark = opt$big.mark),
' entries, n = ',