(v1.0.1.9004) Support dplyr 1.0.0

v1.8.2
parent 3760bcb11e
commit 219cff403f
  1. 1
      .gitignore
  2. 10
      DESCRIPTION
  3. 39
      NAMESPACE
  4. 7
      NEWS.md
  5. 10
      R/ab.R
  6. 1
      R/amr.R
  7. 2
      R/atc_online.R
  8. 75
      R/disk.R
  9. 17
      R/join_microorganisms.R
  10. 4
      R/key_antibiotics.R
  11. 10
      R/mic.R
  12. 48
      R/mo.R
  13. 2
      R/pca.R
  14. 142
      R/progress_estimated.R
  15. 10
      R/rsi.R
  16. BIN
      R/sysdata.rda
  17. 2
      data-raw/country_analysis.R
  18. 40
      data-raw/reproduction_of_microorganisms.R
  19. 2
      docs/404.html
  20. 2
      docs/LICENSE-text.html
  21. 8
      docs/articles/EUCAST.html
  22. 4
      docs/articles/PCA.html
  23. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png
  24. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png
  25. 125
      docs/articles/benchmarks.html
  26. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png
  27. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png
  28. 2
      docs/articles/index.html
  29. 2
      docs/authors.html
  30. BIN
      docs/countries.png
  31. BIN
      docs/countries_large.png
  32. 11
      docs/index.html
  33. 59
      docs/news/index.html
  34. 10
      docs/reference/AMR.html
  35. 10
      docs/reference/WHONET.html
  36. 6
      docs/reference/age.html
  37. 6
      docs/reference/age_groups.html
  38. 14
      docs/reference/antibiotics.html
  39. 10
      docs/reference/example_isolates.html
  40. 10
      docs/reference/example_isolates_unclean.html
  41. 2
      docs/reference/ggplot_pca.html
  42. 2
      docs/reference/index.html
  43. 6
      docs/reference/lifecycle.html
  44. 10
      docs/reference/microorganisms.codes.html
  45. 10
      docs/reference/microorganisms.html
  46. 10
      docs/reference/microorganisms.old.html
  47. 10
      docs/reference/pca.html
  48. 10
      docs/reference/rsi_translation.html
  49. 9
      index.md
  50. 1
      man/AMR.Rd
  51. 6
      man/WHONET.Rd
  52. 8
      man/antibiotics.Rd
  53. 6
      man/example_isolates.Rd
  54. 6
      man/example_isolates_unclean.Rd
  55. 6
      man/microorganisms.Rd
  56. 6
      man/microorganisms.codes.Rd
  57. 6
      man/microorganisms.old.Rd
  58. 6
      man/pca.Rd
  59. 6
      man/rsi_translation.Rd
  60. BIN
      pkgdown/logos/countries.png
  61. BIN
      pkgdown/logos/countries_large.png
  62. 2
      tests/testthat/test-join_microorganisms.R

1
.gitignore vendored

@ -20,6 +20,7 @@ vignettes/*.R
^CRAN-RELEASE$
packrat/lib*/
packrat/src/
data-raw/taxa.txt
data-raw/taxon.tab
data-raw/DSMZ_bactnames.xlsx
data-raw/country_analysis_url_token.R

@ -1,6 +1,6 @@
Package: AMR
Version: 1.0.1.9003
Date: 2020-03-08
Version: 1.0.1.9004
Date: 2020-03-14
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),
@ -46,8 +46,10 @@ Imports:
knitr (>= 1.0.0),
microbenchmark,
pillar,
R6,
rlang (>= 0.3.1),
tidyr (>= 1.0.0)
tidyr (>= 1.0.0),
vctrs (>= 0.2.4)
Suggests:
covr (>= 3.0.1),
curl,
@ -63,5 +65,5 @@ BugReports: https://gitlab.com/msberends/AMR/issues
License: GPL-2 | file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
Roxygen: list(markdown = TRUE)

@ -1,14 +1,19 @@
# Generated by roxygen2: do not edit by hand
S3method("[",ab)
S3method("[",disk)
S3method("[",mo)
S3method("[<-",ab)
S3method("[<-",disk)
S3method("[<-",mo)
S3method("[[",ab)
S3method("[[",disk)
S3method("[[",mo)
S3method("[[<-",ab)
S3method("[[<-",disk)
S3method("[[<-",mo)
S3method(as.data.frame,ab)
S3method(as.data.frame,disk)
S3method(as.data.frame,mo)
S3method(as.double,mic)
S3method(as.integer,mic)
@ -20,6 +25,7 @@ S3method(as.rsi,mic)
S3method(barplot,mic)
S3method(barplot,rsi)
S3method(c,ab)
S3method(c,disk)
S3method(c,mo)
S3method(droplevels,mic)
S3method(droplevels,rsi)
@ -52,11 +58,21 @@ S3method(skewness,matrix)
S3method(summary,mic)
S3method(summary,mo)
S3method(summary,rsi)
S3method(type_sum,ab)
S3method(type_sum,disk)
S3method(type_sum,mic)
S3method(type_sum,mo)
S3method(type_sum,rsi)
S3method(vec_cast,character.mo)
S3method(vec_cast,mo)
S3method(vec_cast,mo.character)
S3method(vec_cast,mo.mo)
S3method(vec_ptype2,mo)
S3method(vec_ptype_abbr,ab)
S3method(vec_ptype_abbr,disk)
S3method(vec_ptype_abbr,mic)
S3method(vec_ptype_abbr,mo)
S3method(vec_ptype_abbr,rsi)
S3method(vec_ptype_full,ab)
S3method(vec_ptype_full,disk)
S3method(vec_ptype_full,mic)
S3method(vec_ptype_full,mo)
S3method(vec_ptype_full,rsi)
export("%like%")
export("%like_case%")
export(ab_atc)
@ -199,14 +215,19 @@ export(skewness)
export(susceptibility)
export(theme_rsi)
exportMethods("[.ab")
exportMethods("[.disk")
exportMethods("[.mo")
exportMethods("[<-.ab")
exportMethods("[<-.disk")
exportMethods("[<-.mo")
exportMethods("[[.ab")
exportMethods("[[.disk")
exportMethods("[[.mo")
exportMethods("[[<-.ab")
exportMethods("[[<-.disk")
exportMethods("[[<-.mo")
exportMethods(as.data.frame.ab)
exportMethods(as.data.frame.disk)
exportMethods(as.data.frame.mo)
exportMethods(as.double.mic)
exportMethods(as.integer.mic)
@ -214,6 +235,7 @@ exportMethods(as.numeric.mic)
exportMethods(barplot.mic)
exportMethods(barplot.rsi)
exportMethods(c.ab)
exportMethods(c.disk)
exportMethods(c.mo)
exportMethods(droplevels.mic)
exportMethods(droplevels.rsi)
@ -244,6 +266,7 @@ exportMethods(skewness.matrix)
exportMethods(summary.mic)
exportMethods(summary.mo)
exportMethods(summary.rsi)
importFrom(R6,R6Class)
importFrom(cleaner,freq)
importFrom(cleaner,freq.default)
importFrom(cleaner,percentage)
@ -294,7 +317,6 @@ importFrom(dplyr,mutate_at)
importFrom(dplyr,n)
importFrom(dplyr,n_distinct)
importFrom(dplyr,n_groups)
importFrom(dplyr,progress_estimated)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,row_number)
@ -317,7 +339,6 @@ importFrom(graphics,text)
importFrom(knitr,kable)
importFrom(microbenchmark,microbenchmark)
importFrom(pillar,pillar_shaft)
importFrom(pillar,type_sum)
importFrom(rlang,as_label)
importFrom(rlang,enquos)
importFrom(rlang,eval_tidy)
@ -334,3 +355,7 @@ importFrom(tidyr,pivot_wider)
importFrom(utils,adist)
importFrom(utils,browseURL)
importFrom(utils,menu)
importFrom(vctrs,vec_cast)
importFrom(vctrs,vec_ptype2)
importFrom(vctrs,vec_ptype_abbr)
importFrom(vctrs,vec_ptype_full)

@ -1,10 +1,13 @@
# AMR 1.0.1.9003
## <small>Last updated: 08-Mar-2020</small>
# AMR 1.0.1.9004
## <small>Last updated: 14-Mar-2020</small>
### New
* Support for easy principal component analysis for AMR, using the new `pca()` function
* Plotting biplots for principal component analysis using the new `ggplot_pca()` function
### Other
* Support for the upcoming `dplyr` version 1.0.0
# AMR 1.0.1
### Changed

@ -396,9 +396,15 @@ c.ab <- function(x, ...) {
class_integrity_check(y, "antimicrobial code", antibiotics$ab)
}
#' @importFrom pillar type_sum
#' @importFrom vctrs vec_ptype_abbr
#' @export
type_sum.ab <- function(x) {
vec_ptype_abbr.ab <- function(x, ...) {
"ab"
}
#' @importFrom vctrs vec_ptype_full
#' @export
vec_ptype_full.ab <- function(x, ...) {
  # full type label for <ab> vectors; neither `x` nor `...` is inspected
  return("ab")
}

@ -41,6 +41,7 @@
#' - Getting SNOMED codes of a microorganism, or get its name associated with a SNOMED code
#' - Getting LOINC codes of an antibiotic, or get its name associated with a LOINC code
#' - Machine reading the EUCAST and CLSI guidelines from 2011-2020 to translate MIC values and disk diffusion diameters to R/SI
#' - Principal component analysis for AMR
#' @section Read more on our website!:
#' On our website <https://msberends.gitlab.io/AMR> you can find [a comprehensive tutorial](https://msberends.gitlab.io/AMR/articles/AMR.html) about how to conduct AMR analysis, the [complete documentation of all functions](https://msberends.gitlab.io/AMR/reference) (which reads a lot easier than here in R) and [an example analysis using WHONET data](https://msberends.gitlab.io/AMR/articles/WHONET.html).

@ -56,7 +56,7 @@
#' - `"ml"` = milliliter (e.g. eyedrops)
#' @export
#' @rdname atc_online
#' @importFrom dplyr %>% progress_estimated
#' @importFrom dplyr %>%
#' @inheritSection AMR Read more on our website!
#' @source <https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/>
#' @examples

@ -80,7 +80,7 @@ as.disk <- function(x, na.rm = FALSE) {
list_missing, call. = FALSE)
}
class(x) <- c("disk", "integer")
class(x) <- "disk"
x
}
}
@ -97,6 +97,20 @@ is.disk <- function(x) {
inherits(x, "disk")
}
#' @exportMethod as.data.frame.disk
#' @export
#' @noRd
as.data.frame.disk <- function(x, ...) {
  # Modelled on as.data.frame.integer(), but without the stringsAsFactors
  # handling, since the column will be of class "disk".
  # The expression the caller supplied for `x` becomes the default column name.
  column_label <- paste(deparse(substitute(x), width.cutoff = 500L),
                        collapse = " ")
  caller_gave_nm <- "nm" %in% names(list(...))
  if (caller_gave_nm) {
    # an explicit `nm` is already part of `...`; pass everything through as-is
    return(as.data.frame.vector(x, ...))
  }
  as.data.frame.vector(x, ..., nm = column_label)
}
#' @exportMethod print.disk
#' @export
#' @noRd
@ -105,12 +119,6 @@ print.disk <- function(x, ...) {
print(as.integer(x), quote = FALSE)
}
#' @importFrom pillar type_sum
#' @export
type_sum.disk <- function(x) {
"disk"
}
#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.disk <- function(x, ...) {
@ -118,3 +126,56 @@ pillar_shaft.disk <- function(x, ...) {
out[is.na(x)] <- pillar::style_na(NA)
pillar::new_pillar_shaft_simple(out, align = "right", min_width = 3)
}
#' @importFrom vctrs vec_ptype_abbr
#' @export
vec_ptype_abbr.disk <- function(x, ...) {
  # abbreviated type label for <disk> vectors; neither `x` nor `...` is inspected
  return("disk")
}
#' @importFrom vctrs vec_ptype_full
#' @export
vec_ptype_full.disk <- function(x, ...) {
  # full type label for <disk> vectors; neither `x` nor `...` is inspected
  return("disk")
}
#' @exportMethod [.disk
#' @export
#' @noRd
"[.disk" <- function(x, ...) {
  # Subset with the next method, then put back the attributes of `x` (most
  # importantly the "disk" class) that default subsetting drops.
  y <- NextMethod()
  # names/dim/dimnames in `y` already describe the *subset*. Copying the
  # originals back would fail with a length mismatch (or mislabel elements)
  # whenever `x` is named, so only the remaining attributes are restored.
  restore <- attributes(x)
  restore <- restore[setdiff(names(restore), c("names", "dim", "dimnames"))]
  for (attr_name in names(restore)) {
    attr(y, attr_name) <- restore[[attr_name]]
  }
  y
}
#' @exportMethod [[.disk
#' @export
#' @noRd
"[[.disk" <- function(x, ...) {
  # Extract a single element with the next method, then put back the
  # attributes of `x` (most importantly the "disk" class).
  y <- NextMethod()
  # The extracted element must not receive the full-length names/dim/dimnames
  # of `x`; restoring those would fail for any named disk vector.
  restore <- attributes(x)
  restore <- restore[setdiff(names(restore), c("names", "dim", "dimnames"))]
  for (attr_name in names(restore)) {
    attr(y, attr_name) <- restore[[attr_name]]
  }
  y
}
#' @exportMethod [<-.disk
#' @export
#' @noRd
"[<-.disk" <- function(i, j, ..., value) {
# Subassignment (`x[...] <- value`) for class "disk". Mind the formals: `i` is
# the first argument, i.e. the object being modified, not an index.
# Let the next method perform the actual assignment.
y <- NextMethod()
# Replace the attributes of the result wholesale with those of the original
# object, which restores its class.
# NOTE(review): if `i` carries names and the assignment changes the length,
# restoring the original names presumably fails - confirm and handle if needed.
attributes(y) <- attributes(i)
y
}
#' @exportMethod [[<-.disk
#' @export
#' @noRd
"[[<-.disk" <- function(i, j, ..., value) {
# Single-element assignment (`x[[...]] <- value`) for class "disk". Mind the
# formals: `i` is the first argument, i.e. the object being modified.
# Let the next method perform the actual assignment.
y <- NextMethod()
# Replace the attributes of the result wholesale with those of the original
# object, which restores its class.
# NOTE(review): if `i` carries names and the assignment changes the length,
# restoring the original names presumably fails - confirm and handle if needed.
attributes(y) <- attributes(i)
y
}
#' @exportMethod c.disk
#' @export
#' @noRd
c.disk <- function(x, ...) {
# Combine `x` with the values in `...` using the next method.
y <- NextMethod()
# Replace the attributes of the combined result with those of the first
# argument, so the result carries the same class as `x`.
# NOTE(review): any names on the combined result are overwritten as well; if
# `x` itself is named, the length mismatch presumably raises an error - confirm.
attributes(y) <- attributes(x)
y
}

@ -50,13 +50,14 @@
#' df_joined <- left_join_microorganisms(df, "bacteria")
#' colnames(df_joined)
inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
check_dataset_integrity()
checked <- joins_check_df(x, by)
x <- checked$x
by <- checked$by
join <- suppressWarnings(
dplyr::inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (nrow(join) > nrow(x)) {
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
}
join
@ -65,13 +66,14 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
check_dataset_integrity()
checked <- joins_check_df(x, by)
x <- checked$x
by <- checked$by
join <- suppressWarnings(
dplyr::left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (nrow(join) > nrow(x)) {
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
}
join
@ -80,13 +82,14 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
check_dataset_integrity()
checked <- joins_check_df(x, by)
x <- checked$x
by <- checked$by
join <- suppressWarnings(
dplyr::right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (nrow(join) > nrow(x)) {
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
}
join
@ -95,13 +98,14 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
check_dataset_integrity()
checked <- joins_check_df(x, by)
x <- checked$x
by <- checked$by
join <- suppressWarnings(
dplyr::full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)
)
if (nrow(join) > nrow(x)) {
if (NROW(join) > NROW(x)) {
warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")
}
join
@ -110,6 +114,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = NULL, ...) {
check_dataset_integrity()
checked <- joins_check_df(x, by)
x <- checked$x
by <- checked$by
@ -121,6 +126,7 @@ semi_join_microorganisms <- function(x, by = NULL, ...) {
#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = NULL, ...) {
check_dataset_integrity()
checked <- joins_check_df(x, by)
x <- checked$x
by <- checked$by
@ -131,7 +137,7 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
joins_check_df <- function(x, by) {
if (!any(class(x) %in% c("data.frame", "matrix"))) {
x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
x <- data.frame(mo = as.mo(x), stringsAsFactors = FALSE)
if (is.null(by)) {
by <- "mo"
}
@ -142,6 +148,7 @@ joins_check_df <- function(x, by) {
if (is.na(by)) {
if ("mo" %in% colnames(x)) {
by <- "mo"
x[, "mo"] <- as.mo(x[, "mo"])
} else {
stop("Cannot join - no column found with name or class `mo`.", call. = FALSE)
}

@ -245,7 +245,7 @@ key_antibiotics <- function(x,
}
#' @importFrom dplyr progress_estimated %>%
#' @importFrom dplyr %>%
#' @rdname key_antibiotics
#' @export
key_antibiotics_equal <- function(y,
@ -270,7 +270,7 @@ key_antibiotics_equal <- function(y,
result <- logical(length(x))
if (info_needed == TRUE) {
p <- dplyr::progress_estimated(length(x))
p <- progress_estimated(length(x))
}
for (i in seq_len(length(x))) {

@ -235,9 +235,15 @@ barplot.mic <- function(height,
axis(2, seq(0, max(table(droplevels.factor(height)))))
}
#' @importFrom pillar type_sum
#' @importFrom vctrs vec_ptype_abbr
#' @export
type_sum.mic <- function(x) {
vec_ptype_abbr.mic <- function(x, ...) {
"mic"
}
#' @importFrom vctrs vec_ptype_full
#' @export
vec_ptype_full.mic <- function(x, ...) {
  # full type label for <mic> vectors; neither `x` nor `...` is inspected
  return("mic")
}

@ -78,9 +78,9 @@
#' - Uncertainty level 3: allow all of level 1 and 2, strip off text elements from the end, allow any part of a taxonomic name.
#'
#' This leads to e.g.:
#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (`B_STRPT_GRPB`) needs review.
#' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (`B_STPHY_AURS`) needs review.
#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (`B_NESSR_GNRR`) needs review.
#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (``r as.mo("Streptococcus group B")``) needs review.
#' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (``r as.mo("Staphylococcus aureus")``) needs review.
#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review.
#'
#' The level of uncertainty can be set using the argument `allow_uncertain`. The default is `allow_uncertain = TRUE`, which is equal to uncertainty level 2. Using `allow_uncertain = FALSE` is equal to uncertainty level 0 and will skip all rules. You can also use e.g. `as.mo(..., allow_uncertain = 1)` to only allow up to level 1 uncertainty.
#'
@ -234,7 +234,7 @@ is.mo <- function(x) {
inherits(x, "mo")
}
#' @importFrom dplyr %>% pull left_join n_distinct progress_estimated filter distinct
#' @importFrom dplyr %>% pull left_join n_distinct filter distinct
#' @importFrom data.table data.table as.data.table setkey
#' @importFrom crayon magenta red blue silver italic
#' @importFrom cleaner percentage
@ -1675,12 +1675,48 @@ print.mo <- function(x, ...) {
print.default(x, quote = FALSE)
}
#' @importFrom pillar type_sum
#' @importFrom vctrs vec_ptype_abbr
#' @export
type_sum.mo <- function(x) {
vec_ptype_abbr.mo <- function(x, ...) {
"mo"
}
#' @importFrom vctrs vec_ptype_full
#' @export
vec_ptype_full.mo <- function(x, ...) {
  # full type label for <mo> vectors; neither `x` nor `...` is inspected
  return("mo")
}
#' @importFrom vctrs vec_ptype2
#' @export
vec_ptype2.mo <- function(x, y, ...) {
  # Determine the common type on the character representations of both
  # inputs and delegate the decision to vctrs.
  x_chr <- as.character(x)
  y_chr <- as.character(y)
  vctrs::vec_ptype2(x = x_chr, y = y_chr, ...)
}
#' @importFrom vctrs vec_cast
#' @export
vec_cast.mo <- function(x, to, ...) {
  # Cast on the character representations of `x` and `to`, then coerce the
  # result with as.mo().
  casted <- vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)
  as.mo(casted)
}
#' @importFrom vctrs vec_cast
#' @export
vec_cast.mo.mo <- function(x, to, ...) {
  # mo -> mo: cast on the character representations, then coerce the result
  # with as.mo().
  casted <- vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)
  as.mo(casted)
}
#' @importFrom vctrs vec_cast
#' @export
vec_cast.mo.character <- function(x, to, ...) {
  # vctrs names cast methods `vec_cast.<to>.<x>`: this method converts a
  # character `x` to the class of `to`, i.e. "mo". The result must therefore
  # be coerced with as.mo(); returning a plain character vector (as this
  # method did before) is the behaviour that belongs to vec_cast.character.mo().
  casted <- vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)
  as.mo(casted)
}
#' @importFrom vctrs vec_cast
#' @export
vec_cast.character.mo <- function(x, to, ...) {
  # vctrs names cast methods `vec_cast.<to>.<x>`: this method converts an mo
  # `x` to the class of `to`, i.e. character. The result must therefore stay a
  # plain character vector; wrapping it in as.mo() (as this method did before)
  # is the behaviour that belongs to vec_cast.mo.character().
  vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)
}
#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.mo <- function(x, ...) {

@ -22,7 +22,7 @@
#' Principal Component Analysis (for AMR)
#'
#' Performs a principal component analysis (PCA) based on a data set, with automatic determination of the groups and labels for subsequent plotting, and automatic filtering to keep only suitable (i.e. non-empty and numeric) variables.
#' @inheritSection lifecycle Experimental lifecycle
#' @inheritSection lifecycle Maturing lifecycle
#' @param x a [data.frame] containing numeric columns
#' @param ... columns of `x` to be selected for PCA
#' @inheritParams stats::prcomp

@ -0,0 +1,142 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
# taken from https://github.com/tidyverse/dplyr/blob/f306d8da8f27c2e6abbd3c70f219fef7ca61fbb5/R/progress.R
# when it was still in the dplyr package
progress_estimated <- function(n, min_time = 0) {
  # Thin constructor wrapper: create a Progress object for `n` items, passing
  # `min_time` through unchanged.
  tracker <- Progress$new(n, min_time = min_time)
  tracker
}
# R6 class implementing a text progress bar with a remaining-time estimate.
# Public fields:
#   n           - total number of items to process
#   i           - number of items processed so far
#   init_time   - value of now() when begin() was last called
#   stopped     - TRUE once stop() has been called
#   stop_time   - value of now() at the moment stop() was called
#   min_time    - elapsed time (same unit as now()) below which print() stays silent
#   last_update - value of now() at the last begin() or non-suppressed print()
# All methods except width() return the object itself (print() invisibly),
# so calls can be chained, e.g. p$tick()$print().
#' @importFrom R6 R6Class
Progress <- R6::R6Class("Progress",
public = list(
n = NULL,
i = 0,
init_time = NULL,
stopped = FALSE,
stop_time = NULL,
min_time = NULL,
last_update = NULL,
# Store the settings and start the timer right away.
initialize = function(n, min_time = 0, ...) {
self$n <- n
self$min_time <- min_time
self$begin()
},
# Reset the counter and (re)start the timer.
begin = function() {
"Initialise timer. Call this before beginning timing."
self$i <- 0
self$last_update <- self$init_time <- now()
self$stopped <- FALSE
self
},
# Sleep for `x` seconds.
pause = function(x) {
"Sleep for x seconds. Useful for testing."
Sys.sleep(x)
self
},
# Number of characters available for the bar itself: console width minus
# the length of the longest status suffix, minus 2 (presumably the two "|"
# delimiters). NOTE(review): negative when the console is narrower than that.
width = function() {
getOption("width") - nchar("|100% ~ 99.9 h remaining") - 2
},
# Advance the counter by one; does nothing once stopped and raises an error
# when all `n` items have already been ticked. Here stop() is the
# error-signalling function, not this class's own stop method (self$stop).
tick = function() {
"Process one element"
if (self$stopped) return(self)
if (self$i == self$n) stop("No more ticks")
self$i <- self$i + 1
self
},
# Mark the progress as finished and record the stop time (only once).
stop = function() {
if (self$stopped) return(self)
self$stopped <- TRUE
self$stop_time <- now()
self
},
# Draw the current state on the console. Nothing is printed unless the
# option `dplyr.show_progress` is TRUE, the session is interactive and the
# option `knitr.in.progress` is unset.
print = function(...) {
if (!isTRUE(getOption("dplyr.show_progress")) || # user specifies no progress
!interactive() || # not an interactive session
!is.null(getOption("knitr.in.progress"))) { # dplyr used within knitr document
return(invisible(self))
}
now_ <- now()
# stay silent until `min_time` has elapsed, and redraw at most once per 0.05
if (now_ - self$init_time < self$min_time || now_ - self$last_update < 0.05) {
return(invisible(self))
}
self$last_update <- now_
if (self$stopped) {
# final line: report the total run time and whether all items were processed
overall <- show_time(self$stop_time - self$init_time)
if (self$i == self$n) {
cat_line("Completed after ", overall)
cat("\n")
} else {
cat_line("Killed after ", overall)
cat("\n")
}
return(invisible(self))
}
# average time per processed item, extrapolated to the items still to do
# NOTE(review): self$i == 0 (print() before any tick()) divides by zero here,
# and self$n == 0 makes `nbars` NaN - confirm callers always tick first.
avg <- (now() - self$init_time) / self$i
time_left <- (self$n - self$i) * avg
# number of "=" characters representing the completed fraction
nbars <- trunc(self$i / self$n * self$width())
cat_line(
"|", str_rep("=", nbars), str_rep(" ", self$width() - nbars), "|",
format(round(self$i / self$n * 100), width = 3), "% ",
"~", show_time(time_left), " remaining"
)
invisible(self)
}
)
)
cat_line <- function(...) {
  # Overwrite the current console line: glue all pieces of `...` together,
  # pad with spaces up to the console width and print after a carriage return.
  text <- paste0(..., collapse = "")
  padding <- max(c(0, getOption("width") - nchar(text, "width")))
  cat("\r", text, rep.int(" ", padding), sep = "")
  # make sure the line shows up immediately on buffered consoles
  utils::flush.console()
}
str_rep <- function(x, i) {
  # Repeat the string `x` `i` times and return the result as one string.
  # A negative or missing (NA/NaN) count is treated as zero instead of making
  # rep.int() fail; the progress bar can request such counts when the console
  # is very narrow or when there are no items to process.
  times <- max(0, i, na.rm = TRUE)
  paste(rep.int(x, times), collapse = "")
}
show_time <- function(x) {
  # Format a duration `x` given in seconds as a rounded number followed by
  # its unit: seconds below one minute, minutes below one hour, hours otherwise.
  if (x < 60) {
    return(paste(round(x), "s"))
  }
  if (x < 60 * 60) {
    return(paste(round(x / 60), "m"))
  }
  paste(round(x / (60 * 60)), "h")
}
now <- function() {
  # third component of proc.time(), i.e. the elapsed (wall-clock) time, as a bare number
  proc.time()[["elapsed"]]
}

@ -659,9 +659,15 @@ barplot.rsi <- function(height,
}
}
#' @importFrom pillar type_sum
#' @importFrom vctrs vec_ptype_abbr
#' @export
type_sum.rsi <- function(x) {
vec_ptype_abbr.rsi <- function(x, ...) {
"rsi"
}
#' @importFrom vctrs vec_ptype_full
#' @export
vec_ptype_full.rsi <- function(x, ...) {
  # full type label for <rsi> vectors; neither `x` nor `...` is inspected
  return("rsi")
}

Binary file not shown.

@ -50,7 +50,7 @@ rm(data_json)
# add country data based on IP address and ipinfo.io API
unique_ip <- unique(data$ipaddress)
ip_tbl <- GET_df(unique_ip[1])
p <- progress_estimated(n = length(unique_ip) - 1, min_time = 0)
p <- AMR:::progress_estimated(n = length(unique_ip) - 1, min_time = 0)
for (i in 2:length(unique_ip)) {
p$tick()$print()
ip_tbl <- ip_tbl %>%

@ -23,6 +23,7 @@
# Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life:
# https://opendata.eol.org/dataset/catalogue-of-life/
# https://doi.org/10.15468/rffz4x
# (download the resource file with a name like "Catalogue of Life yyyy-mm-dd")
# and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures
# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html
@ -32,7 +33,8 @@ library(dplyr)
library(AMR)
# unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then:
data_col <- data.table::fread("data-raw/taxon.tab")
# data_col <- data.table::fread("data-raw/taxon.tab")
data_col <- data.table::fread("data-raw/taxa.txt", quote = "")
# read the xlsx file from DSMZ (only around 2.5 MB):
data_dsmz <- readxl::read_xlsx("data-raw/DSMZ_bactnames.xlsx")
@ -66,7 +68,7 @@ data_col <- data_col %>%
subspecies = infraspecificEpithet,
rank = taxonRank,
ref = scientificNameAuthorship,
species_id = furtherInformationURL)
species_id = references)
data_col$source <- "CoL"
# clean data_dsmz
@ -761,6 +763,7 @@ new_families <- MOs %>%
class(MOs$mo) <- "character"
MOs <- rbind(MOs %>% filter(!(rank == "family" & fullname %in% new_families)),
AMR::microorganisms %>%
select(-snomed) %>%
filter(family == "Enterobacteriaceae" & rank == "family") %>%
rbind(., ., ., ., ., ., .) %>%
mutate(fullname = new_families,
@ -794,7 +797,9 @@ colnames(MOs)
MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View()
MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View()
# and the ones we lost:
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View()
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo
AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View()
# and these IDs have changed:
old_new <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
@ -805,24 +810,41 @@ old_new <- MOs %>%
View(old_new)
# to keep all the old IDs:
# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
# rbind(microorganisms %>%
# filter(mo %in% old_new$mo_old) %>%
# select(mo, fullname) %>%
# left_join(MOs %>%
# left_join(MOs %>%
# select(-mo), by = "fullname"))
# and these codes are now missing (which will throw a unit test error):
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
AMR::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) %>% View()
# this is how to fix it
microorganisms.codes <- AMR::microorganisms.codes %>%
left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>%
filter(!is.na(mo))
microorganisms.codes %>% filter(!mo %in% MOs$mo)
# and for microorganisms.translation:
microorganisms.translation <- AMR:::microorganisms.translation %>%
select(mo = mo_new) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new)
left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>%

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9004</span>
</span>
</div>

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9004</span>
</span>
</div>

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9004</span>
</span>
</div>
@ -77,9 +77,9 @@
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
<span class="fa fa-compress"></span>
Conduct Principal Component Analysis for AMR
Conduct principal component analysis for AMR
</a>
</li>
<li>
@ -186,7 +186,7 @@
<h1>How to apply EUCAST rules</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">07 March 2020</h4>
<h4 class="date">14 March 2020</h4>
<div class="hidden name"><code>EUCAST.Rmd</code></div>

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9004</span>
</span>
</div>
@ -186,7 +186,7 @@
<h1>How to conduct principal component analysis (PCA) for AMR</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">07 March 2020</h4>
<h4 class="date">14 March 2020</h4>
<div class="hidden name"><code>PCA.Rmd</code></div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 142 KiB

After

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 148 KiB

After

Width:  |  Height:  |  Size: 148 KiB

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9004</span>
</span>
</div>
@ -186,7 +186,7 @@
<h1>Benchmarks</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">07 March 2020</h4>
<h4 class="date">14 March 2020</h4>
<div class="hidden name"><code>benchmarks.Rmd</code></div>
@ -220,36 +220,21 @@
<span id="cb2-16"><a href="#cb2-16"></a> <span class="dt">times =</span> <span class="dv">10</span>)</span>
<span id="cb2-17"><a href="#cb2-17"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(S.aureus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</span>
<span id="cb2-18"><a href="#cb2-18"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb2-19"><a href="#cb2-19"></a><span class="co"># expr min lq mean median uq max</span></span>
<span id="cb2-20"><a href="#cb2-20"></a><span class="co"># as.mo("sau") 8.0 8.2 9.1 8.4 8.5 16</span></span>
<span id="cb2-21"><a href="#cb2-21"></a><span class="co"># as.mo("stau") 37.0 40.0 51.0 52.0 60.0 76</span></span>
<span id="cb2-22"><a href="#cb2-22"></a><span class="co"># as.mo("STAU") 36.0 38.0 58.0 60.0 68.0 100</span></span>
<span id="cb2-23"><a href="#cb2-23"></a><span class="co"># as.mo("staaur") 8.2 8.4 9.5 8.6 8.9 14</span></span>
<span id="cb2-24"><a href="#cb2-24"></a><span class="co"># as.mo("STAAUR") 8.2 8.3 15.0 9.2 14.0 53</span></span>
<span id="cb2-25"><a href="#cb2-25"></a><span class="co"># as.mo("S. aureus") 13.0 21.0 64.0 21.0 45.0 260</span></span>
<span id="cb2-26"><a href="#cb2-26"></a><span class="co"># as.mo("S aureus") 13.0 14.0 33.0 24.0 44.0 76</span></span>
<span id="cb2-27"><a href="#cb2-27"></a><span class="co"># as.mo("Staphylococcus aureus") 4.7 4.8 9.9 6.8 7.9 42</span></span>
<span id="cb2-28"><a href="#cb2-28"></a><span class="co"># as.mo("Staphylococcus aureus (MRSA)") 620.0 640.0 770.0 700.0 860.0 1100</span></span>
<span id="cb2-29"><a href="#cb2-29"></a><span class="co"># as.mo("Sthafilokkockus aaureuz") 330.0 350.0 460.0 490.0 560.0 570</span></span>
<span id="cb2-30"><a href="#cb2-30"></a><span class="co"># as.mo("MRSA") 8.1 8.3 14.0 12.0 13.0 48</span></span>
<span id="cb2-31"><a href="#cb2-31"></a><span class="co"># as.mo("VISA") 24.0 25.0 34.0 26.0 38.0 59</span></span>
<span id="cb2-32"><a href="#cb2-32"></a><span class="co"># as.mo("VRSA") 23.0 24.0 37.0 27.0 39.0 78</span></span>
<span id="cb2-33"><a href="#cb2-33"></a><span class="co"># as.mo(22242419) 120.0 130.0 150.0 140.0 160.0 240</span></span>
<span id="cb2-34"><a href="#cb2-34"></a><span class="co"># neval</span></span>
<span id="cb2-35"><a href="#cb2-35"></a><span class="co"># 10</span></span>
<span id="cb2-36"><a href="#cb2-36"></a><span class="co"># 10</span></span>
<span id="cb2-37"><a href="#cb2-37"></a><span class="co"># 10</span></span>
<span id="cb2-38"><a href="#cb2-38"></a><span class="co"># 10</span></span>
<span id="cb2-39"><a href="#cb2-39"></a><span class="co"># 10</span></span>
<span id="cb2-40"><a href="#cb2-40"></a><span class="co"># 10</span></span>
<span id="cb2-41"><a href="#cb2-41"></a><span class="co"># 10</span></span>
<span id="cb2-42"><a href="#cb2-42"></a><span class="co"># 10</span></span>
<span id="cb2-43"><a href="#cb2-43"></a><span class="co"># 10</span></span>
<span id="cb2-44"><a href="#cb2-44"></a><span class="co"># 10</span></span>
<span id="cb2-45"><a href="#cb2-45"></a><span class="co"># 10</span></span>
<span id="cb2-46"><a href="#cb2-46"></a><span class="co"># 10</span></span>
<span id="cb2-47"><a href="#cb2-47"></a><span class="co"># 10</span></span>
<span id="cb2-48"><a href="#cb2-48"></a><span class="co"># 10</span></span></code></pre></div>
<span id="cb2-19"><a href="#cb2-19"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb2-20"><a href="#cb2-20"></a><span class="co"># as.mo("sau") 8.9 9.3 9.6 9.6 9.9 10 10</span></span>
<span id="cb2-21"><a href="#cb2-21"></a><span class="co"># as.mo("stau") 41.0 41.0 51.0 43.0 67.0 74 10</span></span>
<span id="cb2-22"><a href="#cb2-22"></a><span class="co"># as.mo("STAU") 39.0 41.0 49.0 42.0 56.0 72 10</span></span>
<span id="cb2-23"><a href="#cb2-23"></a><span class="co"># as.mo("staaur") 9.0 9.2 9.7 9.5 9.9 11 10</span></span>
<span id="cb2-24"><a href="#cb2-24"></a><span class="co"># as.mo("STAAUR") 9.5 9.8 24.0 21.0 38.0 45 10</span></span>
<span id="cb2-25"><a href="#cb2-25"></a><span class="co"># as.mo("S. aureus") 15.0 16.0 26.0 18.0 38.0 61 10</span></span>
<span id="cb2-26"><a href="#cb2-26"></a><span class="co"># as.mo("S aureus") 15.0 15.0 17.0 16.0 17.0 21 10</span></span>
<span id="cb2-27"><a href="#cb2-27"></a><span class="co"># as.mo("Staphylococcus aureus") 5.2 5.6 8.4 6.0 6.5 30 10</span></span>
<span id="cb2-28"><a href="#cb2-28"></a><span class="co"># as.mo("Staphylococcus aureus (MRSA)") 640.0 690.0 710.0 710.0 720.0 760 10</span></span>
<span id="cb2-29"><a href="#cb2-29"></a><span class="co"># as.mo("Sthafilokkockus aaureuz") 350.0 360.0 420.0 400.0 490.0 510 10</span></span>
<span id="cb2-30"><a href="#cb2-30"></a><span class="co"># as.mo("MRSA") 9.2 9.3 16.0 10.0 10.0 49 10</span></span>
<span id="cb2-31"><a href="#cb2-31"></a><span class="co"># as.mo("VISA") 25.0 27.0 46.0 56.0 57.0 60 10</span></span>
<span id="cb2-32"><a href="#cb2-32"></a><span class="co"># as.mo("VRSA") 26.0 27.0 39.0 28.0 32.0 120 10</span></span>
<span id="cb2-33"><a href="#cb2-33"></a><span class="co"># as.mo(22242419) 120.0 140.0 170.0 140.0 150.0 410 10</span></span></code></pre></div>
<p><img src="benchmarks_files/figure-html/unnamed-chunk-4-1.png" width="562.5"></p>
<p>In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second.</p>
<p>To achieve this speed, the <code>as.mo</code> function also takes into account the prevalence of human pathogenic microorganisms. The downside of this is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of <em>Methanosarcina semesiae</em> (<code>B_MTHNSR_SEMS</code>), a bug probably never found before in humans:</p>
@ -261,19 +246,19 @@
<span id="cb3-6"><a href="#cb3-6"></a> <span class="dt">times =</span> <span class="dv">10</span>)</span>
<span id="cb3-7"><a href="#cb3-7"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(M.semesiae, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</span>
<span id="cb3-8"><a href="#cb3-8"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb3-9"><a href="#cb3-9"></a><span class="co"># expr min lq mean median uq</span></span>
<span id="cb3-10"><a href="#cb3-10"></a><span class="co"># as.mo("metsem") 1349.000 1352.000 1597.000 1411.000 1983.000</span></span>
<span id="cb3-11"><a href="#cb3-11"></a><span class="co"># as.mo("METSEM") 1316.000 2146.000 2069.000 2226.000 2245.000</span></span>
<span id="cb3-12"><a href="#cb3-12"></a><span class="co"># as.mo("M. semesiae") 13.330 14.110 32.960 21.840 53.090</span></span>
<span id="cb3-13"><a href="#cb3-13"></a><span class="co"># as.mo("M. semesiae") 13.730 20.960 29.720 21.430 40.000</span></span>
<span id="cb3-14"><a href="#cb3-14"></a><span class="co"># as.mo("Methanosarcina semesiae") 4.802 5.171 6.667 6.551 8.036</span></span>
<span id="cb3-15"><a href="#cb3-15"></a><span class="co"># max neval</span></span>
<span id="cb3-16"><a href="#cb3-16"></a><span class="co"># 2184.000 10</span></span>
<span id="cb3-17"><a href="#cb3-17"></a><span class="co"># 2337.000 10</span></span>
<span id="cb3-18"><a href="#cb3-18"></a><span class="co"># 62.780 10</span></span>
<span id="cb3-19"><a href="#cb3-19"></a><span class="co"># 64.510 10</span></span>
<span id="cb3-20"><a href="#cb3-20"></a><span class="co"># 8.735 10</span></span></code></pre></div>
<p>That takes 6.1 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Methanosarcina semesiae</em>) are always very fast and only take some thousandths of a second to coerce - they are the most probable input from most data sets.</p>
<span id="cb3-9"><a href="#cb3-9"></a><span class="co"># expr min lq mean median uq</span></span>
<span id="cb3-10"><a href="#cb3-10"></a><span class="co"># as.mo("metsem") 1485.000 1507.000 1524.00 1519.000 1538.000</span></span>
<span id="cb3-11"><a href="#cb3-11"></a><span class="co"># as.mo("METSEM") 1371.000 1495.000 1557.00 1567.000 1633.000</span></span>
<span id="cb3-12"><a href="#cb3-12"></a><span class="co"># as.mo("M. semesiae") 16.010 16.310 25.38 16.480 42.840</span></span>
<span id="cb3-13"><a href="#cb3-13"></a><span class="co"># as.mo("M. semesiae") 15.700 15.900 16.74 16.370 17.480</span></span>
<span id="cb3-14"><a href="#cb3-14"></a><span class="co"># as.mo("Methanosarcina semesiae") 5.885 6.116 11.79 6.347 8.155</span></span>
<span id="cb3-15"><a href="#cb3-15"></a><span class="co"># max neval</span></span>
<span id="cb3-16"><a href="#cb3-16"></a><span class="co"># 1577.00 10</span></span>
<span id="cb3-17"><a href="#cb3-17"></a><span class="co"># 1663.00 10</span></span>
<span id="cb3-18"><a href="#cb3-18"></a><span class="co"># 48.53 10</span></span>
<span id="cb3-19"><a href="#cb3-19"></a><span class="co"># 18.55 10</span></span>
<span id="cb3-20"><a href="#cb3-20"></a><span class="co"># 32.92 10</span></span></code></pre></div>
<p>That takes 5.5 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Methanosarcina semesiae</em>) are always very fast and only take some thousandths of a second to coerce - they are the most probable input from most data sets.</p>
<p>In the figure below, we compare <em>Escherichia coli</em> (which is very common) with <em>Prevotella brevis</em> (which is moderately common) and with <em>Methanosarcina semesiae</em> (which is uncommon):</p>
<p><img src="benchmarks_files/figure-html/unnamed-chunk-6-1.png" width="900"></p>
<p>Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: <strong>repetitive results</strong> and <strong>already precalculated results</strong>.</p>
@ -287,11 +272,11 @@
<span id="cb4-4"><a href="#cb4-4"></a><span class="st"> </span><span class="co"># keep only the unique ones</span></span>
<span id="cb4-5"><a href="#cb4-5"></a><span class="st"> </span><span class="kw"><a href="https://rdrr.io/r/base/unique.html">unique</a></span>() <span class="op">%&gt;%</span></span>
<span id="cb4-6"><a href="#cb4-6"></a><span class="st"> </span><span class="co"># pick 50 of them at random</span></span>
<span id="cb4-7"><a href="#cb4-7"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/sample.html">sample</a></span>(<span class="dv">50</span>) <span class="op">%&gt;%</span></span>
<span id="cb4-7"><a href="#cb4-7"></a><span class="st"> </span><span class="kw"><a href="https://rdrr.io/r/base/sample.html">sample</a></span>(<span class="dv">50</span>) <span class="op">%&gt;%</span></span>
<span id="cb4-8"><a href="#cb4-8"></a><span class="st"> </span><span class="co"># paste that 10,000 times</span></span>
<span id="cb4-9"><a href="#cb4-9"></a><span class="st"> </span><span class="kw"><a href="https://rdrr.io/r/base/rep.html">rep</a></span>(<span class="dv">10000</span>) <span class="op">%&gt;%</span></span>
<span id="cb4-10"><a href="#cb4-10"></a><span class="st"> </span><span class="co"># scramble it</span></span>
<span id="cb4-11"><a href="#cb4-11"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/sample.html">sample</a></span>()</span>
<span id="cb4-11"><a href="#cb4-11"></a><span class="st"> </span><span class="kw"><a href="https://rdrr.io/r/base/sample.html">sample</a></span>()</span>
<span id="cb4-12"><a href="#cb4-12"></a> </span>
<span id="cb4-13"><a href="#cb4-13"></a><span class="co"># got indeed 50 times 10,000 = half a million?</span></span>
<span id="cb4-14"><a href="#cb4-14"></a><span class="kw"><a href="https://rdrr.io/r/base/length.html">length</a></span>(x)</span>
@ -306,9 +291,9 @@
<span id="cb4-23"><a href="#cb4-23"></a> <span class="dt">times =</span> <span class="dv">100</span>)</span>
<span id="cb4-24"><a href="#cb4-24"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</span>
<span id="cb4-25"><a href="#cb4-25"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb4-26"><a href="#cb4-26"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb4-27"><a href="#cb4-27"></a><span class="co"># mo_name(x) 564 605 673 630 657 1100 100</span></span></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.63 seconds (630 ms). You only lose time on your unique input values.</p>
<span id="cb4-26"><a href="#cb4-26"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb4-27"><a href="#cb4-27"></a><span class="co"># mo_name(x) 542 585 605 601 614 738 100</span></span></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.6 seconds (600 ms). You only lose time on your unique input values.</p>
</div>
<div id="precalculated-results" class="section level3">
<h3 class="hasAnchor">
@ -320,11 +305,11 @@
<span id="cb5-4"><a href="#cb5-4"></a> <span class="dt">times =</span> <span class="dv">10</span>)</span>
<span id="cb5-5"><a href="#cb5-5"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</span>
<span id="cb5-6"><a href="#cb5-6"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb5-7"><a href="#cb5-7"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb5-8"><a href="#cb5-8"></a><span class="co"># A 6.58 6.590 7.340 6.630 6.780 13.00 10</span></span>
<span id="cb5-9"><a href="#cb5-9"></a><span class="co"># B 13.50 13.700 18.700 13.900 14.600 60.80 10</span></span>
<span id="cb5-10"><a href="#cb5-10"></a><span class="co"># C 0.72 0.863 0.917 0.898 0.935 1.26 10</span></span></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0009 seconds - it doesn’t even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<span id="cb5-7"><a href="#cb5-7"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb5-8"><a href="#cb5-8"></a><span class="co"># A 6.760 6.900 7.43 7.070 7.540 9.290 10</span></span>
<span id="cb5-9"><a href="#cb5-9"></a><span class="co"># B 14.200 14.400 18.80 14.900 16.000 51.500 10</span></span>
<span id="cb5-10"><a href="#cb5-10"></a><span class="co"># C 0.586 0.726 0.74 0.757 0.763 0.804 10</span></span></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0008 seconds - it doesn’t even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://rdrr.io/pkg/microbenchmark/man/microbenchmark.html">microbenchmark</a></span>(<span class="dt">A =</span> <span class="kw"><a href="../reference/mo_property.html">mo_species</a></span>(<span class="st">"aureus"</span>),</span>
<span id="cb6-2"><a href="#cb6-2"></a> <span class="dt">B =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(<span class="st">"Staphylococcus"</span>),</span>
<span id="cb6-3"><a href="#cb6-3"></a> <span class="dt">C =</span> <span class="kw"><a href="../reference/mo_property.html">mo_name</a></span>(<span class="st">"Staphylococcus aureus"</span>),</span>
@ -337,14 +322,14 @@
<span id="cb6-10"><a href="#cb6-10"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</span>
<span id="cb6-11"><a href="#cb6-11"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb6-12"><a href="#cb6-12"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb6-13"><a href="#cb6-13"></a><span class="co"># A 0.499 0.511 0.516 0.517 0.522 0.544 10</span></span>
<span id="cb6-14"><a href="#cb6-14"></a><span class="co"># B 0.532 0.539 0.550 0.542 0.563 0.592 10</span></span>
<span id="cb6-15"><a href="#cb6-15"></a><span class="co"># C 0.718 0.787 0.832 0.843 0.889 0.904 10</span></span>
<span id="cb6-16"><a href="#cb6-16"></a><span class="co"># D 0.538 0.548 0.566 0.567 0.571 0.607 10</span></span>
<span id="cb6-17"><a href="#cb6-17"></a><span class="co"># E 0.503 0.509 0.515 0.513 0.516 0.549 10</span></span>
<span id="cb6-18"><a href="#cb6-18"></a><span class="co"># F 0.502 0.504 0.514 0.511 0.519 0.539 10</span></span>
<span id="cb6-19"><a href="#cb6-19"></a><span class="co"># G 0.493 0.513 0.538 0.514 0.536 0.684 10</span></span>
<span id="cb6-20"><a href="#cb6-20"></a><span class="co"># H 0.499 0.501 0.509 0.505 0.516 0.531 10</span></span></code></pre></div>
<span id="cb6-13"><a href="#cb6-13"></a><span class="co"># A 0.374 0.381 0.389 0.389 0.395 0.416 10</span></span>
<span id="cb6-14"><a href="#cb6-14"></a><span class="co"># B 0.404 0.411 0.422 0.421 0.425 0.452 10</span></