Browse Source

added Becker 2019

new-mo-algorithm
parent
commit
29f444543d
  1. 4
      DESCRIPTION
  2. 4
      NAMESPACE
  3. 25
      R/catalogue_of_life.R
  4. 2
      R/count.R
  5. 59
      R/filter_ab_class.R
  6. 22
      R/freq.R
  7. 14
      R/ggplot_rsi.R
  8. 2
      R/globals.R
  9. 6
      R/misc.R
  10. 233
      R/mo.R
  11. 59
      R/mo_history.R
  12. 11
      R/mo_property.R
  13. 17
      R/portion.R
  14. BIN
      data/antibiotics.rda
  15. 2
      docs/LICENSE-text.html
  16. 402
      docs/articles/AMR.html
  17. BIN
      docs/articles/AMR_files/figure-html/plot 1-1.png
  18. BIN
      docs/articles/AMR_files/figure-html/plot 3-1.png
  19. BIN
      docs/articles/AMR_files/figure-html/plot 4-1.png
  20. BIN
      docs/articles/AMR_files/figure-html/plot 5-1.png
  21. 2
      docs/articles/index.html
  22. 2
      docs/authors.html
  23. 2
      docs/index.html
  24. 2
      docs/news/index.html
  25. 26
      docs/reference/as.mo.html
  26. 2
      docs/reference/catalogue_of_life.html
  27. 6
      docs/reference/catalogue_of_life_version.html
  28. 4
      docs/reference/count.html
  29. 24
      docs/reference/filter_ab_class.html
  30. 4
      docs/reference/index.html
  31. 2
      docs/reference/microorganisms.html
  32. 2
      docs/reference/microorganisms.old.html
  33. 7
      docs/reference/mo_property.html
  34. 19
      docs/reference/portion.html
  35. 4
      docs/reference/rsi.html
  36. 25
      man/as.mo.Rd
  37. 4
      man/catalogue_of_life_version.Rd
  38. 2
      man/count.Rd
  39. 16
      man/filter_ab_class.Rd
  40. 6
      man/mo_property.Rd
  41. 15
      man/portion.Rd
  42. 2
      man/rsi.Rd
  43. 2
      tests/testthat/test-get_locale.R
  44. 18
      tests/testthat/test-mo_history.R
  45. 2
      vignettes/AMR.Rmd

4
DESCRIPTION

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
Package: AMR
Version: 0.5.0.9024
Date: 2019-03-18
Version: 0.5.0.9025
Date: 2019-03-26
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

4
NAMESPACE

@ -23,6 +23,7 @@ S3method(plot,mic) @@ -23,6 +23,7 @@ S3method(plot,mic)
S3method(plot,resistance_predict)
S3method(plot,rsi)
S3method(print,atc)
S3method(print,catalogue_of_life_version)
S3method(print,frequency_tbl)
S3method(print,mic)
S3method(print,mo)
@ -191,6 +192,7 @@ exportMethods(plot.frequency_tbl) @@ -191,6 +192,7 @@ exportMethods(plot.frequency_tbl)
exportMethods(plot.mic)
exportMethods(plot.rsi)
exportMethods(print.atc)
exportMethods(print.catalogue_of_life_version)
exportMethods(print.frequency_tbl)
exportMethods(print.mic)
exportMethods(print.mo)
@ -221,6 +223,7 @@ importFrom(crayon,red) @@ -221,6 +223,7 @@ importFrom(crayon,red)
importFrom(crayon,silver)
importFrom(crayon,strip_style)
importFrom(crayon,underline)
importFrom(crayon,white)
importFrom(crayon,yellow)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
@ -292,3 +295,4 @@ importFrom(stats,sd) @@ -292,3 +295,4 @@ importFrom(stats,sd)
importFrom(utils,browseURL)
importFrom(utils,browseVignettes)
importFrom(utils,installed.packages)
importFrom(utils,menu)

25
R/catalogue_of_life.R

@ -81,10 +81,10 @@ NULL @@ -81,10 +81,10 @@ NULL
#'
#' This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
#' @seealso \code{\link{microorganisms}}
#' @details The list item \code{is_latest_annual_release} is based on the system date.
#' @details The list item \code{...$catalogue_of_life$is_latest_annual_release} is based on the system date.
#'
#' For DSMZ, see \code{?microorganisms}.
#' @return a \code{list}, invisibly
#' @return a \code{list}, which prints in pretty format
#' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website!
#' @importFrom crayon bold underline
@ -99,8 +99,8 @@ catalogue_of_life_version <- function() { @@ -99,8 +99,8 @@ catalogue_of_life_version <- function() {
lst <- list(catalogue_of_life =
list(version = gsub("{year}", catalogue_of_life$year, catalogue_of_life$version, fixed = TRUE),
url = gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE),
# annual release always somewhere in March, so before April is TRUE, FALSE otherwise
is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")),
# annual release always somewhere in May, so before June is TRUE, FALSE otherwise
is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-06-01")),
n = nrow(filter(AMR::microorganisms, source == "CoL"))),
deutsche_sammlung_von_mikroorganismen_und_zellkulturen =
list(version = "Prokaryotic Nomenclature Up-to-Date from DSMZ",
@ -112,7 +112,16 @@ catalogue_of_life_version <- function() { @@ -112,7 +112,16 @@ catalogue_of_life_version <- function() {
n_total_species = nrow(AMR::microorganisms),
n_total_synonyms = nrow(AMR::microorganisms.old)))
cat(paste0(bold("Included in this package are:\n\n"),
structure(.Data = lst,
class = c("catalogue_of_life_version", "list"))
}
#' @exportMethod print.catalogue_of_life_version
#' @export
#' @noRd
print.catalogue_of_life_version <- function(x, ...) {
lst <- x
cat(paste0(bold("Included in this AMR package are:\n\n"),
underline(lst$catalogue_of_life$version), "\n",
" Available at: ", lst$catalogue_of_life$url, "\n",
" Number of included species: ", format(lst$catalogue_of_life$n, big.mark = ","), "\n",
@ -121,9 +130,7 @@ catalogue_of_life_version <- function() { @@ -121,9 +130,7 @@ catalogue_of_life_version <- function() {
lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$yearmonth, ")")), "\n",
" Available at: ", lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$url, "\n",
" Number of included species: ", format(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$n, big.mark = ","), "\n\n",
"Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n",
"Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n",
"=> Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n",
"=> Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n",
"See for more info ?microorganisms and ?catalogue_of_life.\n"))
return(base::invisible(lst))
}

2
R/count.R

@ -69,7 +69,7 @@ @@ -69,7 +69,7 @@
#' S = count_S(cipr),
#' n1 = count_all(cipr), # the actual total; sum of all three
#' n2 = n_rsi(cipr), # same - analogous to n_distinct
#' total = n()) # NOT the amount of tested isolates!
#' total = n()) # NOT the number of tested isolates!
#'
#' # Count co-resistance between amoxicillin/clav acid and gentamicin,
#' # so we can see that combination therapy does a lot more than mono therapy.

59
R/filter_ab_class.R

@ -19,11 +19,11 @@ @@ -19,11 +19,11 @@
# Visit our website for more info: https://msberends.gitab.io/AMR. #
# ==================================================================== #
#' Filter on antibiotic class
#' Filter isolates on result in antibiotic class
#'
#' Filter on specific antibiotic variables based on their class (ATC groups).
#' Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside.
#' @param tbl a data set
#' @param ab_class an antimicrobial class, like \code{"carbapenems"}
#' @param ab_class an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}.
#' @param result an antibiotic result: S, I or R (or a combination of more of them)
#' @param scope the scope to check which variables to check, can be \code{"any"} (default) or \code{"all"}
#' @param ... parameters passed on to \code{\link[dplyr]{filter_at}}
@ -54,8 +54,14 @@ @@ -54,8 +54,14 @@
#' # filter on isolates that show resistance to
#' # any aminoglycoside and any fluoroquinolone
#' septic_patients %>%
#' filter_aminoglycosides("R", "any") %>%
#' filter_fluoroquinolones("R", "any")
#' filter_aminoglycosides("R") %>%
#' filter_fluoroquinolones("R")
#'
#' # filter on isolates that show resistance to
#' # all aminoglycosides and all fluoroquinolones
#' septic_patients %>%
#' filter_aminoglycosides("R", "all") %>%
#' filter_fluoroquinolones("R", "all")
filter_ab_class <- function(tbl,
ab_class,
result = NULL,
@ -65,6 +71,8 @@ filter_ab_class <- function(tbl, @@ -65,6 +71,8 @@ filter_ab_class <- function(tbl,
if (is.null(result)) {
result <- c("S", "I", "R")
}
# make result = "IR" work too:
result <- unlist(strsplit(result, ""))
if (!all(result %in% c("S", "I", "R"))) {
stop("`result` must be one or more of: S, I, R", call. = FALSE)
@ -88,12 +96,20 @@ filter_ab_class <- function(tbl, @@ -88,12 +96,20 @@ filter_ab_class <- function(tbl,
} else {
scope_txt <- " and "
scope_fn <- all_vars
if (length(vars_df) > 1) {
operator <- gsub("is", "are", operator)
}
}
if (length(vars_df) > 1) {
scope <- paste(scope, "of ")
} else {
scope <- ""
}
message(blue(paste0("Filtering on ", atc_groups, ": ", scope, " of ",
message(blue(paste0("Filtering on ", atc_groups, ": ", scope,
paste(bold(paste0("`", vars_df, "`")), collapse = scope_txt), operator, toString(result))))
tbl %>%
filter_at(.vars = vars(vars_df),
.vars_predicate = scope_fn(. %in% result),
filter_at(vars(vars_df),
scope_fn(. %in% result),
...)
} else {
warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE)
@ -244,7 +260,7 @@ filter_tetracyclines <- function(tbl, @@ -244,7 +260,7 @@ filter_tetracyclines <- function(tbl,
...)
}
#' @importFrom dplyr %>% filter_at any_vars select
#' @importFrom dplyr %>% filter_at vars any_vars select
ab_class_vars <- function(ab_class) {
ab_vars <- AMR::antibiotics %>%
filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>%
@ -260,10 +276,23 @@ ab_class_vars <- function(ab_class) { @@ -260,10 +276,23 @@ ab_class_vars <- function(ab_class) {
#' @importFrom dplyr %>% filter pull
ab_class_atcgroups <- function(ab_class) {
AMR::antibiotics %>%
filter(atc %in% ab_class_vars(ab_class)) %>%
pull("atc_group2") %>%
unique() %>%
tolower() %>%
paste(collapse = "/")
ifelse(ab_class %in% c("aminoglycoside",
"carbapenem",
"cephalosporin",
"first-generation cephalosporin",
"second-generation cephalosporin",
"third-generation cephalosporin",
"fourth-generation cephalosporin",
"fluoroquinolone",
"glycopeptide",
"macrolide",
"tetracycline"),
paste0(ab_class, "s"),
AMR::antibiotics %>%
filter(atc %in% ab_class_vars(ab_class)) %>%
pull("atc_group2") %>%
unique() %>%
tolower() %>%
paste(collapse = "/")
)
}

22
R/freq.R

@ -417,9 +417,9 @@ frequency_tbl <- function(x, @@ -417,9 +417,9 @@ frequency_tbl <- function(x,
header_list$outliers_unique <- n_distinct(boxplot.stats(x)$out)
}
if (NROW(x) > 0 & any(class(x) == "rsi")) {
header_list$count_S <- sum(x == "S", na.rm = TRUE)
header_list$count_IR <- sum(x %in% c("I", "R"), na.rm = TRUE)
if (any(class(x) == "rsi")) {
header_list$count_S <- max(0, sum(x == "S", na.rm = TRUE), na.rm = TRUE)
header_list$count_IR <- max(0, sum(x %in% c("I", "R"), na.rm = TRUE), na.rm = TRUE)
}
formatdates <- "%e %B %Y" # = d mmmm yyyy
@ -564,18 +564,14 @@ format_header <- function(x, markdown = FALSE, decimal.mark = ".", big.mark = ", @@ -564,18 +564,14 @@ format_header <- function(x, markdown = FALSE, decimal.mark = ".", big.mark = ",
# FORMATTING
# rsi
if (has_length == TRUE & any(x_class == "rsi")) {
if (header$count_S < header$count_IR) {
ratio <- paste0(green(1), ":", red(format(header$count_IR / header$count_S,
digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark)))
} else {
ratio <- paste0(green(format(header$count_S / header$count_IR,
digits = 1, nsmall = 1, decimal.mark = decimal.mark, big.mark = big.mark)),
":", red(1))
ab <- tryCatch(atc_name(attributes(x)$opt$vars), error = function(e) NA)
if (!is.na(ab)) {
header$drug <- ab[1L]
}
header$`%IR` <- paste((header$count_IR / header$length) %>% percent(force_zero = TRUE, round = digits, decimal.mark = decimal.mark),
paste0("(ratio ", ratio, ")"))
header <- header[!names(header) %in% c("count_S", "count_IR")]
header$`%IR` <- percent(header$count_IR / (header$count_S + header$count_IR),
force_zero = TRUE, round = digits, decimal.mark = decimal.mark)
}
header <- header[!names(header) %in% c("count_S", "count_IR")]
# dates
if (!is.null(header$date_format)) {
if (header$date_format == "%H:%M:%S") {

14
R/ggplot_rsi.R

@ -164,9 +164,7 @@ ggplot_rsi <- function(data, @@ -164,9 +164,7 @@ ggplot_rsi <- function(data,
datalabels.colour = "grey15",
...) {
if (!"ggplot2" %in% rownames(installed.packages())) {
stop('this function requires the ggplot2 package.', call. = FALSE)
}
stopifnot_installed_package("ggplot2")
fun_name <- deparse(substitute(fun))
if (!fun_name %in% c("portion_df", "count_df")) {
@ -235,6 +233,8 @@ geom_rsi <- function(position = NULL, @@ -235,6 +233,8 @@ geom_rsi <- function(position = NULL,
fun = count_df,
...) {
stopifnot_installed_package("ggplot2")
fun_name <- deparse(substitute(fun))
if (!fun_name %in% c("portion_df", "count_df", "fun")) {
stop("`fun` must be portion_df or count_df")
@ -279,6 +279,8 @@ geom_rsi <- function(position = NULL, @@ -279,6 +279,8 @@ geom_rsi <- function(position = NULL,
#' @export
facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) {
stopifnot_installed_package("ggplot2")
facet <- facet[1]
# we work with aes_string later on
@ -302,6 +304,8 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) { @@ -302,6 +304,8 @@ facet_rsi <- function(facet = c("Interpretation", "Antibiotic"), nrow = NULL) {
#' @rdname ggplot_rsi
#' @export
scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) {
stopifnot_installed_package("ggplot2")
if (all(breaks[breaks != 0] > 1)) {
breaks <- breaks / 100
}
@ -313,6 +317,7 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { @@ -313,6 +317,7 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) {
#' @rdname ggplot_rsi
#' @export
scale_rsi_colours <- function() {
stopifnot_installed_package("ggplot2")
#ggplot2::scale_fill_brewer(palette = "RdYlGn")
ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00"))
}
@ -320,6 +325,7 @@ scale_rsi_colours <- function() { @@ -320,6 +325,7 @@ scale_rsi_colours <- function() {
#' @rdname ggplot_rsi
#' @export
theme_rsi <- function() {
stopifnot_installed_package("ggplot2")
ggplot2::theme_minimal() +
ggplot2::theme(panel.grid.major.x = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
@ -332,6 +338,7 @@ labels_rsi_count <- function(position = NULL, @@ -332,6 +338,7 @@ labels_rsi_count <- function(position = NULL,
x = "Antibiotic",
datalabels.size = 3,
datalabels.colour = "grey15") {
stopifnot_installed_package("ggplot2")
if (is.null(position)) {
position <- "fill"
}
@ -357,3 +364,4 @@ getlbls <- function(data) { @@ -357,3 +364,4 @@ getlbls <- function(data) {
" (n=", Value, ")")) %>%
mutate(lbl = ifelse(lbl == "0.0% (n=0)", "", lbl))
}

2
R/globals.R

@ -92,7 +92,7 @@ globalVariables(c(".", @@ -92,7 +92,7 @@ globalVariables(c(".",
"Sex",
"shortname",
"species",
"superprevalent",
"species_id",
"trade_name",
"transmute",
"tsn",

6
R/misc.R

@ -194,3 +194,9 @@ search_type_in_df <- function(tbl, type) { @@ -194,3 +194,9 @@ search_type_in_df <- function(tbl, type) {
}
found
}
stopifnot_installed_package <- function(package) {
if (!package %in% base::rownames(utils::installed.packages())) {
stop("this function requires the ", package, " package.", call. = FALSE)
}
}

233
R/mo.R

@ -23,10 +23,10 @@ @@ -23,10 +23,10 @@
#'
#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples.
#' @param x a character vector or a \code{data.frame} with one or two columns
#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication.
#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1,2]. Note that this does not include species that were newly named after these publications, like \emph{S. caeli}.
#'
#' This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".
#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
#'
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
#' @param allow_uncertain a logical (\code{TRUE} or \code{FALSE}) or a value between 0 and 3 to indicate whether the input should be checked for less possible results, see Details
@ -49,18 +49,19 @@ @@ -49,18 +49,19 @@
#' | | | ----> subspecies, a 3-4 letter acronym
#' | | ----> species, a 3-4 letter acronym
#' | ----> genus, a 5-7 letter acronym, mostly without vowels
#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista),
#' F (Fungi), P (Protozoa) or PL (Plantae)
#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria),
#' C (Chromista), F (Fungi), P (Protozoa) or
#' PL (Plantae)
#' }
#'
#' Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}.
#' Values that cannot be coered will be considered 'unknown' and will get the MO code \code{UNKNOWN}.
#'
#' Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples.
#'
#' The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}).
#'
#' \strong{Self-learning algoritm} \cr
#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}.
#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 80-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}.
#'
#' \strong{Intelligent rules} \cr
#' This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:
@ -80,7 +81,7 @@ @@ -80,7 +81,7 @@
#' This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms.
#'
#' \strong{Uncertain results} \cr
#' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is uqual to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules:
#' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules:
#' \itemize{
#' \item{(uncertainty level 1): It tries to look for only matching genera}
#' \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names}
@ -121,11 +122,13 @@ @@ -121,11 +122,13 @@
#' @section Source:
#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870โ€“926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
#'
#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571โ€“95. \url{https://dx.doi.org/10.1084/jem.57.4.571}
#' [2] Becker K \emph{et al.} \strong{Implications of identifying the recently defined members of the S. aureus complex, S. argenteus and S. schweitzeri: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).}. 2019. Clin Microbiol Infect. 2019 Mar 11. \url{https://doi.org/10.1016/j.cmi.2019.02.028}
#'
#' [3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}).
#' [3] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571โ€“95. \url{https://dx.doi.org/10.1084/jem.57.4.571}
#'
#' [4] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}).
#' @export
#' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
#' @return Character (vector) with class \code{"mo"}
#' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine ID's. \cr
#' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code.
#' @inheritSection AMR Read more on our website!
@ -188,7 +191,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, @@ -188,7 +191,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
# check onLoad() in R/zzz.R: data tables are created there.
}
mo_hist <- get_mo_history(x, force = isTRUE(list(...)$force_mo_history))
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history))
if (mo_source_isvalid(reference_df)
& isFALSE(Becker)
@ -247,13 +251,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, @@ -247,13 +251,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
"mo"][[1]]
}
# save them to history
set_mo_history(x, y, force = isTRUE(list(...)$force_mo_history))
set_mo_history(x, y, 0, force = isTRUE(list(...)$force_mo_history))
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df,
allow_uncertain = uncertainty_level, reference_df = reference_df,
force_mo_history = isTRUE(list(...)$force_mo_history))
}
@ -320,15 +324,8 @@ exec_as.mo <- function(x, @@ -320,15 +324,8 @@ exec_as.mo <- function(x,
fullname = character(0),
mo = character(0))
failures <- character(0)
if (isTRUE(allow_uncertain)) {
# default to uncertainty level 2
allow_uncertain <- 2
} else {
allow_uncertain <- as.integer(allow_uncertain)
if (!allow_uncertain %in% c(0:3)) {
stop("`allow_uncertain` must be a number between 0 (none) and 3 (all), or TRUE (= 2) or FALSE (= 0).", call. = FALSE)
}
}
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
x_input <- x
# already strip leading and trailing spaces
x <- trimws(x, which = "both")
@ -341,7 +338,6 @@ exec_as.mo <- function(x, @@ -341,7 +338,6 @@ exec_as.mo <- function(x,
& !identical(x, "")
& !identical(x, "xxx")
& !identical(x, "con")]
x_input_backup <- x
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
@ -406,9 +402,13 @@ exec_as.mo <- function(x, @@ -406,9 +402,13 @@ exec_as.mo <- function(x,
}
x <- y
} else if (all(x %in% read_mo_history(force = force_mo_history)$x)) {
} else if (all(x %in% read_mo_history(uncertainty_level,
force = force_mo_history)$x)) {
# previously found code
x <- microorganismsDT[data.table(mo = get_mo_history(x, force = force_mo_history)), on = "mo", ..property][[1]]
x <- microorganismsDT[data.table(mo = get_mo_history(x,
uncertainty_level,
force = force_mo_history)),
on = "mo", ..property][[1]]
} else if (all(tolower(x) %in% microorganismsDT$fullname_lower)) {
# we need special treatment for very prevalent full names, they are likely!
@ -430,7 +430,7 @@ exec_as.mo <- function(x, @@ -430,7 +430,7 @@ exec_as.mo <- function(x,
# commonly used MO codes
y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ]
# save them to history
set_mo_history(x, y$mo, force = force_mo_history)
set_mo_history(x, y$mo, 0, force = force_mo_history)
x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
@ -502,11 +502,16 @@ exec_as.mo <- function(x, @@ -502,11 +502,16 @@ exec_as.mo <- function(x,
progress$tick()$print()
found <- microorganismsDT[mo == get_mo_history(x_backup[i], force = force_mo_history), ..property][[1]]
# previously found result
if (length(found) > 0) {
x[i] <- found[1L]
next
if (initial_search == TRUE) {
found <- microorganismsDT[mo == get_mo_history(x_backup[i],
uncertainty_level,
force = force_mo_history),
..property][[1]]
# previously found result
if (length(found) > 0) {
x[i] <- found[1L]
next
}
}
found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]]
@ -521,7 +526,7 @@ exec_as.mo <- function(x, @@ -521,7 +526,7 @@ exec_as.mo <- function(x,
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -535,7 +540,7 @@ exec_as.mo <- function(x, @@ -535,7 +540,7 @@ exec_as.mo <- function(x,
# empty and nonsense values, ignore without warning
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -552,7 +557,7 @@ exec_as.mo <- function(x, @@ -552,7 +557,7 @@ exec_as.mo <- function(x,
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -562,7 +567,7 @@ exec_as.mo <- function(x, @@ -562,7 +567,7 @@ exec_as.mo <- function(x,
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -572,7 +577,7 @@ exec_as.mo <- function(x, @@ -572,7 +577,7 @@ exec_as.mo <- function(x,
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -582,14 +587,14 @@ exec_as.mo <- function(x, @@ -582,14 +587,14 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) {
x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')) {
x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -597,14 +602,14 @@ exec_as.mo <- function(x, @@ -597,14 +602,14 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') {
x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) %in% c("EHEC", "EPEC", "EIEC", "STEC", "ATEC")) {
x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -612,7 +617,7 @@ exec_as.mo <- function(x, @@ -612,7 +617,7 @@ exec_as.mo <- function(x,
# multi resistant P. aeruginosa
x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -621,7 +626,7 @@ exec_as.mo <- function(x, @@ -621,7 +626,7 @@ exec_as.mo <- function(x,
# co-trim resistant S. maltophilia
x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -629,7 +634,7 @@ exec_as.mo <- function(x, @@ -629,7 +634,7 @@ exec_as.mo <- function(x,
# peni I, peni R, vanco I, vanco R: S. pneumoniae
x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -637,7 +642,7 @@ exec_as.mo <- function(x, @@ -637,7 +642,7 @@ exec_as.mo <- function(x,
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB)
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -645,7 +650,7 @@ exec_as.mo <- function(x, @@ -645,7 +650,7 @@ exec_as.mo <- function(x,
# Streptococci in different languages, like "estreptococos grupo B"
x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -653,7 +658,7 @@ exec_as.mo <- function(x, @@ -653,7 +658,7 @@ exec_as.mo <- function(x,
# Streptococci in different languages, like "Group A Streptococci"
x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -664,7 +669,7 @@ exec_as.mo <- function(x, @@ -664,7 +669,7 @@ exec_as.mo <- function(x,
# coerce S. coagulase negative
x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -674,7 +679,7 @@ exec_as.mo <- function(x, @@ -674,7 +679,7 @@ exec_as.mo <- function(x,
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -684,7 +689,7 @@ exec_as.mo <- function(x, @@ -684,7 +689,7 @@ exec_as.mo <- function(x,
# coerce Gram negatives
x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -694,7 +699,7 @@ exec_as.mo <- function(x, @@ -694,7 +699,7 @@ exec_as.mo <- function(x,
# coerce Gram positives
x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -703,7 +708,7 @@ exec_as.mo <- function(x, @@ -703,7 +708,7 @@ exec_as.mo <- function(x,
# Salmonella Group A to Z, just return S. species for now
x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
options(mo_renamed = c(getOption("mo_renamed"),
magenta(paste0("Note: ",
@ -715,7 +720,7 @@ exec_as.mo <- function(x, @@ -715,7 +720,7 @@ exec_as.mo <- function(x,
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
options(mo_renamed = c(getOption("mo_renamed"),
magenta(paste0("Note: ",
@ -735,7 +740,7 @@ exec_as.mo <- function(x, @@ -735,7 +740,7 @@ exec_as.mo <- function(x,
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -744,7 +749,7 @@ exec_as.mo <- function(x, @@ -744,7 +749,7 @@ exec_as.mo <- function(x,
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -759,7 +764,7 @@ exec_as.mo <- function(x, @@ -759,7 +764,7 @@ exec_as.mo <- function(x,
if (length(mo_found) > 0) {
x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -782,7 +787,7 @@ exec_as.mo <- function(x, @@ -782,7 +787,7 @@ exec_as.mo <- function(x,
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -869,7 +874,7 @@ exec_as.mo <- function(x, @@ -869,7 +874,7 @@ exec_as.mo <- function(x,
g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) {
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -884,7 +889,7 @@ exec_as.mo <- function(x, @@ -884,7 +889,7 @@ exec_as.mo <- function(x,
g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) {
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -899,7 +904,7 @@ exec_as.mo <- function(x, @@ -899,7 +904,7 @@ exec_as.mo <- function(x,
g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) {
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -925,7 +930,7 @@ exec_as.mo <- function(x, @@ -925,7 +930,7 @@ exec_as.mo <- function(x,
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
next
}
@ -938,12 +943,12 @@ exec_as.mo <- function(x, @@ -938,12 +943,12 @@ exec_as.mo <- function(x,
f.x_withspaces_end_only,
g.x_backup_without_spp) {
if (allow_uncertain == 0) {
if (uncertainty_level == 0) {
# do not allow uncertainties
return(NA_character_)
}
if (allow_uncertain >= 1) {
if (uncertainty_level >= 1) {
# (1) look again for old taxonomic names, now for G. species ----
found <- microorganisms.oldDT[fullname %like% c.x_withspaces_start_end
| fullname %like% d.x_withspaces_start_only]
@ -966,11 +971,14 @@ exec_as.mo <- function(x, @@ -966,11 +971,14 @@ exec_as.mo <- function(x,
input = a.x_backup,
fullname = found[1, fullname],
mo = paste("CoL", found[1, col_id])))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(x, property), 1, force = force_mo_history)
}
return(x)
}
}
if (allow_uncertain >= 2) {
if (uncertainty_level >= 2) {
# (3) look for genus only, part of name ----
if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like% " ") {
if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) {
@ -983,6 +991,9 @@ exec_as.mo <- function(x, @@ -983,6 +991,9 @@ exec_as.mo <- function(x,
input = a.x_backup,
fullname = microorganismsDT[mo == found[1L], fullname][[1]],
mo = found[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(x, property), 2, force = force_mo_history)
}
return(x)
}
}
@ -1000,6 +1011,9 @@ exec_as.mo <- function(x, @@ -1000,6 +1011,9 @@ exec_as.mo <- function(x,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history)
}
return(found[1L])
}
@ -1018,6 +1032,33 @@ exec_as.mo <- function(x, @@ -1018,6 +1032,33 @@ exec_as.mo <- function(x,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history)
}
return(found[1L])
}
}
}
}
# (6) try to strip off one element from start and check the remains (only allow 2-part name outcome) ----
x_strip <- a.x_backup %>% strsplit(" ") %>% unlist()
if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
for (i in 2:(length(x_strip))) {
x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE)))
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
# uncertainty level 2 only if the fullname contains a space (otherwise it will be found with lvl 3)
if (microorganismsDT[mo == found_result[1L], fullname][[1]] %like% " ") {
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 2,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history)
}
return(found[1L])
}
}
@ -1025,8 +1066,8 @@ exec_as.mo <- function(x, @@ -1025,8 +1066,8 @@ exec_as.mo <- function(x,
}
}
if (allow_uncertain >= 3) {
# (6) try to strip off one element from start and check the remains ----
if (uncertainty_level >= 3) {
# (7) try to strip off one element from start and check the remains ----
x_strip <- a.x_backup %>% strsplit(" ") %>% unlist()
if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
for (i in 2:(length(x_strip))) {
@ -1040,12 +1081,15 @@ exec_as.mo <- function(x, @@ -1040,12 +1081,15 @@ exec_as.mo <- function(x,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history)
}
return(found[1L])
}
}
}
# (7) part of a name (very unlikely match) ----
# (8) part of a name (very unlikely match) ----
found <- microorganismsDT[fullname %like% f.x_withspaces_end_only]
if (nrow(found) > 0) {
found_result <- found[["mo"]]
@ -1056,6 +1100,9 @@ exec_as.mo <- function(x, @@ -1056,6 +1100,9 @@ exec_as.mo <- function(x,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
if (initial_search == TRUE) {
set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history)
}
return(found[1L])
}
}
@ -1071,7 +1118,7 @@ exec_as.mo <- function(x, @@ -1071,7 +1118,7 @@ exec_as.mo <- function(x,
x_withspaces_end_only[i],
x_backup_without_spp[i])
if (!empty_result(x[i])) {
# no set_mo_history here; these are uncertain
# no set_mo_history: is already set in uncertain_fn()
next
}
@ -1079,7 +1126,7 @@ exec_as.mo <- function(x, @@ -1079,7 +1126,7 @@ exec_as.mo <- function(x,
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
}
}
}
@ -1127,8 +1174,8 @@ exec_as.mo <- function(x, @@ -1127,8 +1174,8 @@ exec_as.mo <- function(x,
MOs_staph <- microorganismsDT[genus == "Staphylococcus"]
setkey(MOs_staph, species)
CoNS <- MOs_staph[species %in% c("arlettae", "auricularis", "capitis",
"caprae", "carnosus", "cohnii", "condimenti",
"devriesei", "epidermidis", "equorum",
"caprae", "carnosus", "chromogenes", "cohnii", "condimenti",
"devriesei", "epidermidis", "equorum", "felis",
"fleurettii", "gallinarum", "haemolyticus",
"hominis", "jettensis", "kloosii", "lentus",
"lugdunensis", "massiliensis", "microti",
@ -1136,16 +1183,31 @@ exec_as.mo <- function(x, @@ -1136,16 +1183,31 @@ exec_as.mo <- function(x,
"pettenkoferi", "piscifermentans", "rostri",
"saccharolyticus", "saprophyticus", "sciuri",
"stepanovicii", "simulans", "succinus",
"vitulinus", "warneri", "xylosus"), ..property][[1]]
CoPS <- MOs_staph[species %in% c("simiae", "agnetis", "chromogenes",
"delphini", "felis", "lutrae",
"vitulinus", "warneri", "xylosus")
| (species == "schleiferi" & subspecies %in% c("schleiferi", "")), ..property][[1]]
CoPS <- MOs_staph[species %in% c("simiae", "agnetis",
"delphini", "lutrae",
"hyicus", "intermedius",
"pseudintermedius", "pseudointermedius",
"schleiferi"), ..property][[1]]
"schweitzeri", "argenteus")
| (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]]
# warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103)
post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus")
if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) {
warning("Becker ", italic("et al."), " (2014) does not contain species named after their publication: ",
italic(paste("S.",
sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))),
collapse = ", ")),
call. = FALSE,
immediate. = TRUE)
}
x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
if (Becker == "all") {
x[x == microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
x[x %in% microorganismsDT[mo %like% '^B_STPHY_AUR', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
}
}
@ -1305,7 +1367,7 @@ mo_uncertainties <- function() { @@ -1305,7 +1367,7 @@ mo_uncertainties <- function() {
}
#' @exportMethod print.mo_uncertainties
#' @importFrom crayon green yellow red bgGreen bgYellow bgRed
#' @importFrom crayon green yellow red white bgGreen bgYellow bgRed
#' @export
#' @noRd
print.mo_uncertainties <- function(x, ...) {
@ -1321,16 +1383,16 @@ print.mo_uncertainties <- function(x, ...) { @@ -1321,16 +1383,16 @@ print.mo_uncertainties <- function(x, ...) {
for (i in 1:nrow(x)) {
if (x[i, "uncertainty"] == 1) {
colour1 <- green
colour2 <- bgGreen
colour2 <- function(...) bgGreen(white(...))
} else if (x[i, "uncertainty"] == 2) {
colour1 <- yellow
colour2 <- bgYellow
} else {
colour1 <- red
colour2 <- bgRed
colour2 <- function(...) bgRed(white(...))
}
msg <- paste(msg,
paste0("[", colour2(paste0(" ", x[i, "uncertainty"], " ")), '] - "', x[i, "input"], '" -> ',
paste0(colour2(paste0(" [", x[i, "uncertainty"], "] ")), ' "', x[i, "input"], '" -> ',
colour1(paste0(italic(x[i, "fullname"]), " (", x[i, "mo"], ")"))),
sep = "\n")
}
@ -1373,3 +1435,16 @@ get_mo_code <- function(x, property) { @@ -1373,3 +1435,16 @@ get_mo_code <- function(x, property) {
AMR::microorganisms[base::which(AMR::microorganisms[, property] %in% x),]$mo
}
}
translate_allow_uncertain <- function(allow_uncertain) {
if (isTRUE(allow_uncertain)) {
# default to uncertainty level 2
allow_uncertain <- 2
} else {
allow_uncertain <- as.integer(allow_uncertain)
if (!allow_uncertain %in% c(0:3)) {
stop("`allow_uncertain` must be a number between 0 (none) and 3 (all), or TRUE (= 2) or FALSE (= 0).", call. = FALSE)
}
}
allow_uncertain
}

59
R/mo_history.R

@ -21,10 +21,10 @@ @@ -21,10 +21,10 @@
# print successful as.mo coercions to file, not uncertain ones
#' @importFrom dplyr %>% distinct filter
set_mo_history <- function(x, mo, force = FALSE) {
set_mo_history <- function(x, mo, uncertainty_level, force = FALSE) {
file_location <- base::path.expand('~/.Rhistory_mo')
if (base::interactive() | force == TRUE) {
mo_hist <- read_mo_history(force = force)
mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
df <- data.frame(x, mo, stringsAsFactors = FALSE) %>%
distinct(x, .keep_all = TRUE) %>%
filter(!is.na(x) & !is.na(mo))
@ -35,10 +35,12 @@ set_mo_history <- function(x, mo, force = FALSE) { @@ -35,10 +35,12 @@ set_mo_history <- function(x, mo, force = FALSE) {
mo <- df$mo
for (i in 1:length(x)) {
# save package version too, as both the as.mo() algorithm and the reference data set may change
if (NROW(mo_hist[base::which(mo_hist$x == x[i] & mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) {
base::write(x = c(x[i], mo[i], base::as.character(utils::packageVersion("AMR"))),
if (NROW(mo_hist[base::which(mo_hist$x == x[i] &
mo_hist$uncertainty_level >= uncertainty_level &
mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) {
base::write(x = c(x[i], mo[i], uncertainty_level, base::as.character(utils::packageVersion("AMR"))),
file = file_location,
ncolumns = 3,
ncolumns = 4,
append = TRUE,
sep = "\t")
}
@ -47,8 +49,8 @@ set_mo_history <- function(x, mo, force = FALSE) { @@ -47,8 +49,8 @@ set_mo_history <- function(x, mo, force = FALSE) {
return(base::invisible())
}
get_mo_history <- function(x, force = FALSE) {
file_read <- read_mo_history(force = force)
get_mo_history <- function(x, uncertainty_level, force = FALSE) {
file_read <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
if (base::is.null(file_read)) {
NA
} else {
@ -59,30 +61,57 @@ get_mo_history <- function(x, force = FALSE) { @@ -59,30 +61,57 @@ get_mo_history <- function(x, force = FALSE) {
}
#' @importFrom dplyr %>% filter distinct
read_mo_history <- function(force = FALSE) {
read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE) {
file_location <- base::path.expand('~/.Rhistory_mo')
if (!base::file.exists(file_location) | (!base::interactive() & force == FALSE)) {
return(NULL)
}
uncertainty_level_param <- uncertainty_level
file_read <- utils::read.table(file = file_location,
header = FALSE,
sep = "\t",
col.names = c("x", "mo", "package_version"),
col.names = c("x", "mo", "uncertainty_level", "package_version"),
stringsAsFactors = FALSE)
# Below: filter on current package version.
# Even current fullnames may be replaced by new taxonomic names, so new versions of
# the Catalogue of Life must not lead to data corruption.
file_read %>%
filter(package_version == utils::packageVersion("AMR")) %>%
distinct(x, mo, .keep_all = TRUE)
if (unfiltered == FALSE) {
file_read <- file_read %>%
filter(package_version == utils::packageVersion("AMR"),
# only take unknowns if uncertainty_level_param is higher
((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) |
(mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>%
arrange(desc(uncertainty_level)) %>%
distinct(x, mo, .keep_all = TRUE)
}
if (nrow(file_read) == 0) {
NULL
} else {
file_read
}
}
#' @rdname as.mo
#' @importFrom crayon red
#' @importFrom utils menu
#' @export
clean_mo_history <- function() {
clean_mo_history <- function(...) {
file_location <- base::path.expand('~/.Rhistory_mo')
if (base::file.exists(file_location)) {
base::unlink(file_location)
if (file.exists(file_location)) {
if (interactive() & !isTRUE(list(...)$force)) {
q <- menu(title = paste("This will remove all",
format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","),
"previously determined microbial IDs. Are you sure?"),
choices = c("Yes", "No"),
graphics = FALSE)
if (q != 1) {
return(invisible())
}
}
unlink(file_location)
cat(red("File", file_location, "removed."))
}
}

11
R/mo_property.R

@ -446,6 +446,8 @@ mo_translate <- function(x, language) { @@ -446,6 +446,8 @@ mo_translate <- function(x, language) {
# Spanish
language == "es" ~ x[x_tobetranslated] %>%
# not 'negativa'
# https://www.sciencedirect.com/science/article/pii/S0123939215000739
gsub("Coagulase-negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>%
gsub("Coagulase-positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
@ -461,6 +463,8 @@ mo_translate <- function(x, language) { @@ -461,6 +463,8 @@ mo_translate <- function(x, language) {
gsub("unknown species", "especie desconocida", ., fixed = TRUE) %>%
gsub("unknown subspecies", "subespecie desconocida", ., fixed = TRUE) %>%
gsub("unknown rank", "rango desconocido", ., fixed = TRUE) %>%
gsub("(CoNS)", "(SCN)", ., fixed = TRUE) %>%
gsub("(CoPS)", "(SCP)", ., fixed = TRUE) %>%
gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>%
gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>%
gsub("Bacteria", "Bacterias", ., fixed = TRUE) %>%