Browse Source

(v0.7.1.9075) new microorganism codes

new-mo-algorithm
parent
commit
e2aa4f996b
  1. 6
      DESCRIPTION
  2. 9
      NEWS.md
  3. 18
      R/data.R
  4. 1
      R/globals.R
  5. 179
      R/mo.R
  6. 2
      R/mo_history.R
  7. 2
      R/rsi.R
  8. BIN
      R/sysdata.rda
  9. 46
      R/zzz.R
  10. 2
      cran-comments.md
  11. BIN
      data-raw/DRGLST1.xlsx
  12. 7
      data-raw/internals.R
  13. BIN
      data-raw/microorganisms.translation.rds
  14. 261
      data-raw/reproduction_of_microorganisms.R
  15. 4
      data-raw/reproduction_of_rsi_translation.R
  16. BIN
      data/example_isolates.rda
  17. BIN
      data/microorganisms.codes.rda
  18. BIN
      data/microorganisms.old.rda
  19. BIN
      data/microorganisms.rda
  20. BIN
      data/rsi_translation.rda
  21. 2
      docs/LICENSE-text.html
  22. 144
      docs/articles/benchmarks.html
  23. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png
  24. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png
  25. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-7-1.png
  26. 2
      docs/articles/index.html
  27. 6
      docs/authors.html
  28. 13
      docs/extra.js
  29. 4
      docs/index.html
  30. 15
      docs/news/index.html
  31. 36
      docs/reference/as.mo.html
  32. 14
      docs/reference/example_isolates.html
  33. 6
      docs/reference/index.html
  34. 12
      docs/reference/microorganisms.codes.html
  35. 6
      docs/reference/microorganisms.html
  36. 4
      docs/reference/microorganisms.old.html
  37. 2
      index.md
  38. 34
      man/as.mo.Rd
  39. 6
      man/example_isolates.Rd
  40. 4
      man/microorganisms.Rd
  41. 6
      man/microorganisms.codes.Rd
  42. 2
      man/microorganisms.old.Rd
  43. 13
      pkgdown/extra.js
  44. 6
      tests/testthat/test-first_isolate.R
  45. 4
      tests/testthat/test-freq.R
  46. 14
      tests/testthat/test-join_microorganisms.R
  47. 2
      tests/testthat/test-mdro.R
  48. 179
      tests/testthat/test-mo.R
  49. 2
      tests/testthat/test-mo_property.R
  50. 2
      tests/testthat/test-read.4d.R
  51. 20
      tests/testthat/test-resistance_predict.R
  52. 10
      tests/testthat/test-rsi.R
  53. 6
      vignettes/benchmarks.Rmd

6
DESCRIPTION

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
Package: AMR
Version: 0.7.1.9074
Date: 2019-09-16
Version: 0.7.1.9075
Date: 2019-09-18
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),
@ -15,6 +15,8 @@ Authors@R: c( @@ -15,6 +15,8 @@ Authors@R: c(
family = "Albers", given = c("Casper", "J."), email = "c.j.albers@rug.nl", comment = c(ORCID = "0000-0002-9213-6743")),
person(role = c("aut", "ths"),
family = "Glasner", given = "Corinna", email = "c.glasner@umcg.nl", comment = c(ORCID = "0000-0003-1241-1328")),
person(role = "ctb",
family = "Fonville", given = c("Judith", "M."), email = "j.fonville@pamm.nl"),
person(role = "ctb",
family = "Hassing", given = c("Erwin", "E.", "A."), email = "e.hassing@certe.nl"),
person(role = "ctb",

9
NEWS.md

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
# AMR 0.7.1.9074
<small>Last updated: 16-Sep-2019</small>
# AMR 0.7.1.9075
<small>Last updated: 18-Sep-2019</small>
### Breaking
* Determination of first isolates now **excludes** all 'unknown' microorganisms at default, i.e. microbial code `"UNKNOWN"`. They can be included with the new parameter `include_unknown`:
@ -72,7 +72,7 @@ @@ -72,7 +72,7 @@
```
### Changed
* Many algorithm improvements for `as.mo()` (of which some led to additions to the `microorganisms` data set):
* Many algorithm improvements for `as.mo()` (of which some led to additions to the `microorganisms` data set). Many thanks to all contributors that helped improving the algorithms.
* Self-learning algorithm - the function now gains experience from previously determined microorganism IDs and learns from it (yielding 80-95% speed improvement for any guess after the first try)
* Big improvement for misspelled input
* These new trivial names known to the field are now understood: meningococcus, gonococcus, pneumococcus
@ -80,6 +80,7 @@ @@ -80,6 +80,7 @@
* Added support for Viridans Group Streptococci (VGS) and Milleri Group Streptococci (MGS)
* Added support for 5,000 new fungi
* Added support for unknown yeasts and fungi
* Changed most microorganism IDs to improve readability. **IMPORTANT:** Because of these changes, the microorganism IDs have been changed to a slightly different format. Old microorganism IDs are still supported, but support will be dropped in a future version. Use `as.mo()` on your old codes to transform them to the new format.
* Renamed data set `septic_patients` to `example_isolates`
* Function `eucast_rules()`:
* Fixed a bug for *Yersinia pseudotuberculosis*
@ -105,7 +106,7 @@ @@ -105,7 +106,7 @@
* Fix for `key_antibiotics()` on foreign systems
#### Other
* Added Prof Dr Casper Albers as doctoral advisor and Dr Bart Meijer, Dr Dennis Souverein and Annick Lenglet as contributors
* Added Prof. Dr. Casper Albers as doctoral advisor and added Dr. Judith Fonville, Eric Hazenberg, Dr. Bart Meijer, Dr. Dennis Souverein and Annick Lenglet as contributors
# AMR 0.7.1

18
R/data.R

@ -55,7 +55,7 @@ @@ -55,7 +55,7 @@
#'
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using \code{\link{as.mo}}.
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A \code{\link{data.frame}} with 69,855 observations and 16 variables:
#' @format A \code{\link{data.frame}} with 69,460 observations and 16 variables:
#' \describe{
#' \item{\code{mo}}{ID of microorganism as used by this package}
#' \item{\code{col_id}}{Catalogue of Life ID}
@ -73,7 +73,7 @@ @@ -73,7 +73,7 @@
#' \item{2 entries of \emph{Staphylococcus} (coagulase-negative [CoNS] and coagulase-positive [CoPS])}
#' \item{3 entries of \emph{Trichomonas} (\emph{Trichomonas vaginalis}, and its family and genus)}
#' \item{5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)}
#' \item{8,970 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) that are not in the Catalogue of Life}
#' \item{22,654 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) overwriting records from the Catalogue of Life, since the DSMZ contain the latest taxonomic information based on recent publications}
#' }
#' @section About the records from DSMZ (see source):
#' Names of prokaryotes are defined as being validly published by the International Code of Nomenclature of Bacteria. Validly published are all names which are included in the Approved Lists of Bacterial Names and the names subsequently published in the International Journal of Systematic Bacteriology (IJSB) and, from January 2000, in the International Journal of Systematic and Evolutionary Microbiology (IJSEM) as original articles or in the validation lists.
@ -98,7 +98,7 @@ catalogue_of_life <- list( @@ -98,7 +98,7 @@ catalogue_of_life <- list(
#'
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}.
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A \code{\link{data.frame}} with 22,932 observations and 4 variables:
#' @format A \code{\link{data.frame}} with 24,246 observations and 4 variables:
#' \describe{
#' \item{\code{col_id}}{Catalogue of Life ID that was originally given}
#' \item{\code{col_id_new}}{New Catalogue of Life ID that responds to an entry in the \code{\link{microorganisms}} data set}
@ -110,12 +110,12 @@ catalogue_of_life <- list( @@ -110,12 +110,12 @@ catalogue_of_life <- list(
#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms}}
"microorganisms.old"
#' Translation table for microorganism codes
#' Translation table for common microorganism codes
#'
#' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with \code{\link{set_mo_source}}.
#' @format A \code{\link{data.frame}} with 4,965 observations and 2 variables:
#' @format A \code{\link{data.frame}} with 4,927 observations and 2 variables:
#' \describe{
#' \item{\code{certe}}{Commonly used code of a microorganism}
#' \item{\code{code}}{Commonly used code of a microorganism}
#' \item{\code{mo}}{ID of the microorganism in the \code{\link{microorganisms}} data set}
#' }
#' @inheritSection catalogue_of_life Catalogue of Life
@ -123,9 +123,9 @@ catalogue_of_life <- list( @@ -123,9 +123,9 @@ catalogue_of_life <- list(
#' @seealso \code{\link{as.mo}} \code{\link{microorganisms}}
"microorganisms.codes"
#' Data set with 2,000 blood culture isolates from septic patients
#' Data set with 2,000 blood culture isolates
#'
#' An anonymised data set containing 2,000 microbial blood culture isolates with their full antibiograms found in septic patients in 4 different hospitals in the Netherlands, between 2001 and 2017. This \code{data.frame} can be used to practice AMR analysis. For examples, please read \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{the tutorial on our website}.
#' An anonymised data set containing 2,000 microbial blood culture isolates with their full antibiograms found 4 different hospitals in the Netherlands, between 2001 and 2017. This \code{data.frame} can be used to practice AMR analysis. For examples, please read \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{the tutorial on our website}.
#' @format A \code{\link{data.frame}} with 2,000 observations and 49 variables:
#' \describe{
#' \item{\code{date}}{date of receipt at the laboratory}
@ -137,7 +137,7 @@ catalogue_of_life <- list( @@ -137,7 +137,7 @@ catalogue_of_life <- list(
#' \item{\code{gender}}{gender of the patient}
#' \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
#' \item{\code{mo}}{ID of microorganism created with \code{\link{as.mo}}, see also \code{\link{microorganisms}}}
#' \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{ab_name}}}
#' \item{\code{PEN:RIF}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} data set and can be translated with \code{\link{ab_name}}}
#' }
#' @inheritSection AMR Read more on our website!
"example_isolates"

1
R/globals.R

@ -67,6 +67,7 @@ globalVariables(c(".", @@ -67,6 +67,7 @@ globalVariables(c(".",
"observations",
"observed",
"old",
"old_name",
"other_pat_or_mo",
"package_version",
"patient_id",

179
R/mo.R

@ -29,7 +29,7 @@ @@ -29,7 +29,7 @@
#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [3]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
#'
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
#' @param allow_uncertain a number between 0 (or "none") and 3 (or "all"), or TRUE (= 2) or FALSE (= 0) to indicate whether the input should be checked for less possible results, see Details
#' @param allow_uncertain a number between 0 (or "none") and 3 (or "all"), or TRUE (= 2) or FALSE (= 0) to indicate whether the input should be checked for less probable results, see Details
#' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. See \code{\link{set_mo_source}} and \code{\link{get_mo_source}} to automate the usage of your own codes (e.g. used in your analysis or organisation).
#' @param ... other parameters passed on to functions
#' @rdname as.mo
@ -39,16 +39,16 @@ @@ -39,16 +39,16 @@
#' \strong{General info} \cr
#' A microorganism ID from this package (class: \code{mo}) typically looks like these examples:\cr
#' \preformatted{
#' Code Full name
#' --------------- --------------------------------------
#' B_KLBSL Klebsiella
#' B_KLBSL_PNE Klebsiella pneumoniae
#' B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis
#' | | | |
#' | | | |
#' | | | ----> subspecies, a 3-4 letter acronym
#' | | ----> species, a 3-4 letter acronym
#' | ----> genus, a 5-7 letter acronym, mostly without vowels
#' Code Full name
#' --------------- --------------------------------------
#' B_KLBSL Klebsiella
#' B_KLBSL_PNMN Klebsiella pneumoniae
#' B_KLBSL_PNMN_RHNS Klebsiella pneumoniae rhinoscleromatis
#' | | | |
#' | | | |
#' | | | ---> subspecies, a 4-5 letter acronym
#' | | ----> species, a 4-5 letter acronym
#' | ----> genus, a 5-7 letter acronym
#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria),
#' C (Chromista), F (Fungi), P (Protozoa)
#' }
@ -95,7 +95,7 @@ @@ -95,7 +95,7 @@
#'
#' Examples:
#' \itemize{
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPT_GRB}) needs review.}
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPT_GRPB}) needs review.}
#' \item{\code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AUR}) needs review.}
#' \item{\code{"Fluoroquinolone-resistant Neisseria gonorrhoeae"}. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result \emph{Neisseria gonorrhoeae} (\code{B_NESSR_GON}) needs review.}
#' }
@ -135,7 +135,7 @@ @@ -135,7 +135,7 @@
#' @importFrom dplyr %>% pull left_join
#' @examples
#' \donttest{
#' # These examples all return "B_STPHY_AUR", the ID of S. aureus:
#' # These examples all return "B_STPHY_AURS", the ID of S. aureus:
#' as.mo("sau") # WHONET code
#' as.mo("stau")
#' as.mo("STAU")
@ -160,11 +160,11 @@ @@ -160,11 +160,11 @@
#' as.mo("GAS") # Group A Streptococci
#' as.mo("GBS") # Group B Streptococci
#'
#' as.mo("S. epidermidis") # will remain species: B_STPHY_EPI
#' as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CNS
#' as.mo("S. epidermidis") # will remain species: B_STPHY_EPDR
#' as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CONS
#'
#' as.mo("S. pyogenes") # will remain species: B_STRPT_PYO
#' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRA
#' as.mo("S. pyogenes") # will remain species: B_STRPT_PYGN
#' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRPA
#'
#' # All mo_* functions use as.mo() internally too (see ?mo_property):
#' mo_genus("E. coli") # returns "Escherichia"
@ -342,6 +342,7 @@ exec_as.mo <- function(x, @@ -342,6 +342,7 @@ exec_as.mo <- function(x,
stringsAsFactors = FALSE)
failures <- character(0)
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
old_mo_warning <- FALSE
x_input <- x
# already strip leading and trailing spaces
@ -359,6 +360,7 @@ exec_as.mo <- function(x, @@ -359,6 +360,7 @@ exec_as.mo <- function(x,
if (any(x %like_case% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
leftpart <- gsub("^([BFP]_[A-Z]{3,7}).*", "\\1", x)
if (any(leftpart %in% names(mo_codes_v0.5.0))) {
old_mo_warning <- TRUE
rightpart <- gsub("^[BFP]_[A-Z]{3,7}(.*)", "\\1", x)
leftpart <- mo_codes_v0.5.0[leftpart]
x[!is.na(leftpart)] <- paste0(leftpart[!is.na(leftpart)], rightpart[!is.na(leftpart)])
@ -366,6 +368,7 @@ exec_as.mo <- function(x, @@ -366,6 +368,7 @@ exec_as.mo <- function(x,
# now check if some are still old
still_old <- x[x %in% names(mo_codes_v0.5.0)]
if (length(still_old) > 0) {
old_mo_warning <- TRUE
x[x %in% names(mo_codes_v0.5.0)] <- data.frame(old = still_old, stringsAsFactors = FALSE) %>%
left_join(data.frame(old = names(mo_codes_v0.5.0),
new = mo_codes_v0.5.0,
@ -466,6 +469,14 @@ exec_as.mo <- function(x, @@ -466,6 +469,14 @@ exec_as.mo <- function(x,
x <- reference_data_to_use[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
} else if (all(x %in% microorganisms.translation$mo_old)) {
# is an old mo code, used in previous versions of this package
old_mo_warning <- TRUE
y <- as.data.table(microorganisms.translation)[data.table(mo_old = x), on = "mo_old", "mo_new"][[1]]
y <- reference_data_to_use[data.table(mo = y), on = "mo", ..property][[1]]
# don't save to history, as all items are already in microorganisms.translation
x <- y
} else if (!all(x %in% AMR::microorganisms[, property])) {
strip_whitespace <- function(x, dyslexia_mode) {
@ -487,6 +498,8 @@ exec_as.mo <- function(x, @@ -487,6 +498,8 @@ exec_as.mo <- function(x,
# from here on case-insensitive
x <- tolower(x)
x_backup[grepl("^(fungus|fungi)$", x)] <- "F_FUNGUS" # will otherwise become the kingdom
# remove spp and species
x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x, ignore.case = TRUE)
x <- gsub("(spp.?|ssp.?|subsp.?|subspecies|biovar|serovar|species)", "", x, ignore.case = TRUE)
@ -499,12 +512,14 @@ exec_as.mo <- function(x, @@ -499,12 +512,14 @@ exec_as.mo <- function(x,
# no groups and complexes as ending
x <- gsub("(complex|group)$", "", x)
x <- gsub("((an)?aero+b)[a-z]*", "", x)
x <- gsub("^atyp[a-z]*", "", x)
x <- gsub("(vergroen)[a-z]*", "viridans", x)
x <- gsub("[a-z]*diff?erent[a-z]*", "", x)
x <- gsub("(hefe|gist|gisten|levadura|lievito|fermento|levure)[a-z]*", "yeast", x)
x <- gsub("(schimmels?|mofo|molde|stampo|moisissure|fungi)[a-z]*", "fungus", x)
x <- gsub("fungus[ph|f]rya", "fungiphrya", x)
# remove non-text in case of "E. coli" except dots and spaces
x <- gsub("[^.a-zA-Z0-9/ \\-]+", "", x)
x <- trimws(gsub("[^.a-zA-Z0-9/ \\-]+", " ", x))
# replace minus by a space
x <- gsub("-+", " ", x)
# replace hemolytic by haemolytic
@ -543,7 +558,7 @@ exec_as.mo <- function(x, @@ -543,7 +558,7 @@ exec_as.mo <- function(x,
x[nchar(x_backup_without_spp) > 10] <- gsub("[+]", paste0("+[", constants, "]?"), x[nchar(x_backup_without_spp) > 10])
}
x <- strip_whitespace(x, dyslexia_mode)
x_trimmed <- x
x_trimmed_species <- paste(x_trimmed, "species")
x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed)
@ -591,6 +606,11 @@ exec_as.mo <- function(x, @@ -591,6 +606,11 @@ exec_as.mo <- function(x,
}
}
if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)") {
x[i] <- "UNKNOWN"
next
}
found <- reference_data_to_use[mo == toupper(x_backup[i]), ..property][[1]]
# is a valid MO code
if (length(found) > 0) {
@ -598,6 +618,17 @@ exec_as.mo <- function(x, @@ -598,6 +618,17 @@ exec_as.mo <- function(x,
next
}
if (x_backup[i] %in% microorganisms.translation$mo_old) {
# is an old mo code, used in previous versions of this package
old_mo_warning <- TRUE
found <- reference_data_to_use[mo == microorganisms.translation[which(microorganisms.translation$mo_old == x_backup[i]), "mo_new"], ..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
# don't save to history, as all items are already in microorganisms.translation
next
}
}
found <- reference_data_to_use[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), ..property][[1]]
# most probable: is exact match in fullname
if (length(found) > 0) {
@ -665,19 +696,22 @@ exec_as.mo <- function(x, @@ -665,19 +696,22 @@ exec_as.mo <- function(x,
if (x_backup_without_spp[i] %like_case% "virus") {
# there is no fullname like virus, so don't try to coerce it
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (initial_search == TRUE) {
failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
x[i] <- NA_character_
next
}
# x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
# if (initial_search == TRUE) {
# failures <- c(failures, x_backup[i])
# set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
# }
# next
# }
# translate known trivial abbreviations to genus + species ----
if (!is.na(x_trimmed[i])) {
if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')
| x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") {
x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STPHY_AURS', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -685,7 +719,7 @@ exec_as.mo <- function(x, @@ -685,7 +719,7 @@ exec_as.mo <- function(x,
}
if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')
| x_backup_without_spp[i] %like_case% " (mrse|msse) ") {
x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STPHY_EPDR', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -715,7 +749,7 @@ exec_as.mo <- function(x, @@ -715,7 +749,7 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC")
# also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157
| x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") {
x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_ESCHR_COLI', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -724,7 +758,7 @@ exec_as.mo <- function(x, @@ -724,7 +758,7 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) == 'MRPA'
| x_backup_without_spp[i] %like_case% " mrpa ") {
# multi resistant P. aeruginosa
x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_PSDMN_ARGN', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -733,7 +767,7 @@ exec_as.mo <- function(x, @@ -733,7 +767,7 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) == 'CRS'
| toupper(x_backup_without_spp[i]) == 'CRSM') {
# co-trim resistant S. maltophilia
x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STNTR_MLTP', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -742,15 +776,15 @@ exec_as.mo <- function(x, @@ -742,15 +776,15 @@ exec_as.mo <- function(x,
if (toupper(x_backup_without_spp[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')
| x_backup_without_spp[i] %like_case% " (pisp|prsp|visp|vrsp) ") {
# peni I, peni R, vanco I, vanco R: S. pneumoniae
x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STRPT_PNMN', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
}
if (x_backup_without_spp[i] %like_case% '^g[abcdfghk]s$') {
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB)
x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GR\\1", x_backup_without_spp[i])), ..property][[1]][1L]
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -758,7 +792,7 @@ exec_as.mo <- function(x, @@ -758,7 +792,7 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% '(streptococ|streptokok).* [abcdfghk]$') {
# Streptococci in different languages, like "estreptococos grupo B"
x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GR\\2", x_backup_without_spp[i])), ..property][[1]][1L]
x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -766,7 +800,7 @@ exec_as.mo <- function(x, @@ -766,7 +800,7 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% 'group [abcdfghk] (streptococ|streptokok|estreptococ)') {
# Streptococci in different languages, like "Group A Streptococci"
x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i])), ..property][[1]][1L]
x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -774,7 +808,7 @@ exec_as.mo <- function(x, @@ -774,7 +808,7 @@ exec_as.mo <- function(x,
}
if (x_backup_without_spp[i] %like_case% 'haemoly.*strept') {
# Haemolytic streptococci in different languages
x[i] <- microorganismsDT[mo == 'B_STRPT_HAE', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STRPT_HAEM', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -785,7 +819,7 @@ exec_as.mo <- function(x, @@ -785,7 +819,7 @@ exec_as.mo <- function(x,
| x_trimmed[i] %like_case% '[ck]oagulas[ea] negatie?[vf]'
| x_backup_without_spp[i] %like_case% '[ck]o?ns[^a-z]?$') {
# coerce S. coagulase negative
x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STPHY_CONS', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -795,7 +829,7 @@ exec_as.mo <- function(x, @@ -795,7 +829,7 @@ exec_as.mo <- function(x,
| x_trimmed[i] %like_case% '[ck]oagulas[ea] positie?[vf]'
| x_backup_without_spp[i] %like_case% '[ck]o?ps[^a-z]?$') {
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STPHY_COPS', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -806,7 +840,7 @@ exec_as.mo <- function(x, @@ -806,7 +840,7 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like_case% 'strepto.* milleri'
| x_backup_without_spp[i] %like_case% 'mgs[^a-z]?$') {
# Milleri Group Streptococcus (MGS)
x[i] <- microorganismsDT[mo == 'B_STRPT_MIL', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STRPT_MILL', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -816,7 +850,7 @@ exec_as.mo <- function(x, @@ -816,7 +850,7 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like_case% 'strepto.* viridans'
| x_backup_without_spp[i] %like_case% 'vgs[^a-z]?$') {
# Viridans Group Streptococcus (VGS)
x[i] <- microorganismsDT[mo == 'B_STRPT_VIR', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_STRPT_VIRI', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -842,6 +876,15 @@ exec_as.mo <- function(x, @@ -842,6 +876,15 @@ exec_as.mo <- function(x,
}
next
}
if (x_backup_without_spp[i] %like_case% 'mycoba[ck]teri.[nm]?$') {
# coerce Gram positives
x[i] <- microorganismsDT[mo == 'B_MYCBC', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
}
if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") {
if (x_backup_without_spp[i] %like_case% "salmonella group") {
# Salmonella Group A to Z, just return S. species for now
@ -852,38 +895,38 @@ exec_as.mo <- function(x, @@ -852,38 +895,38 @@ exec_as.mo <- function(x,
next
} else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE)) {
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L]
x[i] <- microorganismsDT[mo == 'B_SLMNL_ENTR', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
uncertainties <- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = 1,
input = x_backup_without_spp[i],
result_mo = "B_SLMNL_ENT"))
result_mo = "B_SLMNL_ENTR"))
next
}
}
# trivial names known to the field:
if ("meningococcus" %like_case% x_trimmed[i]) {
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_NESSR_MEN', ..property][[1]][1L]
# coerce Neisseria meningitidis
x[i] <- microorganismsDT[mo == 'B_NESSR_MNNG', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
}
if ("gonococcus" %like_case% x_trimmed[i]) {
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_NESSR_GON', ..property][[1]][1L]
# coerce Neisseria gonorrhoeae
x[i] <- microorganismsDT[mo == 'B_NESSR_GNRR', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
next
}
if ("pneumococcus" %like_case% x_trimmed[i]) {
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L]
# coerce Streptococcus penumoniae
x[i] <- microorganismsDT[mo == 'B_STRPT_PNMN', ..property][[1]][1L]
if (initial_search == TRUE) {
set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history)
}
@ -1611,35 +1654,35 @@ exec_as.mo <- function(x, @@ -1611,35 +1654,35 @@ exec_as.mo <- function(x,
immediate. = TRUE)
}
x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CONS', ..property][[1]][1L]
x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_COPS', ..property][[1]][1L]
if (Becker == "all") {
x[x %in% microorganismsDT[mo %like_case% '^B_STPHY_AUR', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
x[x %in% microorganismsDT[mo %like_case% '^B_STPHY_AURS', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_COPS', ..property][[1]][1L]
}
}
# Lancefield ----
if (Lancefield == TRUE | Lancefield == "all") {
# group A - S. pyogenes
x[x == microorganismsDT[mo == 'B_STRPT_PYO', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRA', ..property][[1]][1L]
x[x == microorganismsDT[mo == 'B_STRPT_PYGN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRPA', ..property][[1]][1L]
# group B - S. agalactiae
x[x == microorganismsDT[mo == 'B_STRPT_AGA', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRB', ..property][[1]][1L]
x[x == microorganismsDT[mo == 'B_STRPT_AGLC', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRPB', ..property][[1]][1L]
# group C
S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus",
species %in% c("equisimilis", "equi",
"zooepidemicus", "dysgalactiae")) %>%
pull(property)
x[x %in% S_groupC] <- microorganismsDT[mo == 'B_STRPT_GRC', ..property][[1]][1L]
x[x %in% S_groupC] <- microorganismsDT[mo == 'B_STRPT_GRPC', ..property][[1]][1L]
if (Lancefield == "all") {
# all Enterococci
x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == 'B_STRPT_GRD', ..property][[1]][1L]
x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == 'B_STRPT_GRPD', ..property][[1]][1L]
}
# group F - S. anginosus
x[x == microorganismsDT[mo == 'B_STRPT_ANG', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRF', ..property][[1]][1L]
x[x == microorganismsDT[mo == 'B_STRPT_ANGN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRPF', ..property][[1]][1L]
# group H - S. sanguinis
x[x == microorganismsDT[mo == 'B_STRPT_SAN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRH', ..property][[1]][1L]
x[x == microorganismsDT[mo == 'B_STRPT_SNGN', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRPH', ..property][[1]][1L]
# group K - S. salivarius
x[x == microorganismsDT[mo == 'B_STRPT_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRK', ..property][[1]][1L]
x[x == microorganismsDT[mo == 'B_STRPT_SLVR', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRPK', ..property][[1]][1L]
}
# Wrap up ----------------------------------------------------------------
@ -1672,6 +1715,10 @@ exec_as.mo <- function(x, @@ -1672,6 +1715,10 @@ exec_as.mo <- function(x,
print(mo_renamed())
}
if (old_mo_warning == TRUE) {
warning("The input contained old microorganism IDs from previous versions of this package. Please use as.mo() on these old codes.\nSUPPORT FOR THIS WILL BE DROPPED IN A FUTURE VERSION.", call. = FALSE)
}
x
}
@ -1682,6 +1729,7 @@ empty_result <- function(x) { @@ -1682,6 +1729,7 @@ empty_result <- function(x) {
#' @importFrom crayon italic
was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") {
newly_set <- data.frame(old_name = name_old,
old_ref = ref_old,
new_name = name_new,
new_ref = ref_new,
mo = mo,
@ -1757,7 +1805,7 @@ pillar_shaft.mo <- function(x, ...) { @@ -1757,7 +1805,7 @@ pillar_shaft.mo <- function(x, ...) {
#' @noRd
summary.mo <- function(object, ...) {
# unique and top 1-3
x <- object
x <- as.mo(object)
top_3 <- unname(top_freq(freq(x), 3))
c("Class" = "mo",
"<NA>" = length(x[is.na(x)]),
@ -1803,7 +1851,7 @@ as.data.frame.mo <- function(x, ...) { @@ -1803,7 +1851,7 @@ as.data.frame.mo <- function(x, ...) {
"[<-.mo" <- function(i, j, ..., value) {
y <- NextMethod()
attributes(y) <- attributes(i)
class_integrity_check(y, "microbial code", AMR::microorganisms$mo)
class_integrity_check(y, "microbial code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old)))
}
#' @exportMethod [[<-.mo
#' @export
@ -1811,7 +1859,7 @@ as.data.frame.mo <- function(x, ...) { @@ -1811,7 +1859,7 @@ as.data.frame.mo <- function(x, ...) {
"[[<-.mo" <- function(i, j, ..., value) {
y <- NextMethod()
attributes(y) <- attributes(i)
class_integrity_check(y, "microbial code", AMR::microorganisms$mo)
class_integrity_check(y, "microbial code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old)))
}
#' @exportMethod c.mo
#' @export
@ -1819,7 +1867,7 @@ as.data.frame.mo <- function(x, ...) { @@ -1819,7 +1867,7 @@ as.data.frame.mo <- function(x, ...) {
c.mo <- function(x, ...) {
y <- NextMethod()
attributes(y) <- attributes(x)
class_integrity_check(y, "microbial code", AMR::microorganisms$mo)
class_integrity_check(y, "microbial code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old)))
}
#' @rdname as.mo
@ -1898,9 +1946,12 @@ print.mo_renamed <- function(x, ...) { @@ -1898,9 +1946,12 @@ print.mo_renamed <- function(x, ...) {
}
for (i in 1:nrow(x)) {
message(blue(paste0("NOTE: ",
italic(x$old_name[i]), " was renamed ", italic(x$new_name[i]),
" (", gsub("et al.", italic("et al."), x$new_ref[i]), ")",
" (", x$mo[i], ")")))
italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "",
paste0(" (", gsub("et al.", italic("et al."), x$old_ref[i]), ")")),
" was renamed ",
italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "",
paste0(" (", gsub("et al.", italic("et al."), x$new_ref[i]), ")")),
" [", x$mo[i], "]")))
}
}

2
R/mo_history.R

@ -83,7 +83,7 @@ get_mo_history <- function(x, uncertainty_level, force = FALSE, disable = FALSE) @@ -83,7 +83,7 @@ get_mo_history <- function(x, uncertainty_level, force = FALSE, disable = FALSE)
if (base::is.null(history)) {
result <- NA
} else {
result <- data.frame(x = toupper(x), stringsAsFactors = FALSE) %>%
result <- data.frame(x = as.character(toupper(x)), stringsAsFactors = FALSE) %>%
left_join(history, by = "x") %>%
pull(mo)
}

2
R/rsi.R

@ -197,7 +197,7 @@ exec_as.rsi <- function(method, x, mo, ab, guideline) { @@ -197,7 +197,7 @@ exec_as.rsi <- function(method, x, mo, ab, guideline) {
mo_order <- as.mo(mo_order(mo))
mo_becker <- as.mo(mo, Becker = TRUE)
mo_lancefield <- as.mo(mo, Lancefield = TRUE)
guideline_param <- toupper(guideline)
if (guideline_param %in% c("CLSI", "EUCAST")) {
guideline_param <- AMR::rsi_translation %>%

BIN
R/sysdata.rda

Binary file not shown.

46
R/zzz.R

@ -54,29 +54,29 @@ @@ -54,29 +54,29 @@
.onAttach <- function(...) {
if (interactive() & !isFALSE(getOption("AMR_survey"))) {
options(AMR_survey = FALSE)
console_width <- options()$width - 1
url <- "https://www.surveymonkey.com/r/AMR_for_R"
txt <- paste0("Thanks for using the AMR package! ",
"As researchers, we are interested in how and why you use this package and if there are things you're missing from it. ",
"Please fill in our 2-minute survey at: ", url, ". ",
"This message can be turned off with: options(AMR_survey = FALSE)")
# make it honour new lines bases on console width:
txt <- unlist(strsplit(txt, " "))
txt_new <- ""
total_chars <- 0
for (i in 1:length(txt)) {
total_chars <- total_chars + nchar(txt[i]) + 1
if (total_chars > console_width) {
txt_new <- paste0(txt_new, "\n")
total_chars <- 0
}
txt_new <- paste0(txt_new, txt[i], " ")
}
# packageStartupMessage(txt_new)
}
# if (interactive() & !isFALSE(getOption("AMR_survey"))) {
# options(AMR_survey = FALSE)
# console_width <- options()$width - 1
# url <- "https://www.surveymonkey.com/r/AMR_for_R"
# txt <- paste0("Thanks for using the AMR package! ",
# "As researchers, we are interested in how and why you use this package and if there are things you're missing from it. ",
# "Please fill in our 2-minute survey at: ", url, ". ",
# "This message can be turned off with: options(AMR_survey = FALSE)")
#
# # make it honour new lines bases on console width:
# txt <- unlist(strsplit(txt, " "))
# txt_new <- ""
# total_chars <- 0
# for (i in 1:length(txt)) {
# total_chars <- total_chars + nchar(txt[i]) + 1
# if (total_chars > console_width) {
# txt_new <- paste0(txt_new, "\n")
# total_chars <- 0
# }
# txt_new <- paste0(txt_new, txt[i], " ")
# }
# # packageStartupMessage(txt_new)
# }
}
#' @importFrom data.table as.data.table setkey

2
cran-comments.md

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
# Version 0.8.0
* A NOTE for having a data directory over 3 MB. This is needed to offer users reference data for the complete taxonomy of microorganisms - one of the most important features of this pacakge. Has been this way since version 0.3.0.
* This package writes lines to `[user library]/AMR/inst/mo_history/mo_history.csv` when using the `as.mo()` function. Users are notified about this. The CSV file is never newly created or deleted by this package, it only changes this file to improve speed and reliability of the `as.mo()` function. Staged install still works. The source code was taken from the `extrafont` package on CRAN (version 0.17), that writes to the package folder in the user library exactly the same way. See the source code of `set_mo_history()` and `clear_mo_history()`.
* This package writes lines to `[user library]/AMR/mo_history/mo_history.csv` when using the `as.mo()` function, in the exact same way (and borrowed from) the `extrafont` package on CRAN (version 0.17) writes to the package folder. Users are notified about this and staged install still works. The CSV file is never newly created or deleted by this package, it only changes this file to improve speed and reliability of the `as.mo()` function. See the source code of `set_mo_history()` and `clear_mo_history()`.

BIN
data-raw/DRGLST1.xlsx

Binary file not shown.

7
data-raw/internals.R

@ -33,8 +33,12 @@ translations_file <- utils::read.delim(file = "data-raw/translations.tsv", @@ -33,8 +33,12 @@ translations_file <- utils::read.delim(file = "data-raw/translations.tsv",
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = "")
# Old microorganism codes -------------------------------------------------
microorganisms.translation <- readRDS("data-raw/microorganisms.translation.rds")
# Export to package as internal data ----
usethis::use_data(eucast_rules_file, translations_file,
usethis::use_data(eucast_rules_file, translations_file, microorganisms.translation,
internal = TRUE,
overwrite = TRUE,
version = 2)
@ -42,6 +46,7 @@ usethis::use_data(eucast_rules_file, translations_file, @@ -42,6 +46,7 @@ usethis::use_data(eucast_rules_file, translations_file,
# Remove from global environment ----
rm(eucast_rules_file)
rm(translations_file)
rm(microorganisms.translation)
# Clean mo history ----
mo_history_file <- file.path(file.path(system.file(package = "AMR"), "mo_history"), "mo_history.csv")

BIN
data-raw/microorganisms.translation.rds

Binary file not shown.

261
data-raw/reproduction_of_microorganisms.R

@ -102,22 +102,38 @@ MOs <- data_total %>% @@ -102,22 +102,38 @@ MOs <- data_total %>%
& !order %in% c("Eurotiales", "Microascales", "Mucorales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales"))
)
# or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018)
| genus %in% c("Absidia", "Acremonium", "Actinotignum", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Anisakis", "Apophysomyces",
"Arachnia", "Ascaris", "Aureobacterium", "Aureobasidium", "Balantidum", "Bilophilia", "Branhamella", "Brochontrix",
"Brugia", "Calymmatobacterium", "Catabacter", "Cdc", "Chilomastix", "Chryseomonas", "Cladophialophora", "Cladosporium",
"Clonorchis", "Cordylobia", "Curvularia", "Demodex", "Dermatobia", "Diphyllobothrium", "Dracunculus", "Echinococcus",
"Enterobius", "Euascomycetes", "Exophiala", "Fasciola", "Fusarium", "Hendersonula", "Hymenolepis", "Hypomyces", "Kloeckera",
"Koserella", "Larva", "Leishmania", "Lelliottia", "Loa", "Lumbricus", "Malassezia", "Metagonimus", "Molonomonas",
"Mucor", "Nattrassia", "Necator", "Nectria", "Novospingobium", "Onchocerca", "Opistorchis", "Paragonimus", "Paramyxovirus",
"Pediculus", "Phoma", "Phthirus", "Pityrosporum", "Pseudallescheria", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula",
"Salinococcus", "Sanguibacteroides", "Schistosoma", "Scopulariopsis", "Scytalidium", "Sporobolomyces", "Stomatococcus",
"Strongyloides", "Syncephalastraceae", "Taenia", "Torulopsis", "Trichinella", "Trichobilharzia", "Trichoderma", "Trichomonas",
"Trichosporon", "Trichuris", "Trypanosoma", "Wuchereria")
| genus %in% c("Absidia", "Acremonium", "Actinotignum", "Aedes", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus",
"Anisakis", "Anopheles", "Apophysomyces", "Arachnia", "Ascaris", "Aureobacterium", "Aureobasidium", "Balantidum", "Basidiobolus",
"Beauveria", "Bilophilia", "Branhamella", "Brochontrix", "Brugia", "Calymmatobacterium", "Capillaria", "Catabacter", "Cdc", "Chaetomium",
"Chilomastix", "Chryseomonas", "Chrysonilia", "Cladophialophora", "Cladosporium", "Clonorchis", "Conidiobolus", "Contracaecum",
"Cordylobia", "Curvularia", "Demodex", "Dermatobia", "Dicrocoelium", "Dioctophyma", "Diphyllobothrium", "Dipylidium", "Dirofilaria",
"Dracunculus", "Echinococcus", "Echinostoma", "Enterobius", "Enteromonas", "Euascomycetes", "Exophiala", "Exserohilum", "Fasciola",
"Fasciolopsis", "Fonsecaea", "Fusarium", "Gnathostoma", "Hendersonula", "Heterophyes", "Hymenolepis", "Hypomyces", "Hysterothylacium",
"Kloeckera", "Koserella", "Larva", "Lecythophora", "Leishmania", "Lelliottia", "Leptomyxida", "Leptosphaeria", "Loa", "Lucilia",
"Lumbricus", "Malassezia", "Malbranchea", "Mansonella", "Mesocestoides", "Metagonimus", "Metarrhizium", "Molonomonas", "Mortierella",
"Mucor", "Multiceps", "Mycocentrospora", "Nanophetus", "Nattrassia", "Necator", "Nectria", "Novospingobium", "Ochroconis",
"Oesophagostomum", "Oidiodendron", "Onchocerca", "Opisthorchis", "Opistorchis", "Paragonimus", "Paramyxovirus", "Pediculus",
"Phlebotomus", "Phocanema", "Phoma", "Phthirus", "Piedraia", "Pithomyces", "Pityrosporum", "Pseudallescheria", "Pseudoterranova",
"Pulex", "Retortamonas", "Rhizomucor", "Rhizopus", "Rhodotorula", "Salinococcus", "Sanguibacteroides", "Sarcophagidae", "Sarcoptes",
"Schistosoma", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Spirometra", "Sporobolomyces", "Stachybotrys", "Stenotrophomononas",
"Stomatococcus", "Strongyloides", "Syncephalastraceae", "Syngamus", "Taenia", "Ternidens", "Torulopsis", "Toxocara", "Trichinella",
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichosporon", "Trichostrongylus", "Trichuris", "Tritirachium", "Trombicula",
"Trypanosoma", "Tunga", "Wuchereria")
# or the taxonomic entry is old - the species was renamed
| !is.na(col_id_new)
) %>%
# really no Plantae (e.g. Dracunculus exist both as worm and as plant)
filter(kingdom != "Plantae")
filter(kingdom != "Plantae") %>%
filter(!rank %in% c("kingdom", "phylum", "class", "order", "family", "genus"))
# include all ranks other than species for the included species
MOs <- MOs %>% bind_rows(data_total %>%
filter((kingdom %in% MOs$kingdom & rank == "kingdom")
| (phylum %in% MOs$phylum & rank == "phylum")
| (class %in% MOs$class & rank == "class")
| (order %in% MOs$order & rank == "order")
| (family %in% MOs$family & rank == "family")
| (genus %in% MOs$genus & rank == "genus")))
# filter old taxonomic names so only the ones with an existing reference will be kept
MOs <- MOs %>%
@ -193,6 +209,11 @@ MOs.old <- MOs %>% @@ -193,6 +209,11 @@ MOs.old <- MOs %>%
distinct(fullname, .keep_all = TRUE) %>%
arrange(col_id)
MO.bak <- MOs
MOold.bak <- MOs.old
MOs <- MO.bak
MOs.old <- MOold.bak
MOs <- MOs %>%
filter(is.na(col_id_new) | source == "DSMZ") %>%
transmute(col_id,
@ -215,20 +236,93 @@ MOs <- MOs %>% @@ -215,20 +236,93 @@ MOs <- MOs %>%
species_id = gsub(".*/([a-f0-9]+)", "\\1", species_id),
source) %>%
#distinct(fullname, .keep_all = TRUE) %>%
filter(!grepl("unassigned", fullname, ignore.case = TRUE))
# Filter out the DSMZ records that were renamed and are now in MOs.old
MOs <- MOs %>%
filter(!(source == "DSMZ" & fullname %in% MOs.old$fullname),
!(source == "DSMZ" & fullname %in% (MOs %>% filter(source == "CoL") %>% pull(fullname)))) %>%
distinct(fullname, .keep_all = TRUE)
filter(!grepl("unassigned", fullname, ignore.case = TRUE)) %>%
# prefer DSMZ over CoL, since that's more recent
arrange(desc(source)) %>%
distinct(kingdom, fullname, .keep_all = TRUE)
# # Filter out the DSMZ records that were renamed and are now in MOs.old
# MOs <- MOs %>%
# filter(!(source == "DSMZ" & fullname %in% MOs.old$fullname)) %>%
# distinct(kingdom, fullname, .keep_all = TRUE) %>%
# filter(fullname != "")
# remove all genera that have no species - they are irrelevant for microbiology and almost all from the kingdom of Animalia
to_remove <- MOs %>%
filter(!kingdom %in% c("Bacteria", "Protozoa")) %>%
group_by(kingdom, genus) %>%
count() %>%
filter(n == 1) %>%
ungroup() %>%
mutate(kingdom_genus = paste(kingdom, genus)) %>%
pull(kingdom_genus)
MOs <- MOs %>% filter(!(paste(kingdom, genus) %in% to_remove))
rm(to_remove)
# add CoL ID from MOs.bak, for the cases where DSMZ took preference
MOs <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
select(-col_id) %>%
left_join(MO.bak %>%
filter(is.na(col_id_new), !is.na(col_id)) %>%
transmute(col_id, kingdom_fullname = trimws(paste(kingdom, genus, species, subspecies))),
by = "kingdom_fullname") %>%
select(col_id, everything(), -kingdom_fullname)
MOs.old <- MOs.old %>%
# remove the ones that are in the MOs data set
filter(col_id_new %in% MOs$col_id) %>%
# and remove the ones that have the exact same fullname in the MOs data set, like Moraxella catarrhalis
left_join(MOs, by = "fullname") %>%
filter(col_id_new != col_id.y | is.na(col_id.y)) %>%
select(col_id = col_id.x, col_id_new, fullname, ref = ref.x)
# remove the records that are in MOs.old
MOs <- MOs %>% filter(!fullname %in% MOs.old$fullname)
# what characters are in the fullnames?
table(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = ""))))
table(MOs$kingdom, MOs$rank)
table(AMR::microorganisms$kingdom, AMR::microorganisms$rank)
# set prevalence per species
MOs <- MOs %>%
mutate(prevalence = case_when(
class == "Gammaproteobacteria"
| genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
~ 1,
phylum %in% c("Proteobacteria",
"Firmicutes",
"Actinobacteria",
"Sarcomastigophora")
| genus %in% c("Aspergillus",
"Bacteroides",
"Candida",
"Capnocytophaga",
"Chryseobacterium",
"Cryptococcus",
"Elisabethkingia",
"Flavobacterium",
"Fusobacterium",
"Giardia",
"Leptotrichia",
"Mycoplasma",
"Prevotella",
"Rhodotorula",
"Treponema",
"Trichophyton",
"Ureaplasma")
| rank %in% c("kingdom", "phylum", "class", "order", "family")
~ 2,
TRUE ~ 3
))
# Add abbreviations so we can easily know which ones are which ones.
# These will become valid and unique microbial IDs for the AMR package.
MOs <- MOs %>%
arrange(prevalence, fullname) %>%
group_by(kingdom) %>%
mutate(abbr_other = case_when(
rank == "family" ~ paste0("[FAM]_",
@ -270,14 +364,14 @@ MOs <- MOs %>% @@ -270,14 +364,14 @@ MOs <- MOs %>%
# species abbreviations may be the same between genera
# because the genus abbreviation is part of the abbreviation
mutate(abbr_species = abbreviate(species,
minlength = 3,
use.classes = FALSE,
minlength = 4,
use.classes = TRUE,
method = "both.sides")) %>%
ungroup() %>%
group_by(genus, species) %>%
mutate(abbr_subspecies = abbreviate(subspecies,
minlength = 3,
use.classes = FALSE,
minlength = 4,
use.classes = TRUE,
method = "both.sides")) %>%
ungroup() %>%
# remove trailing underscores
@ -302,9 +396,6 @@ MOs <- MOs %>% @@ -302,9 +396,6 @@ MOs <- MOs %>%
# put `mo` in front, followed by the rest
select(mo, everything(), -abbr_other, -abbr_genus, -abbr_species, -abbr_subspecies)
# remove empty fullnames
MOs <- MOs %>% filter(fullname != "")
# add non-taxonomic entries
MOs <- MOs %>%
bind_rows(
@ -324,6 +415,7 @@ MOs <- MOs %>% @@ -324,6 +415,7 @@ MOs <- MOs %>%
ref = NA_character_,
species_id = "",
source = "manually added",
prevalence = 1,
stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMN",
col_id = NA_integer_,
@ -340,6 +432,7 @@ MOs <- MOs %>% @@ -340,6 +432,7 @@ MOs <- MOs %>%
ref = NA_character_,
species_id = "",
source = "manually added",
prevalence = 1,
stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMP",
col_id = NA_integer_,
@ -356,6 +449,7 @@ MOs <- MOs %>% @@ -356,6 +449,7 @@ MOs <- MOs %>%
ref = NA_character_,
species_id = "",
source = "manually added",
prevalence = 1,
stringsAsFactors = FALSE),
data.frame(mo = "F_YEAST",
col_id = NA_integer_,
@ -372,6 +466,7 @@ MOs <- MOs %>% @@ -372,6 +466,7 @@ MOs <- MOs %>%
ref = NA_character_,
species_id = "",
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE),
data.frame(mo = "F_FUNGUS",
col_id = NA_integer_,
@ -388,11 +483,12 @@ MOs <- MOs %>% @@ -388,11 +483,12 @@ MOs <- MOs %>%
ref = NA_character_,
species_id = "",
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE),
# CoNS
MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("EPI", "CNS", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo),
col_id = NA_integer_,
species = "coagulase-negative",
fullname = "Coagulase-negative Staphylococcus (CoNS)",
@ -402,7 +498,7 @@ MOs <- MOs %>% @@ -402,7 +498,7 @@ MOs <- MOs %>%
# CoPS
MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("EPI", "CPS", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo),
col_id = NA_integer_,
species = "coagulase-positive",
fullname = "Coagulase-positive Staphylococcus (CoPS)",
@ -413,18 +509,20 @@ MOs <- MOs %>% @@ -413,18 +509,20 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "pyogenes") %>% .[1,] %>%
# we can keep all other details, since S. pyogenes is the only member of group A
mutate(mo = gsub("PYO", "GRA", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPA", mo),
species = "group A" ,
fullname = "Streptococcus group A"),
fullname = "Streptococcus group A",
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
# we can keep all other details, since S. agalactiae is the only member of group B
mutate(mo = gsub("AGA", "GRB", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPB", mo),
species = "group B" ,
fullname = "Streptococcus group B"),
fullname = "Streptococcus group B",
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>%
mutate(mo = gsub("DYS", "GRC", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo),
col_id = NA_integer_,
species = "group C" ,
fullname = "Streptococcus group C",
@ -433,7 +531,7 @@ MOs <- MOs %>% @@ -433,7 +531,7 @@ MOs <- MOs %>%
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRD", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo),
col_id = NA_integer_,
species = "group D" ,
fullname = "Streptococcus group D",
@ -442,7 +540,7 @@ MOs <- MOs %>% @@ -442,7 +540,7 @@ MOs <- MOs %>%
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRF", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo),
col_id = NA_integer_,
species = "group F" ,
fullname = "Streptococcus group F",
@ -451,7 +549,7 @@ MOs <- MOs %>% @@ -451,7 +549,7 @@ MOs <- MOs %>%
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRG", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo),
col_id = NA_integer_,
species = "group G" ,
fullname = "Streptococcus group G",
@ -460,7 +558,7 @@ MOs <- MOs %>% @@ -460,7 +558,7 @@ MOs <- MOs %>%
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRH", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo),
col_id = NA_integer_,
species = "group H" ,
fullname = "Streptococcus group H",
@ -469,7 +567,7 @@ MOs <- MOs %>% @@ -469,7 +567,7 @@ MOs <- MOs %>%
source = "manually added"),
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRK", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo),
col_id = NA_integer_,
species = "group K" ,
fullname = "Streptococcus group K",
@ -479,7 +577,7 @@ MOs <- MOs %>% @@ -479,7 +577,7 @@ MOs <- MOs %>%
# Beta haemolytic Streptococci
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "HAE", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo),
col_id = NA_integer_,
species = "beta-haemolytic" ,
fullname = "Beta-haemolytic Streptococcus",
@ -489,7 +587,7 @@ MOs <- MOs %>% @@ -489,7 +587,7 @@ MOs <- MOs %>%
# Viridans Streptococci
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "VIR", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo),
col_id = NA_integer_,
species = "viridans" ,
fullname = "Viridans Group Streptococcus (VGS)",
@ -499,7 +597,7 @@ MOs <- MOs %>% @@ -499,7 +597,7 @@ MOs <- MOs %>%
# Milleri Streptococci
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "MIL", mo),
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo),
col_id = NA_integer_,
species = "milleri" ,
fullname = "Milleri Group Streptococcus (MGS)",
@ -509,7 +607,7 @@ MOs <- MOs %>% @@ -509,7 +607,7 @@ MOs <- MOs %>%
# Trichomonas vaginalis is missing, same order as Dientamoeba
MOs %>%
filter(fullname == "Dientamoeba") %>%
mutate(mo = gsub("DNTMB", "THMNS", mo),
mutate(mo = gsub("(.*?)_.*", "\\1_THMNS", mo),
col_id = NA,
fullname = "Trichomonas",
family = "Trichomonadidae",
@ -519,8 +617,7 @@ MOs <- MOs %>% @@ -519,8 +617,7 @@ MOs <- MOs %>%
species_id = ""),
MOs %>%
filter(fullname == "Dientamoeba fragilis") %>%
mutate(mo = gsub("DNTMB", "THMNS", mo),
mo = gsub("FRA", "VAG", mo),
mutate(mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo),
col_id = NA,
fullname = "Trichomonas vaginalis",
family = "Trichomonadidae",
@ -531,7 +628,7 @@ MOs <- MOs %>% @@ -531,7 +628,7 @@ MOs <- MOs %>%
species_id = ""),
MOs %>% # add family as such too
filter(fullname == "Monocercomonadidae") %>%
mutate(mo = gsub("MNCRCMND", "TRCHMNDD", mo),
mutate