Browse Source

memory for as.mo()

new-mo-algorithm
parent
commit
fdffc2791b
  1. 6
      .gitlab-ci.yml
  2. 4
      DESCRIPTION
  3. 1
      NAMESPACE
  4. 53
      NEWS.md
  5. 32
      R/misc.R
  6. 155
      R/mo.R
  7. 74
      R/mo_history.R
  8. 21
      R/mo_source.R
  9. 10
      README.md
  10. 10
      _pkgdown.yml
  11. 3
      appveyor.yml
  12. 2
      docs/LICENSE-text.html
  13. 375
      docs/articles/AMR.html
  14. BIN
      docs/articles/AMR_files/figure-html/plot 1-1.png
  15. BIN
      docs/articles/AMR_files/figure-html/plot 3-1.png
  16. BIN
      docs/articles/AMR_files/figure-html/plot 4-1.png
  17. BIN
      docs/articles/AMR_files/figure-html/plot 5-1.png
  18. 4
      docs/articles/EUCAST.html
  19. 4
      docs/articles/G_test.html
  20. 4
      docs/articles/SPSS.html
  21. 38
      docs/articles/WHONET.html
  22. 4
      docs/articles/atc_property.html
  23. 99
      docs/articles/benchmarks.html
  24. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png
  25. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png
  26. 4
      docs/articles/freq.html
  27. 2
      docs/articles/index.html
  28. 4
      docs/articles/mo_property.html
  29. 4
      docs/articles/resistance_predict.html
  30. 2
      docs/authors.html
  31. 2
      docs/index.html
  32. 22
      docs/news/index.html
  33. 2
      docs/reference/AMR-deprecated.html
  34. 2
      docs/reference/AMR.html
  35. 2
      docs/reference/WHOCC.html
  36. 2
      docs/reference/WHONET.html
  37. 2
      docs/reference/abname.html
  38. 2
      docs/reference/age.html
  39. 2
      docs/reference/age_groups.html
  40. 2
      docs/reference/antibiotics.html
  41. 2
      docs/reference/as.atc.html
  42. 2
      docs/reference/as.mic.html
  43. 18
      docs/reference/as.mo.html
  44. 6
      docs/reference/as.rsi.html
  45. 2
      docs/reference/atc_online.html
  46. 2
      docs/reference/atc_property.html
  47. 2
      docs/reference/availability.html
  48. 2
      docs/reference/catalogue_of_life.html
  49. 2
      docs/reference/catalogue_of_life_version.html
  50. 2
      docs/reference/count.html
  51. 2
      docs/reference/eucast_rules.html
  52. 2
      docs/reference/filter_ab_class.html
  53. 2
      docs/reference/first_isolate.html
  54. 2
      docs/reference/freq.html
  55. 2
      docs/reference/g.test.html
  56. 2
      docs/reference/get_locale.html
  57. 2
      docs/reference/ggplot_rsi.html
  58. 2
      docs/reference/guess_ab_col.html
  59. 19
      docs/reference/index.html
  60. 2
      docs/reference/join.html
  61. 2
      docs/reference/key_antibiotics.html
  62. 2
      docs/reference/kurtosis.html
  63. 2
      docs/reference/like.html
  64. 2
      docs/reference/mdro.html
  65. 2
      docs/reference/microorganisms.codes.html
  66. 2
      docs/reference/microorganisms.html
  67. 2
      docs/reference/microorganisms.old.html
  68. 2
      docs/reference/mo_property.html
  69. 2
      docs/reference/mo_source.html
  70. 2
      docs/reference/p.symbol.html
  71. 2
      docs/reference/portion.html
  72. 2
      docs/reference/read.4D.html
  73. 2
      docs/reference/resistance_predict.html
  74. 2
      docs/reference/rsi.html
  75. 2
      docs/reference/septic_patients.html
  76. 2
      docs/reference/skewness.html
  77. 14
      man/as.mo.Rd
  78. BIN
      man/figures/logo_certe.png
  79. BIN
      man/figures/logo_eh1h.png
  80. BIN
      man/figures/logo_interreg.png
  81. BIN
      man/figures/logo_rug.png
  82. BIN
      man/figures/logo_umcg.png
  83. 34
      tests/testthat/test-mo_history.R
  84. 40
      vignettes/benchmarks.Rmd

6
.gitlab-ci.yml

@ -61,10 +61,7 @@ cache: @@ -61,10 +61,7 @@ cache:
R:
stage: build
allow_failure: false
variables:
WARNINGS_ARE_ERRORS: 1
script:
- export WARNINGS_ARE_ERRORS=1
# remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
- rm -rf vignettes
- Rscript -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
@ -86,7 +83,8 @@ coverage: @@ -86,7 +83,8 @@ coverage:
- master
script:
- apt-get install --yes git
- Rscript -e 'cc <- covr::package_coverage(); covr::codecov(coverage = cc, token = "50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca"); cat("Code coverage:", covr::percent_coverage(cc))'
# codecov token is set in https://gitlab.com/msberends/AMR/settings/ci_cd
- Rscript -e 'cc <- covr::package_coverage(); covr::codecov(coverage = cc, token = "${codecov_token}"); cat("Code coverage:", covr::percent_coverage(cc))'
coverage: '/Code coverage: \d+\.\d+/'
pages:

4
DESCRIPTION

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
Package: AMR
Version: 0.5.0.9022
Date: 2019-03-12
Version: 0.5.0.9023
Date: 2019-03-15
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

1
NAMESPACE

@ -71,6 +71,7 @@ export(atc_umcg) @@ -71,6 +71,7 @@ export(atc_umcg)
export(availability)
export(brmo)
export(catalogue_of_life_version)
export(clean_mo_history)
export(count_I)
export(count_IR)
export(count_R)

53
NEWS.md

@ -101,34 +101,35 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git @@ -101,34 +101,35 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git
* Function `guess_mo()` is now deprecated in favour of `as.mo()` and will be removed in future versions
* Function `guess_atc()` is now deprecated in favour of `as.atc()` and will be removed in future versions
* Improvements for `as.mo()`:
* Now handles incorrect spelling like `i` instead of `y` and `f` instead of `ph`:
```r
# mo_fullname() uses as.mo() internally
mo_fullname("Sthafilokockus aaureuz")
#> [1] "Staphylococcus aureus"
mo_fullname("S. klossi")
#> [1] "Staphylococcus kloosii"
```
* Now handles incorrect spelling, like `i` instead of `y` and `f` instead of `ph`:
```r
# mo_fullname() uses as.mo() internally
mo_fullname("Sthafilokockus aaureuz")
#> [1] "Staphylococcus aureus"
mo_fullname("S. klossi")
#> [1] "Staphylococcus kloosii"
```
* Uncertainty of the algorithm is now divided into four levels, 0 to 3, where the default `allow_uncertain = TRUE` is equal to uncertainty level 2. Run `?as.mo` for more info about these levels.
```r
# equal:
as.mo(..., allow_uncertain = TRUE)
as.mo(..., allow_uncertain = 2)
# also equal:
as.mo(..., allow_uncertain = FALSE)
as.mo(..., allow_uncertain = 0)
```
Using `as.mo(..., allow_uncertain = 3)` could lead to very unreliable results.
```r
# equal:
as.mo(..., allow_uncertain = TRUE)
as.mo(..., allow_uncertain = 2)
# also equal:
as.mo(..., allow_uncertain = FALSE)
as.mo(..., allow_uncertain = 0)
```
Using `as.mo(..., allow_uncertain = 3)` could lead to very unreliable results.
* All microbial IDs that are found with zero uncertainty are now saved to a local file `~/.Rhistory_mo`. Use the new function `clean_mo_history()` to delete this file, which resets the algorithms.
* Incoercible results will now be considered 'unknown', MO code `UNKNOWN`. On foreign systems, properties of these will be translated to all languages already previously supported: German, Dutch, French, Italian, Spanish and Portuguese:
```r
mo_genus("qwerty", language = "es")
# Warning:
# one unique value (^= 100.0%) could not be coerced and is considered 'unknown': "qwerty". Use mo_failures() to review it.
#> [1] "(género desconocido)"
```
```r
mo_genus("qwerty", language = "es")
# Warning:
# one unique value (^= 100.0%) could not be coerced and is considered 'unknown': "qwerty". Use mo_failures() to review it.
#> [1] "(género desconocido)"
```
* Fix for vector containing only empty values
* Finds better results when input is in other languages
* Better handling for subspecies

32
R/misc.R

@ -75,7 +75,9 @@ check_available_columns <- function(tbl, col.list, info = TRUE) { @@ -75,7 +75,9 @@ check_available_columns <- function(tbl, col.list, info = TRUE) {
col.list.bak <- col.list
# are they available as upper case or lower case then?
for (i in 1:length(col.list)) {
if (toupper(col.list[i]) %in% colnames(tbl)) {
if (is.null(col.list[i]) | isTRUE(is.na(col.list[i]))) {
col.list[i] <- NULL
} else if (toupper(col.list[i]) %in% colnames(tbl)) {
col.list[i] <- toupper(col.list[i])
} else if (tolower(col.list[i]) %in% colnames(tbl)) {
col.list[i] <- tolower(col.list[i])
@ -124,7 +126,7 @@ size_humanreadable <- function(bytes, decimals = 1) { @@ -124,7 +126,7 @@ size_humanreadable <- function(bytes, decimals = 1) {
out
}
#' @importFrom crayon blue bold
#' @importFrom crayon blue bold red
#' @importFrom dplyr %>% pull
search_type_in_df <- function(tbl, type) {
# try to find columns based on type
@ -151,16 +153,22 @@ search_type_in_df <- function(tbl, type) { @@ -151,16 +153,22 @@ search_type_in_df <- function(tbl, type) {
}
# -- date
if (type == "date") {
for (i in 1:ncol(tbl)) {
if (any(colnames(tbl) %like% "^(Specimen date)")) {
# WHONET support
found <- colnames(tbl)[colnames(tbl) %like% "^(Specimen date)"][1]
} else if ("Date" %in% class(tbl %>% pull(i)) | "POSIXct" %in% class(tbl %>% pull(i))) {
found <- colnames(tbl)[i]
break
if (any(colnames(tbl) %like% "^(specimen date|specimen_date|spec_date)")) {
# WHONET support
found <- colnames(tbl)[colnames(tbl) %like% "^(specimen date|specimen_date|spec_date)"][1]
if (!any(class(tbl %>% pull(found)) %in% c("Date", "POSIXct"))) {
stop(red(paste0("ERROR: Found column `", bold(found), "` to be used as input for `col_", type,
"`, but this column contains no valid dates. Transform its values to valid dates first.")),
call. = FALSE)
}
} else {
for (i in 1:ncol(tbl)) {
if (any(class(tbl %>% pull(i)) %in% c("Date", "POSIXct"))) {
found <- colnames(tbl)[i]
break
}
}
}
}
# -- patient id
if (type == "patient_id") {
@ -170,8 +178,8 @@ search_type_in_df <- function(tbl, type) { @@ -170,8 +178,8 @@ search_type_in_df <- function(tbl, type) {
}
# -- specimen
if (type == "specimen") {
if (any(colnames(tbl) %like% "(specimen type)")) {
found <- colnames(tbl)[colnames(tbl) %like% "(specimen type)"][1]
if (any(colnames(tbl) %like% "(specimen type|spec_type)")) {
found <- colnames(tbl)[colnames(tbl) %like% "(specimen type|spec_type)"][1]
} else if (any(colnames(tbl) %like% "^(specimen)")) {
found <- colnames(tbl)[colnames(tbl) %like% "^(specimen)"][1]
}

155
R/mo.R

@ -31,10 +31,12 @@ @@ -31,10 +31,12 @@
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
#' @param allow_uncertain a logical (\code{TRUE} or \code{FALSE}) or a value between 0 and 3 to indicate whether the input should be checked for less possible results, see Details
#' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. See \code{\link{set_mo_source}} and \code{\link{get_mo_source}} to automate the usage of your own codes (e.g. used in your analysis or organisation).
#' @param ... other parameters passed on to functions
#' @rdname as.mo
#' @aliases mo
#' @keywords mo Becker becker Lancefield lancefield guess
#' @details
#' \strong{General info} \cr
#' A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
#' \preformatted{
#' Code Full name
@ -53,7 +55,9 @@ @@ -53,7 +55,9 @@
#'
#' Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}.
#'
#' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
#' Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples.
#'
#' All IDs that are found with zero uncertainty are saved to a local file (\code{"~/.Rhistory_mo"}) to improve speed for every next time. Use \code{clean_mo_history()} to delete this file, which resets the algorithms. Only previous results will be used from this version of the \code{AMR} package, since the taxonomic tree may change in the future for any organism.
#'
#' \strong{Intelligent rules} \cr
#' This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:
@ -174,12 +178,14 @@ @@ -174,12 +178,14 @@
#' df <- df %>%
#' mutate(mo = as.mo(paste(genus, species)))
#' }
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source()) {
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source(), ...) {
if (!"AMR" %in% base::.packages()) {
library("AMR")
# check onLoad() in R/zzz.R: data tables are created there.
}
mo_hist <- get_mo_history(x, force = isTRUE(list(...)$force_mo_history))
if (mo_source_isvalid(reference_df)
& isFALSE(Becker)
& isFALSE(Lancefield)
@ -211,6 +217,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, @@ -211,6 +217,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
& isFALSE(Lancefield)) {
y <- x
} else if (sum(is.na(mo_hist)) == 0
& isFALSE(Becker)
& isFALSE(Lancefield)) {
# check previously found results
y <- mo_hist
} else if (all(tolower(x) %in% microorganismsDT$fullname_lower)
& isFALSE(Becker)
& isFALSE(Lancefield)) {
@ -229,13 +242,22 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, @@ -229,13 +242,22 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
on = "fullname_lower",
"mo"][[1]]
}
# save them too
mo_hist <- read_mo_history(force = isTRUE(list(...)$force_mo_history))
if (any(!x %in% mo_hist$x)) {
for (i in 1:length(y)) {
set_mo_history(x[i], y[i], force = isTRUE(list(...)$force_mo_history))
}
}
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df,
force_mo_history = isTRUE(list(...)$force_mo_history))
}
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df)
}
structure(.Data = y, class = "mo")
}
@ -249,9 +271,14 @@ is.mo <- function(x) { @@ -249,9 +271,14 @@ is.mo <- function(x) {
#' @importFrom dplyr %>% pull left_join n_distinct progress_estimated filter distinct
#' @importFrom data.table data.table as.data.table setkey
#' @importFrom crayon magenta red blue silver italic has_color
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
allow_uncertain = TRUE, reference_df = get_mo_source(),
property = "mo", clear_options = TRUE) {
exec_as.mo <- function(x,
Becker = FALSE,
Lancefield = FALSE,
allow_uncertain = TRUE,
reference_df = get_mo_source(),
property = "mo",
clear_options = TRUE,
force_mo_history = FALSE) {
if (!"AMR" %in% base::.packages()) {
library("AMR")
@ -412,7 +439,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -412,7 +439,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# replace hemolytic by haemolytic
x <- gsub("ha?emoly", "haemoly", x)
# place minus back in streptococci
x <- gsub("(alpha|beta|gamma) ha?emoly", "\\1-haemoly", x)
x <- gsub("(alpha|beta|gamma).?ha?emoly", "\\1-haemoly", x)
# remove genus as first word
x <- gsub("^Genus ", "", x)
# allow characters that resemble others
@ -458,6 +485,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -458,6 +485,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
progress$tick()$print()
found <- microorganismsDT[mo == get_mo_history(x_backup[i], force = force_mo_history), ..property][[1]]
# previously found result
if (length(found) > 0) {
x[i] <- found[1L]
next
}
found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]]
# is a valid MO code
if (length(found) > 0) {
@ -469,6 +503,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -469,6 +503,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# most probable: is exact match in fullname
if (length(found) > 0) {
x[i] <- found[1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
@ -494,6 +531,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -494,6 +531,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# return first genus that begins with x_trimmed, e.g. when "E. spp."
if (length(found) > 0) {
x[i] <- found[1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
}
@ -515,50 +555,80 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -515,50 +555,80 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
if (!is.na(x_trimmed[i])) {
if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) {
x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')) {
x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) == "VRE"
| x_backup_without_spp[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') {
x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) %in% c("EHEC", "EPEC", "EIEC", "STEC", "ATEC")) {
x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) == 'MRPA') {
# multi resistant P. aeruginosa
x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) == 'CRS'
| toupper(x_backup_without_spp[i]) == 'CRSM') {
# co-trim resistant S. maltophilia
x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (toupper(x_backup_without_spp[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
# peni I, peni R, vanco I, vanco R: S. pneumoniae
x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (x_backup_without_spp[i] %like% '^G[ABCDFGHK]S$') {
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB)
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (x_backup_without_spp[i] %like% '(streptococ|streptokok).* [ABCDFGHK]$') {
# Streptococci in different languages, like "estreptococos grupo B"
x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (x_backup_without_spp[i] %like% 'group [ABCDFGHK] (streptococ|streptokok|estreptococ)') {
# Streptococci in different languages, like "Group A Streptococci"
x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
@ -567,6 +637,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -567,6 +637,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
| x_backup_without_spp[i] %like% '[ck]o?ns[^a-z]?$') {
# coerce S. coagulase negative
x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (x_backup_without_spp[i] %like% '[ck]oagulas[ea] positie?[vf]'
@ -574,24 +647,38 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -574,24 +647,38 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
| x_backup_without_spp[i] %like% '[ck]o?ps[^a-z]?$') {
# coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (x_backup_without_spp[i] %like% 'gram[ -]?neg.*'
| x_backup_without_spp[i] %like% 'negatie?[vf]'
| x_trimmed[i] %like% 'gram[ -]?neg.*') {
# coerce Gram negatives
x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (x_backup_without_spp[i] %like% 'gram[ -]?pos.*'
| x_backup_without_spp[i] %like% 'positie?[vf]'
| x_trimmed[i] %like% 'gram[ -]?pos.*') {
# coerce Gram positives
x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup_without_spp[i], ignore.case = FALSE)) {
if (x_backup_without_spp[i] %like% "Salmonella group") {
# Salmonella Group A to Z, just return S. species for now
x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
options(mo_renamed = c(getOption("mo_renamed"),
magenta(paste0("Note: ",
italic("Salmonella"), " ", trimws(gsub("Salmonella", "", x_backup_without_spp[i])),
@ -601,6 +688,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -601,6 +688,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
} else {
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
options(mo_renamed = c(getOption("mo_renamed"),
magenta(paste0("Note: ",
italic("Salmonella"), " ", trimws(gsub("Salmonella", "", x_backup_without_spp[i])),
@ -618,12 +708,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -618,12 +708,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
found <- microorganismsDT[fullname_lower %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
if (nchar(x_backup_without_spp[i]) >= 6) {
found <- microorganismsDT[fullname_lower %like% paste0("^", x_backup_without_spp[i], "[a-z]+"), ..property][[1]]
found <- microorganismsDT[fullname_lower %like% paste0("^", unregex(x_backup_without_spp[i]), "[a-z]+"), ..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
}
@ -636,6 +732,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -636,6 +732,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
mo_found <- AMR::microorganisms.codes[toupper(x_backup[i]) == AMR::microorganisms.codes[, 1], "mo"][1L]
if (length(mo_found) > 0) {
x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L]
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
}
@ -737,6 +836,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -737,6 +836,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
f.x_withspaces_end_only = x_withspaces_end_only[i],
g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) {
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
# THEN TRY PREVALENT IN HUMAN INFECTIONS ----
@ -749,6 +851,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -749,6 +851,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
f.x_withspaces_end_only = x_withspaces_end_only[i],
g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) {
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
# THEN UNPREVALENT IN HUMAN INFECTIONS ----
@ -761,6 +866,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -761,6 +866,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
f.x_withspaces_end_only = x_withspaces_end_only[i],
g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) {
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
@ -784,16 +892,19 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, @@ -784,16 +892,19 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
ref_old = found[1, ref],
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
if (property == "mo") {
set_mo_history(x_backup[i], x[i], force = force_mo_history)
}
next
}
# check for uncertain results ----
uncertain_fn <- function(a.x_backup,
b.x_trimmed,
c.x_withspaces_start_end,
d.x_withspaces_start_only,
f.x_withspaces_end_only,
g.x_backup_without_spp) {
b.x_trimmed,
c.x_withspaces_start_end,
d.x_withspaces_start_only,
f.x_withspaces_end_only,
g.x_backup_without_spp) {
if (allow_uncertain == 0) {
# do not allow uncertainties
@ -936,15 +1047,15 @@ g.x_backup_without_spp) { @@ -936,15 +1047,15 @@ g.x_backup_without_spp) {
}
x[i] <- uncertain_fn(x_backup[i],
x_trimmed[i],
x_withspaces_start_end[i],
x_withspaces_start_end[i],
x_withspaces_start_only[i],
x_withspaces_end_only[i],
x_backup_without_spp[i])
if (!empty_result(x[i])) {
# no set_mo_history here; these are uncertain
next
}
# not found ----
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
failures <- c(failures, x_backup[i])
@ -1232,3 +1343,7 @@ nr2char <- function(x) { @@ -1232,3 +1343,7 @@ nr2char <- function(x) {
x
}
}
unregex <- function(x) {
gsub("[^a-zA-Z0-9 -]", "", x)
}

74
R/mo_history.R

@ -0,0 +1,74 @@ @@ -0,0 +1,74 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# This R package was created for academic research and was publicly #
# released in the hope that it will be useful, but it comes WITHOUT #
# ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitab.io/AMR. #
# ==================================================================== #
# print successful as.mo coercions to file, not uncertain ones
#' @importFrom dplyr %>% filter
set_mo_history <- function(x, mo, force = FALSE) {
file_location <- base::path.expand('~/.Rhistory_mo')
if ((base::interactive() & mo != "UNKNOWN") | force == TRUE) {
mo_hist <- read_mo_history(force = force)
if (NROW(mo_hist[base::which(mo_hist$x == x & mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) {
base::write(x = c(x, mo, base::as.character(utils::packageVersion("AMR"))),
file = file_location,
ncolumns = 3,
append = TRUE,
sep = "\t")
}
}
return(base::invisible())
}
get_mo_history <- function(x, force = FALSE) {
file_read <- read_mo_history(force = force)
if (base::is.null(file_read)) {
NA
} else {
data.frame(x, stringsAsFactors = FALSE) %>%
left_join(file_read, by = "x") %>%
pull(mo)
}
}
read_mo_history <- function(force = FALSE) {
file_location <- base::path.expand('~/.Rhistory_mo')
if (!base::file.exists(file_location) | (!base::interactive() & force == FALSE)) {
return(NULL)
}
file_read <- utils::read.table(file = file_location,
header = FALSE,
sep = "\t",
col.names = c("x", "mo", "package_version"),
stringsAsFactors = FALSE)
# Below: filter on current package version.
# Future fullnames may even be replaced by new taxonomic names, so new versions of
# the Catalogue of Life must not lead to data corruption.
file_read[base::which(file_read$package_version == utils::packageVersion("AMR")), c("x", "mo")]
}
#' @rdname as.mo
#' @export
clean_mo_history <- function() {
file_location <- base::path.expand('~/.Rhistory_mo')
if (base::file.exists(file_location)) {
base::unlink(file_location)
}
}

21
R/mo_source.R

@ -99,6 +99,8 @@ @@ -99,6 +99,8 @@
#' @inheritSection AMR Read more on our website!
set_mo_source <- function(path) {
file_location <- path.expand('~/mo_source.rds')
if (!is.character(path) | length(path) > 1) {
stop("`path` must be a character of length 1.")
}
@ -106,9 +108,9 @@ set_mo_source <- function(path) { @@ -106,9 +108,9 @@ set_mo_source <- function(path) {
if (path %in% c(NULL, "")) {
options(mo_source = NULL)
options(mo_source_timestamp = NULL)
if (file.exists("~/.mo_source.rds")) {
unlink("~/.mo_source.rds")
message("Removed mo_source file '~/.mo_source.rds'.")
if (file.exists(file_location)) {
unlink(file_location)
message("Removed mo_source file '", file_location, "'.")
}
return(invisible())
}
@ -165,23 +167,22 @@ set_mo_source <- function(path) { @@ -165,23 +167,22 @@ set_mo_source <- function(path) {
df <- as.data.frame(df, stringAsFactors = FALSE)
# success
if (file.exists("~/.mo_source.rds")) {
if (file.exists(file_location)) {
action <- "Updated"
} else {
action <- "Created"
}
saveRDS(df, "~/.mo_source.rds")
saveRDS(df, file_location)
options(mo_source = path)
options(mo_source_timestamp = as.character(file.info(path)$mtime))
message(action, " mo_source file '~/.mo_source.rds' from '", path, "'.")
message(action, " mo_source file '", file_location, "' from '", path, "'.")
}
#' @rdname mo_source
#' @export
get_mo_source <- function() {
if (is.null(getOption("mo_source", NULL))) {
return(NULL)
NULL
} else {
old_time <- as.POSIXct(getOption("mo_source_timestamp"))
new_time <- as.POSIXct(as.character(file.info(getOption("mo_source", ""))$mtime))
@ -195,9 +196,9 @@ get_mo_source <- function() { @@ -195,9 +196,9 @@ get_mo_source <- function() {
# set updated source
set_mo_source(getOption("mo_source"))
}
file_location <- path.expand('~/mo_source.rds')
readRDS(file_location)
}
readRDS("~/.mo_source.rds")
}
mo_source_isvalid <- function(x) {

10
README.md

@ -25,11 +25,11 @@ Bhanu N.M. Sinha <a href="https://orcid.org/0000-0003-1634-0010"><img src="https @@ -25,11 +25,11 @@ Bhanu N.M. Sinha <a href="https://orcid.org/0000-0003-1634-0010"><img src="https
<sup>a</sup> Thesis dissertant<br>
<sup>b</sup> Thesis advisor
<a href="https://www.rug.nl"><img src="pkgdown/logos/logo_rug.png" height="60px"></a>
<a href="https://www.umcg.nl"><img src="pkgdown/logos/logo_umcg.png" height="60px"></a>
<a href="https://www.certe.nl"><img src="pkgdown/logos/logo_certe.png" height="60px"></a>
<a href="http://www.eurhealth-1health.eu"><img src="pkgdown/logos/logo_eh1h.png" height="60px"></a>
<a href="http://www.eurhealth-1health.eu"><img src="pkgdown/logos/logo_interreg.png" height="60px"></a>
<a href="https://www.rug.nl"><img src="man/figures/logo_rug.png" height="60px"></a>
<a href="https://www.umcg.nl"><img src="man/figures/logo_umcg.png" height="60px"></a>
<a href="https://www.certe.nl"><img src="man/figures/logo_certe.png" height="60px"></a>
<a href="http://www.eurhealth-1health.eu"><img src="man/figures/logo_eh1h.png" height="60px"></a>
<a href="http://www.eurhealth-1health.eu"><img src="man/figures/logo_interreg.png" height="60px"></a>
## How to get this package
All stable versions of this package [are published on CRAN](https://CRAN.R-project.org/package=AMR), the official R network with a peer-reviewed submission process.

10
_pkgdown.yml

@ -147,7 +147,7 @@ reference: @@ -147,7 +147,7 @@ reference:
- '`WHONET`'
- '`microorganisms.codes`'
- '`microorganisms.old`'
- title: Other
- title: Other functions
desc: >
These functions are mostly for internal use, but some of
them may also be suitable for your analysis. Especially the
@ -155,7 +155,13 @@ reference: @@ -155,7 +155,13 @@ reference:
contents:
- '`get_locale`'
- '`like`'
- '`ab_property`'
- title: Deprecated functions
desc: >
These functions are deprecated, meaning that they still
work but show a warning with every use and will be removed
in a future version.
contents:
- '`AMR-deprecated`'
authors:
Matthijs S. Berends:

3
appveyor.yml

@ -55,9 +55,6 @@ on_failure: @@ -55,9 +55,6 @@ on_failure:
- 7z a failure.zip *.Rcheck\*
- appveyor PushArtifact failure.zip
#on_success:
# - Rscript -e "library(covr); cc <- package_coverage(); codecov(coverage = cc, token = '50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca'); cat('Code coverage:', percent_coverage(cc))"
artifacts:
- path: '*.Rcheck\**\*.log'
name: Logs

2
docs/LICENSE-text.html

@ -78,7 +78,7 @@ @@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span>
</span>
</div>

375
docs/articles/AMR.html

@ -40,7 +40,7 @@ @@ -40,7 +40,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span>
</span>
</div>
@ -192,7 +192,7 @@ @@ -192,7 +192,7 @@
<h1>How to conduct AMR analysis</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">05 March 2019</h4>
<h4 class="date">15 March 2019</h4>
<div class="hidden name"><code>AMR.Rmd</code></div>
@ -201,7 +201,7 @@ @@ -201,7 +201,7 @@
<p><strong>Note:</strong> values on this page will change with every website update since they are based on randomly created values and the page was written in <a href="https://rmarkdown.rstudio.com/">RMarkdown</a>. However, the methodology remains unchanged. This page was generated on 05 March 2019.</p>
<p><strong>Note:</strong> values on this page will change with every website update since they are based on randomly created values and the page was written in <a href="https://rmarkdown.rstudio.com/">RMarkdown</a>. However, the methodology remains unchanged. This page was generated on 15 March 2019.</p>
<div id="introduction" class="section level1">
<h1 class="hasAnchor">
<a href="#introduction" class="anchor"></a>Introduction</h1>
@ -217,21 +217,21 @@ @@ -217,21 +217,21 @@
</tr></thead>
<tbody>
<tr class="odd">
<td align="center">2019-03-05</td>
<td align="center">2019-03-15</td>
<td align="center">abcd</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
</tr>
<tr class="even">
<td align="center">2019-03-05</td>
<td align="center">2019-03-15</td>
<td align="center">abcd</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">R</td>
</tr>
<tr class="odd">
<td align="center">2019-03-05</td>
<td align="center">2019-03-15</td>
<td align="center">efgh</td>
<td align="center">Escherichia coli</td>
<td align="center">R</td>
@ -327,21 +327,21 @@ @@ -327,21 +327,21 @@
</tr></thead>
<tbody>
<tr class="odd">
<td align="center">2010-01-23</td>
<td align="center">E2</td>
<td align="center">2011-03-23</td>
<td align="center">H4</td>
<td align="center">Hospital B</td>
<td align="center">Staphylococcus aureus</td>
<td align="center">I</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">M</td>
</tr>
<tr class="even">
<td align="center">2017-12-07</td>
<td align="center">L8</td>
<td align="center">2016-02-07</td>
<td align="center">A10</td>
<td align="center">Hospital B</td>
<td align="center">Staphylococcus aureus</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
@ -349,48 +349,48 @@ @@ -349,48 +349,48 @@
<td align="center">M</td>
</tr>
<tr class="odd">
<td align="center">2012-07-19</td>
<td align="center">W5</td>
<td align="center">Hospital A</td>
<td align="center">Staphylococcus aureus</td>
<td align="center">2017-05-30</td>
<td align="center">Q9</td>
<td align="center">Hospital D</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">F</td>
</tr>
<tr class="even">
<td align="center">2013-11-26</td>
<td align="center">L7</td>
<td align="center">Hospital A</td>
<td align="center">2016-09-19</td>
<td align="center">U5</td>
<td align="center">Hospital B</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">M</td>
<td align="center">S</td>
<td align="center">F</td>
</tr>
<tr class="odd">
<td align="center">2016-01-24</td>
<td align="center">M7</td>
<td align="center">Hospital B</td>
<td align="center">Escherichia coli</td>
<td align="center">2016-03-20</td>
<td align="center">X10</td>
<td align="center">Hospital D</td>
<td align="center">Streptococcus pneumoniae</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">M</td>
<td align="center">F</td>
</tr>
<tr class="even">
<td align="center">2016-11-13</td>
<td align="center">V10</td>
<td align="center">Hospital A</td>
<td align="center">2012-07-29</td>
<td align="center">D10</td>
<td align="center">Hospital D</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">F</td>
<td align="center">M</td>
</tr>
</tbody>
</table>
@ -411,15 +411,15 @@ @@ -411,15 +411,15 @@
#&gt;
#&gt; Item Count Percent Cum. Count Cum. Percent
#&gt; --- ----- ------- -------- ----------- -------------
#&gt; 1 M 10,562 52.8% 10,562 52.8%
#&gt; 2 F 9,438 47.2% 20,000 100.0%</code></pre>
#&gt; 1 M 10,422 52.1% 10,422 52.1%
#&gt; 2 F 9,578 47.9% 20,000 100.0%</code></pre>
<p>So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values <code>M</code> and <code>F</code>. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.</p>
<p>The data is already quite clean, but we still need to transform some variables. The <code>bacteria</code> column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The <code><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate()</a></code> function of the <code>dplyr</code> package makes this really easy:</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb12-1" title="1">data &lt;-<span class="st"> </span>data <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb12-2" title="2"><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span>(<span class="dt">bacteria =</span> <span class="kw"><a href="../reference/as.mo.html">as.mo</a></span>(bacteria))</a></code></pre></div>
<p>We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The <code><a href="../reference/as.rsi.html">as.rsi()</a></code> function ensures reliability and reproducibility in these kind of variables. The <code><a href="https://dplyr.tidyverse.org/reference/summarise_all.html">mutate_at()</a></code> will run the <code><a href="../reference/as.rsi.html">as.rsi()</a></code> function on defined variables:</p>
<p>We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The <code><a href="../reference/as.rsi.html">as.rsi()</a></code> function ensures reliability and reproducibility in these kind of variables. The <code><a href="https://dplyr.tidyverse.org/reference/mutate_all.html">mutate_at()</a></code> will run the <code><a href="../reference/as.rsi.html">as.rsi()</a></code> function on defined variables:</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb13-1" title="1">data &lt;-<span class="st"> </span>data <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb13-2" title="2"><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/summarise_all.html">mutate_at</a></span>(<span class="kw"><a href="https://dplyr.tidyverse.org/reference/vars.html">vars</a></span>(amox<span class="op">:</span>gent), as.rsi)</a></code></pre></div>
<a class="sourceLine" id="cb13-2" title="2"><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate_all.html">mutate_at</a></span>(<span class="kw"><a href="https://dplyr.tidyverse.org/reference/vars.html">vars</a></span>(amox<span class="op">:</span>gent), as.rsi)</a></code></pre></div>
<p>Finally, we will apply <a href="http://www.eucast.org/expert_rules_and_intrinsic_resistance/">EUCAST rules</a> on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. Moreover, the <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function can also apply additional rules, like forcing <help title="ATC: J01CA01">ampicillin</help> = R when <help title="ATC: J01CR02">amoxicillin/clavulanic acid</help> = R.</p>
<p>Because the amoxicillin (column <code>amox</code>) and amoxicillin/clavulanic acid (column <code>amcl</code>) in our data were generated randomly, some rows will undoubtedly contain amox = S and amcl = R, which is technically impossible. The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> fixes this:</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb14-1" title="1">data &lt;-<span class="st"> </span><span class="kw"><a href="../reference/eucast_rules.html">eucast_rules</a></span>(data, <span class="dt">col_mo =</span> <span class="st">"bacteria"</span>)</a>
@ -443,10 +443,10 @@ @@ -443,10 +443,10 @@
<a class="sourceLine" id="cb14-19" title="19"><span class="co">#&gt; Kingella kingae (no changes)</span></a>
<a class="sourceLine" id="cb14-20" title="20"><span class="co">#&gt; </span></a>
<a class="sourceLine" id="cb14-21" title="21"><span class="co">#&gt; EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)</span></a>
<a class="sourceLine" id="cb14-22" title="22"><span class="co">#&gt; Table 1: Intrinsic resistance in Enterobacteriaceae (1344 changes)</span></a>
<a class="sourceLine" id="cb14-22" title="22"><span class="co">#&gt; Table 1: Intrinsic resistance in Enterobacteriaceae (1315 changes)</span></a>
<a class="sourceLine" id="cb14-23" title="23"><span class="co">#&gt; Table 2: Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)</span></a>
<a class="sourceLine" id="cb14-24" title="24"><span class="co">#&gt; Table 3: Intrinsic resistance in other Gram-negative bacteria (no changes)</span></a>
<a class="sourceLine" id="cb14-25" title="25"><span class="co">#&gt; Table 4: Intrinsic resistance in Gram-positive bacteria (2767 changes)</span></a>
<a class="sourceLine" id="cb14-25" title="25"><span class="co">#&gt; Table 4: Intrinsic resistance in Gram-positive bacteria (2799 changes)</span></a>
<a class="sourceLine" id="cb14-26" title="26"><span class="co">#&gt; Table 8: Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)</span></a>
<a class="sourceLine" id="cb14-27" title="27"><span class="co">#&gt; Table 9: Interpretive rules for B-lactam agents and Gram-negative rods (no changes)</span></a>
<a class="sourceLine" id="cb14-28" title="28"><span class="co">#&gt; Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)</span></a>
@ -462,9 +462,9 @@ @@ -462,9 +462,9 @@
<a class="sourceLine" id="cb14-38" title="38"><span class="co">#&gt; Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)</span></a>
<a class="sourceLine" id="cb14-39" title="39"><span class="co">#&gt; Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)</span></a>
<a class="sourceLine" id="cb14-40" title="40"><span class="co">#&gt; </span></a>
<a class="sourceLine" id="cb14-41" title="41"><span class="co">#&gt; =&gt; EUCAST rules affected 7,383 out of 20,000 rows</span></a>
<a class="sourceLine" id="cb14-41" title="41"><span class="co">#&gt; =&gt; EUCAST rules affected 7,488 out of 20,000 rows</span></a>
<a class="sourceLine" id="cb14-42" title="42"><span class="co">#&gt; -&gt; added 0 test results</span></a>
<a class="sourceLine" id="cb14-43" title="43"><span class="co">#&gt; -&gt; changed 4,111 test results (0 to S; 0 to I; 4,111 to R)</span></a></code></pre></div>
<a class="sourceLine" id="cb14-43" title="43"><span class="co">#&gt; -&gt; changed 4,114 test results (0 to S; 0 to I; 4,114 to R)</span></a></code></pre></div>
</div>
<div id="adding-new-variables" class="section level1">
<h1 class="hasAnchor">
@ -489,7 +489,7 @@ @@ -489,7 +489,7 @@
<a class="sourceLine" id="cb16-3" title="3"><span class="co">#&gt; </span><span class="al">NOTE</span><span class="co">: Using column `bacteria` as input for `col_mo`.</span></a>
<a class="sourceLine" id="cb16-4" title="4"><span class="co">#&gt; </span><span class="al">NOTE</span><span class="co">: Using column `date` as input for `col_date`.</span></a>
<a class="sourceLine" id="cb16-5" title="5"><span class="co">#&gt; </span><span class="al">NOTE</span><span class="co">: Using column `patient_id` as input for `col_patient_id`.</span></a>
<a class="sourceLine" id="cb16-6" title="6"><span class="co">#&gt; =&gt; Found 5,676 first isolates (28.4% of total)</span></a></code></pre></div>
<a class="sourceLine" id="cb16-6" title="6"><span class="co">#&gt; =&gt; Found 5,688 first isolates (28.4% of total)</span></a></code></pre></div>
<p>So only 28.4% is suitable for resistance analysis! We can now filter on it with the <code><a href="https://dplyr.tidyverse.org/reference/filter.html">filter()</a></code> function, also from the <code>dplyr</code> package:</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb17-1" title="1">data_1st &lt;-<span class="st"> </span>data <span class="op">%&gt;%</span><span class="st"> </span></a>
<a class="sourceLine" id="cb17-2" title="2"><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(first <span class="op">==</span><span class="st"> </span><span class="ot">TRUE</span>)</a></code></pre></div>
@ -516,19 +516,19 @@ @@ -516,19 +516,19 @@
<tbody>
<tr class="odd">
<td align="center">1</td>
<td align="center">2010-01-20</td>
<td align="center">L10</td>
<td align="center">2010-04-01</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">I</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">TRUE</td>
</tr>
<tr class="even">
<td align="center">2</td>
<td align="center">2010-03-26</td>
<td align="center">L10</td>
<td align="center">2010-04-30</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">R</td>
<td align="center">S</td>
@ -538,10 +538,10 @@ @@ -538,10 +538,10 @@
</tr>
<tr class="odd">
<td align="center">3</td>
<td align="center">2010-05-05</td>
<td align="center">L10</td>
<td align="center">2010-10-12</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
@ -549,19 +549,19 @@ @@ -549,19 +549,19 @@
</tr>
<tr class="even">
<td align="center">4</td>
<td align="center">2010-06-20</td>
<td align="center">L10</td>
<td align="center">2010-12-05</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">FALSE</td>
</tr>
<tr class="odd">
<td align="center">5</td>
<td align="center">2010-07-10</td>
<td align="center">L10</td>
<td align="center">2011-01-19</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">S</td>
<td align="center">S</td>
@ -571,21 +571,21 @@ @@ -571,21 +571,21 @@
</tr>
<tr class="even">
<td align="center">6</td>
<td align="center">2010-08-01</td>
<td align="center">L10</td>
<td align="center">2011-04-07</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">FALSE</td>
<td align="center">TRUE</td>
</tr>
<tr class="odd">
<td align="center">7</td>
<td align="center">2010-08-27</td>
<td align="center">L10</td>
<td align="center">2011-06-16</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
@ -593,19 +593,19 @@ @@ -593,19 +593,19 @@
</tr>
<tr class="even">
<td align="center">8</td>
<td align="center">2010-09-09</td>
<td align="center">L10</td>
<td align="center">2011-07-16</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">FALSE</td>
</tr>
<tr class="odd">
<td align="center">9</td>
<td align="center">2010-09-26</td>
<td align="center">L10</td>
<td align="center">2011-08-25</td>
<td align="center">K1</td>
<td align="center">B_ESCHR_COL</td>
<td align="center">R</td>
<td align="center">S<