Browse Source

(v1.5.0.9022) mo properties speed improvement

new-mo-algorithm
parent
commit
062c49fca1
  1. 7
      .github/workflows/check.yaml
  2. 2
      DESCRIPTION
  3. 5
      NEWS.md
  4. 12
      R/aa_helper_functions.R
  5. 25
      R/mo.R
  6. 25
      R/mo_property.R
  7. BIN
      R/sysdata.rda
  8. BIN
      data-raw/AMR_latest.tar.gz
  9. 9
      data-raw/_internals.R
  10. 2
      docs/404.html
  11. 2
      docs/LICENSE-text.html
  12. 2
      docs/articles/index.html
  13. 2
      docs/authors.html
  14. 2
      docs/index.html
  15. 12
      docs/news/index.html
  16. 2
      docs/pkgdown.yml
  17. 2
      docs/reference/index.html
  18. 2
      docs/survey.html
  19. 10
      tests/testthat/test-data.R

7
.github/workflows/check.yaml

@ -53,9 +53,11 @@ jobs: @@ -53,9 +53,11 @@ jobs:
- {os: macOS-latest, r: 'devel', allowfail: false}
- {os: macOS-latest, r: 'release', allowfail: false}
- {os: macOS-latest, r: 'oldrel', allowfail: false}
- {os: windows-latest, r: 'devel', allowfail: false}
- {os: windows-latest, r: 'release', allowfail: false}
- {os: windows-latest, r: 'oldrel', allowfail: false}
- {os: ubuntu-20.04, r: 'devel', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'release', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'oldrel', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
@ -66,7 +68,8 @@ jobs: @@ -66,7 +68,8 @@ jobs:
- {os: ubuntu-20.04, r: '3.3', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
# - {os: ubuntu-20.04, r: '3.2', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
# - {os: ubuntu-20.04, r: '3.1', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: '3.0', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: '3.0', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-16.04, r: 'devel', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: 'release', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: 'oldrel', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
@ -77,7 +80,7 @@ jobs: @@ -77,7 +80,7 @@ jobs:
- {os: ubuntu-16.04, r: '3.3', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
# - {os: ubuntu-16.04, r: '3.2', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
# - {os: ubuntu-16.04, r: '3.1', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.0', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.0', allowfail: true, rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

2
DESCRIPTION

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
Package: AMR
Version: 1.5.0.9021
Version: 1.5.0.9022
Date: 2021-02-21
Title: Antimicrobial Resistance Data Analysis
Authors@R: c(

5
NEWS.md

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
# AMR 1.5.0.9021
# AMR 1.5.0.9022
## <small>Last updated: 21 February 2021</small>
### New
@ -58,7 +58,8 @@ @@ -58,7 +58,8 @@
* Functions `print()` and `summary()` on a Principal Components Analysis object (`pca()`) now print additional group info if the original data was grouped using `dplyr::group_by()`
* Improved speed and reliability of `guess_ab_col()`. As this also internally improves the reliability of `first_isolate()` and `mdro()`, this might have a slight impact on the results of those functions.
* Fix for `mo_name()` when used in other languages than English
* The `like()` function (and its fast alias `%like%`) now always use Perl compatibility, improving speed for many functions in this package
* The `like()` function (and its fast alias `%like%`) now always use Perl compatibility, improving speed for many functions in this package (e.g., `as.mo()` is now up to 4 times faster)
* *Staphylococcus cornubiensis* is now correctly categorised as coagulase-positive
### Other
* Big documentation updates

12
R/aa_helper_functions.R

@ -112,7 +112,7 @@ check_dataset_integrity <- function() { @@ -112,7 +112,7 @@ check_dataset_integrity <- function() {
require("AMR")
})
stop_if(!valid_microorganisms | !valid_antibiotics,
"the data set `microorganisms` or `antibiotics` was overwritten in your environment because another package with the same object names was loaded _after_ the AMR package, preventing the AMR package from working correctly. Please load the AMR package last.")
"the data set `microorganisms` or `antibiotics` was overwritten in your environment because another package with the same object name(s) was loaded _after_ the AMR package, preventing the AMR package from working correctly. Please load the AMR package last.")
invisible(TRUE)
}
@ -538,7 +538,7 @@ meet_criteria <- function(object, @@ -538,7 +538,7 @@ meet_criteria <- function(object,
if (!is.null(pkg_env$meet_criteria_error_txt)) {
error_txt <- pkg_env$meet_criteria_error_txt
pkg_env$meet_criteria_error_txt <- NULL
stop(error_txt, call. = FALSE)
stop(error_txt, call. = FALSE) # don't use stop_() here, pkg may not be loaded yet
}
pkg_env$meet_criteria_error_txt <- NULL
@ -867,7 +867,13 @@ font_grey <- function(..., collapse = " ") { @@ -867,7 +867,13 @@ font_grey <- function(..., collapse = " ") {
try_colour(..., before = "\033[38;5;249m", after = "\033[39m", collapse = collapse)
}
font_grey_bg <- function(..., collapse = " ") {
try_colour(..., before = "\033[48;5;254m", after = "\033[49m", collapse = collapse)
if (tryCatch(rstudioapi::getThemeInfo()$dark == TRUE, error = function(e) FALSE)) {
# similar to HTML #444444
try_colour(..., before = "\033[48;5;238m", after = "\033[49m", collapse = collapse)
} else {
# similar to HTML #eeeeee
try_colour(..., before = "\033[48;5;254m", after = "\033[49m", collapse = collapse)
}
}
font_green_bg <- function(..., collapse = " ") {
try_colour(..., before = "\033[42m", after = "\033[49m", collapse = collapse)

25
R/mo.R

@ -178,13 +178,6 @@ as.mo <- function(x, @@ -178,13 +178,6 @@ as.mo <- function(x,
return(set_clean_class(x, new_class = c("mo", "character")))
}
if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE)
& isFALSE(Becker)
& isFALSE(Lancefield), error = function(e) FALSE)) {
# to improve speed, special case for taxonomically correct full names (case-insensitive)
return(MO_lookup[match(gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE), MO_lookup$fullname_lower), "mo", drop = TRUE])
}
# start off with replaced language-specific non-ASCII characters with ASCII characters
x <- parse_and_convert(x)
# replace mo codes used in older package versions
@ -198,6 +191,13 @@ as.mo <- function(x, @@ -198,6 +191,13 @@ as.mo <- function(x,
x[trimws2(x) %like% translate_AMR("no .*growth", language = language)] <- NA_character_
x[trimws2(x) %like% paste0("^(", translate_AMR("no|not", language = language), ") [a-z]+")] <- "UNKNOWN"
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE)
& isFALSE(Becker)
& isFALSE(Lancefield), error = function(e) FALSE)) {
# to improve speed, special case for taxonomically correct full names (case-insensitive)
return(MO_lookup[match(gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE), MO_lookup$fullname_lower), "mo", drop = TRUE])
}
if (!is.null(reference_df)
&& check_validity_mo_source(reference_df)
@ -481,11 +481,11 @@ exec_as.mo <- function(x, @@ -481,11 +481,11 @@ exec_as.mo <- function(x,
# Fill in fullnames and MO codes at once
known_names <- tolower(x_backup) %in% MO_lookup$fullname_lower
x[known_names] <- MO_lookup[match(tolower(x_backup)[known_names], MO_lookup$fullname), property, drop = TRUE]
x[known_names] <- MO_lookup[match(tolower(x_backup)[known_names], MO_lookup$fullname_lower), property, drop = TRUE]
known_codes <- toupper(x_backup) %in% MO_lookup$mo
x[known_codes] <- MO_lookup[match(toupper(x_backup)[known_codes], MO_lookup$mo), property, drop = TRUE]
already_known <- known_names | known_codes
# now only continue where the right taxonomic output is not already known
if (any(!already_known)) {
x_known <- x[already_known]
@ -984,7 +984,6 @@ exec_as.mo <- function(x, @@ -984,7 +984,6 @@ exec_as.mo <- function(x,
g.x_backup_without_spp %pm>% substr(1, x_length / 2),
".* ",
g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length))
print(x_split)
found <- lookup(fullname_lower %like_case% x_split,
haystack = data_to_check)
if (!is.na(found)) {
@ -1977,13 +1976,13 @@ parse_and_convert <- function(x) { @@ -1977,13 +1976,13 @@ parse_and_convert <- function(x) {
if (NCOL(x) > 2) {
stop("a maximum of two columns is allowed", call. = FALSE)
} else if (NCOL(x) == 2) {
# support Tidyverse selection like: df %pm>% select(colA, colB)
# support Tidyverse selection like: df %>% select(colA, colB)
# paste these columns together
x <- as.data.frame(x, stringsAsFactors = FALSE)
colnames(x) <- c("A", "B")
x <- paste(x$A, x$B)
} else {
# support Tidyverse selection like: df %pm>% select(colA)
# support Tidyverse selection like: df %>% select(colA)
x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]]
}
}
@ -1991,6 +1990,8 @@ parse_and_convert <- function(x) { @@ -1991,6 +1990,8 @@ parse_and_convert <- function(x) {
parsed <- iconv(x, to = "UTF-8")
parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT")
parsed <- gsub('"', "", parsed, fixed = TRUE)
parsed <- gsub(" +", " ", parsed, perl = TRUE)
parsed <- trimws(parsed)
}, error = function(e) stop(e$message, call. = FALSE)) # this will also be thrown when running `as.mo(no_existing_object)`
parsed
}

25
R/mo_property.R

@ -694,22 +694,22 @@ mo_property <- function(x, property = "fullname", language = get_locale(), ...) @@ -694,22 +694,22 @@ mo_property <- function(x, property = "fullname", language = get_locale(), ...)
mo_validate <- function(x, property, language, ...) {
check_dataset_integrity()
if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & length(list(...)) == 0, error = function(e) FALSE)) {
# special case for mo_* functions where class is already <mo>
return(MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE])
}
dots <- list(...)
Becker <- dots$Becker
if (is.null(Becker)) {
if (is.null(Becker) | property %in% c("kingdom", "phylum", "class", "order", "family", "genus")) {
Becker <- FALSE
}
Lancefield <- dots$Lancefield
if (is.null(Lancefield)) {
if (is.null(Lancefield) | property %in% c("kingdom", "phylum", "class", "order", "family", "genus")) {
Lancefield <- FALSE
}
has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") | Lancefield %in% c(TRUE, "all")
if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & !has_Becker_or_Lancefield, error = function(e) FALSE)) {
# special case for mo_* functions where class is already <mo>
return(MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE])
}
# try to catch an error when inputting an invalid argument
# so the 'call.' can be set to FALSE
tryCatch(x[1L] %in% MO_lookup[1, property, drop = TRUE],
@ -722,8 +722,13 @@ mo_validate <- function(x, property, language, ...) { @@ -722,8 +722,13 @@ mo_validate <- function(x, property, language, ...) {
# because it's already a valid MO
x <- exec_as.mo(x, property = property, initial_search = FALSE, language = language, ...)
} else if (!all(x %in% MO_lookup[, property, drop = TRUE])
| Becker %in% c(TRUE, "all")
| Lancefield %in% c(TRUE, "all")) {
| has_Becker_or_Lancefield) {
accepted_args <- names(as.list(args("as.mo")))
accepted_args <- accepted_args[!accepted_args %in% c("", "...", "x", "property")]
stop_if(!all(names(dots) %in% names(as.list(args("as.mo")))),
"invalid argument(s): ", vector_and(names(dots)[!names(dots) %in% names(as.list(args("as.mo")))], quotes = "'"),
".\nAccepted arguments are ", vector_and(accepted_args, quotes = "'"), ".",
call = FALSE)
x <- exec_as.mo(x, property = property, language = language, ...)
}

BIN
R/sysdata.rda

Binary file not shown.

BIN
data-raw/AMR_latest.tar.gz

Binary file not shown.

9
data-raw/_internals.R

@ -51,18 +51,19 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) { @@ -51,18 +51,19 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
"lentus", "lugdunensis", "massiliensis", "microti",
"muscae", "nepalensis", "pasteuri", "petrasii",
"pettenkoferi", "piscifermentans", "pseudoxylosus",
"rostri", "saccharolyticus", "saprophyticus",
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
"sciuri", "simulans", "stepanovicii", "succinus",
"vitulinus", "warneri", "xylosus")
"vitulinus", "vitulus", "warneri", "xylosus")
| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
"mo", drop = TRUE]
} else if (type == "CoPS") {
MO_staph[which(MO_staph$species %in% c("coagulase-positive",
"simiae", "agnetis",
"agnetis", "argenteus",
"cornubiensis",
"delphini", "lutrae",
"hyicus", "intermedius",
"pseudintermedius", "pseudointermedius",
"schweitzeri", "argenteus")
"schweitzeri", "simiae")
| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
"mo", drop = TRUE]
}

2
docs/404.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.github.io/AMR//index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

2
docs/LICENSE-text.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

2
docs/articles/index.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

2
docs/authors.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

2
docs/index.html

@ -43,7 +43,7 @@ @@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

12
docs/news/index.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>
@ -236,9 +236,9 @@ @@ -236,9 +236,9 @@
<small>Source: <a href='https://github.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
</div>
<div id="amr-1509021" class="section level1">
<h1 class="page-header" data-toc-text="1.5.0.9021">
<a href="#amr-1509021" class="anchor"></a>AMR 1.5.0.9021<small> Unreleased </small>
<div id="amr-1509022" class="section level1">
<h1 class="page-header" data-toc-text="1.5.0.9022">
<a href="#amr-1509022" class="anchor"></a>AMR 1.5.0.9022<small> Unreleased </small>
</h1>
<div id="last-updated-21-february-2021" class="section level2">
<h2 class="hasAnchor">
@ -326,7 +326,9 @@ @@ -326,7 +326,9 @@
</li>
<li>Improved speed and reliability of <code><a href="../reference/guess_ab_col.html">guess_ab_col()</a></code>. As this also internally improves the reliability of <code><a href="../reference/first_isolate.html">first_isolate()</a></code> and <code><a href="../reference/mdro.html">mdro()</a></code>, this might have a slight impact on the results of those functions.</li>
<li>Fix for <code><a href="../reference/mo_property.html">mo_name()</a></code> when used in other languages than English</li>
<li>The <code><a href="../reference/like.html">like()</a></code> function (and its fast alias <code><a href="../reference/like.html">%like%</a></code>) now always use Perl compatibility, improving speed for many functions in this package</li>
<li>The <code><a href="../reference/like.html">like()</a></code> function (and its fast alias <code><a href="../reference/like.html">%like%</a></code>) now always use Perl compatibility, improving speed for many functions in this package (e.g., <code><a href="../reference/as.mo.html">as.mo()</a></code> is now up to 4 times faster)</li>
<li>
<em>Staphylococcus cornubiensis</em> is now correctly categorised as coagulase-positive</li>
</ul>
</div>
<div id="other" class="section level3">

2
docs/pkgdown.yml

@ -12,7 +12,7 @@ articles: @@ -12,7 +12,7 @@ articles:
datasets: datasets.html
resistance_predict: resistance_predict.html
welcome_to_AMR: welcome_to_AMR.html
last_built: 2021-02-21T19:14Z
last_built: 2021-02-21T21:55Z
urls:
reference: https://msberends.github.io/AMR//reference
article: https://msberends.github.io/AMR//articles

2
docs/reference/index.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

2
docs/survey.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
</span>
</div>

10
tests/testthat/test-data.R

@ -86,3 +86,13 @@ test_that("CoL version info works", { @@ -86,3 +86,13 @@ test_that("CoL version info works", {
expect_output(print(catalogue_of_life_version()))
})
test_that("CoNS/CoPS are up to date", {
uncategorised <- subset(microorganisms,
genus == "Staphylococcus" &
!species %in% c("", "aureus") &
!mo %in% c(MO_CONS, MO_COPS))
expect(NROW(uncategorised) == 0,
failure_message = paste0("Staphylococcal species not categorised as CoNS/CoPS: S. ",
uncategorised$species, " (", uncategorised$mo, ")"))
})

Loading…
Cancel
Save