Browse Source

(v1.1.0.9021) 1st isolates update

pull/67/head
parent
commit
d9a4b0bcaf
  1. 4
      DESCRIPTION
  2. 1
      NAMESPACE
  3. 6
      NEWS.md
  4. 8
      R/resistance_predict.R
  5. 26
      R/rsi.R
  6. 24
      data-raw/country_analysis.R
  7. 1
      data-raw/microorganisms.txt
  8. 16
      data-raw/reproduction_of_microorganisms.R
  9. 682
      data-raw/reproduction_of_microorganisms_new.R
  10. BIN
      data/microorganisms.old.rda
  11. BIN
      data/microorganisms.rda
  12. 2
      docs/404.html
  13. 2
      docs/LICENSE-text.html
  14. 426
      docs/articles/AMR.html
  15. BIN
      docs/articles/AMR_files/figure-html/plot 1-1.png
  16. BIN
      docs/articles/AMR_files/figure-html/plot 3-1.png
  17. BIN
      docs/articles/AMR_files/figure-html/plot 4-1.png
  18. BIN
      docs/articles/AMR_files/figure-html/plot 5-1.png
  19. 68
      docs/articles/MDR.html
  20. 30
      docs/articles/PCA.html
  21. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png
  22. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png
  23. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png
  24. 103
      docs/articles/benchmarks.html
  25. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png
  26. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png
  27. 2
      docs/articles/index.html
  28. 4
      docs/articles/resistance_predict.html
  29. BIN
      docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png
  30. BIN
      docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png
  31. 2
      docs/authors.html
  32. BIN
      docs/countries.png
  33. BIN
      docs/countries_large.png
  34. 4
      docs/index.html
  35. 14
      docs/news/index.html
  36. 2
      docs/pkgdown.yml
  37. 2
      docs/reference/ab_property.html
  38. 2
      docs/reference/as.mo.html
  39. 2
      docs/reference/catalogue_of_life.html
  40. 4
      docs/reference/index.html
  41. 2
      docs/reference/microorganisms.codes.html
  42. 12
      docs/reference/microorganisms.html
  43. 4
      docs/reference/microorganisms.old.html
  44. 2
      docs/reference/mo_property.html
  45. 2
      docs/reference/mo_source.html
  46. 2
      index.md
  47. 6
      man/microorganisms.Rd
  48. 2
      man/microorganisms.old.Rd
  49. BIN
      pkgdown/logos/countries.png
  50. BIN
      pkgdown/logos/countries_large.png
  51. 18
      tests/testthat/test-first_isolate.R

4
DESCRIPTION

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
Package: AMR
Version: 1.1.0.9020
Date: 2020-05-27
Version: 1.1.0.9021
Date: 2020-05-28
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),

1
NAMESPACE

@ -254,7 +254,6 @@ importFrom(graphics,arrows) @@ -254,7 +254,6 @@ importFrom(graphics,arrows)
importFrom(graphics,axis)
importFrom(graphics,barplot)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,text)
importFrom(stats,complete.cases)

6
NEWS.md

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
# AMR 1.1.0.9020
## <small>Last updated: 27-May-2020</small>
# AMR 1.1.0.9021
## <small>Last updated: 28-May-2020</small>
### Breaking
* Removed code dependency on all other R packages, making this package fully independent of the development process of others. This is a major code change, but will probably not be noticeable by most users.
@ -14,7 +14,7 @@ @@ -14,7 +14,7 @@
### Changed
* Taxonomy:
* Updated the taxonomy of microorganisms tot May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)
* Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020). **Note:** a taxonomic update may always impact determination of first isolates (using `first_isolate()`), since some bacterial names might be renamed to other genera or other (sub)species. This is expected behaviour.
* Removed the Catalogue of Life IDs (like 776351), since they now work with a species ID (hexadecimal string)
* EUCAST rules:
* The `eucast_rules()` function no longer applies, by default, the "other" rules that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`.

8
R/resistance_predict.R

@ -304,7 +304,7 @@ rsi_predict <- resistance_predict @@ -304,7 +304,7 @@ rsi_predict <- resistance_predict
#' @exportMethod plot.mic
#' @export
#' @importFrom graphics plot axis arrows points
#' @importFrom graphics axis arrows points
#' @rdname resistance_predict
plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", x_name), ...) {
x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")")
@ -314,6 +314,12 @@ plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", @@ -314,6 +314,12 @@ plot.resistance_predict <- function(x, main = paste("Resistance Prediction of",
} else {
ylab <- "%IR"
}
# get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0
if (as.integer(R.Version()$major) >= 4) {
plot <- get("plot", envir = asNamespace("base"))
} else {
plot <- get("plot", envir = asNamespace("graphics"))
}
plot(x = x$year,
y = x$value,
ylim = c(0, 1),

26
R/rsi.R

@ -563,25 +563,20 @@ summary.rsi <- function(object, ...) { @@ -563,25 +563,20 @@ summary.rsi <- function(object, ...) {
#' @exportMethod plot.rsi
#' @export
#' @importFrom graphics plot text
#' @importFrom graphics text axis
#' @noRd
plot.rsi <- function(x,
lwd = 2,
ylim = NULL,
ylab = "Percentage",
xlab = "Antimicrobial Interpretation",
main = paste("Susceptibility Analysis of", deparse(substitute(x))),
main = paste("Resistance Overview of", deparse(substitute(x))),
axes = FALSE,
...) {
suppressWarnings(
data <- data.frame(x = x,
y = 1,
stringsAsFactors = TRUE) %>%
group_by(x) %>%
summarise(n = sum(y)) %>%
filter(!is.na(x)) %>%
mutate(s = round((n / sum(n)) * 100, 1))
)
data <- as.data.frame(table(x), stringsAsFactors = FALSE)
colnames(data) <- c("x", "n")
data$s <- round((data$n / sum(data$n)) * 100, 1)
if (!"S" %in% data$x) {
data <- rbind(data, data.frame(x = "S", n = 0, s = 0))
}
@ -592,10 +587,17 @@ plot.rsi <- function(x, @@ -592,10 +587,17 @@ plot.rsi <- function(x,
data <- rbind(data, data.frame(x = "R", n = 0, s = 0))
}
# don't use as.rsi() here, it will confuse plot()
data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE)
ymax <- if_else(max(data$s) > 95, 105, 100)
# get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0
if (as.integer(R.Version()$major) >= 4) {
plot <- get("plot", envir = asNamespace("base"))
} else {
plot <- get("plot", envir = asNamespace("graphics"))
}
plot(x = data$x,
y = data$s,
lwd = lwd,
@ -623,7 +625,7 @@ plot.rsi <- function(x, @@ -623,7 +625,7 @@ plot.rsi <- function(x,
barplot.rsi <- function(height,
col = c("chartreuse4", "chartreuse3", "brown3"),
xlab = ifelse(beside, "Antimicrobial Interpretation", ""),
main = paste("Antimicrobial resistance of", deparse(substitute(height))),
main = paste("Resistance Overview of", deparse(substitute(height))),
ylab = "Frequency",
beside = TRUE,
axes = beside,

24
data-raw/country_analysis.R

@ -90,9 +90,29 @@ countries_geometry <- sf::st_as_sf(map('world', plot = FALSE, fill = TRUE)) %>% @@ -90,9 +90,29 @@ countries_geometry <- sf::st_as_sf(map('world', plot = FALSE, fill = TRUE)) %>%
not_antarctica = as.integer(ID != "Antarctica"),
countries_name = ifelse(included == 1, as.character(ID), NA))
# add countries not in the list: every country code that occurs in `ip_tbl`
# but has no geometry gets a copy of the Netherlands row, relabelled with the
# country's own name/code and flagged as included
countries_missing <- unique(ip_tbl$country[!ip_tbl$country %in% countries_geometry$countries_code])
if (length(countries_missing) > 0) {
  # translate all ISO2 codes in one vectorised call (was: twice per iteration)
  countries_missing_names <- countrycode::countrycode(countries_missing,
                                                      origin = "iso2c",
                                                      destination = "country.name")
  countries_template <- countries_geometry %>%
    filter(ID == "Netherlands")
  countries_extra <- lapply(seq_along(countries_missing), function(i) {
    countries_template %>%
      mutate(ID = countries_missing_names[i],
             countries_code = countries_missing[i],
             included = 1,
             not_antarctica = 1,
             countries_name = countries_missing_names[i])
  })
  # bind once instead of growing `countries_geometry` inside a loop
  countries_geometry <- do.call(rbind, c(list(countries_geometry), countries_extra))
}
# how many?
countries_geometry %>% filter(included == 1) %>% nrow()
countries_geometry$countries_name <- gsub("UK", "United Kingdom", countries_geometry$countries_name, fixed = TRUE)
countries_geometry$countries_name <- gsub("USA", "United States", countries_geometry$countries_name, fixed = TRUE)
countries_plot <- ggplot(countries_geometry) +
geom_sf(aes(fill = included, colour = not_antarctica),
size = 0.25,
@ -101,9 +121,9 @@ countries_plot <- ggplot(countries_geometry) + @@ -101,9 +121,9 @@ countries_plot <- ggplot(countries_geometry) +
theme(panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank()) +
scale_fill_gradient(low = "white", high = "#CAD6EA", ) +
scale_fill_gradient(low = "white", high = "#128f7645") +
# this makes the border Antarctica turn white (invisible):
scale_colour_gradient(low = "white", high = "#81899B")
scale_colour_gradient(low = "white", high = "#128f76")
countries_plot_mini <- countries_plot
countries_plot_mini$data <- countries_plot_mini$data %>% filter(ID != "Antarctica")

1
data-raw/microorganisms.txt

@ -37251,6 +37251,7 @@ @@ -37251,6 +37251,7 @@
"B_MYCBC_TKNS" "Mycobacterium tokaiense" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "tokaiense" "" "species" "Tsukamura, 1981" "c457ca4ae3a404100c8ce8c82a6100cc" "CoL" 2 "72477006"
"B_MYCBC_TRPL" "Mycobacterium triplex" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "triplex" "" "species" "Floyd et al., 1997" "f23c2b6cad7a0e20374cdf3d3ff55dce" "CoL" 2 "113860005"
"B_MYCBC_TRVL" "Mycobacterium triviale" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "triviale" "" "species" "Kubica, 1970" "9cb8b676cce27952821e173b12bfff3f" "CoL" 2 "40333002"
"B_MYCBC_TBRC" "Mycobacterium tuberculosis" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "tuberculosis" "" "species" "Lehmann et al., 2018" "778540" "DSMZ" 2 "c(\"113861009\", \"113858008\")"
"B_MYCBC_TUSC" "Mycobacterium tusciae" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "tusciae" "" "species" "Tortoli et al., 1999" "7a8ff8f5a2b16131366fe6e8dfb6b570" "CoL" 2
"B_MYCBC_ULCR" "Mycobacterium ulcerans" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "ulcerans" "" "species" "MacCallum et al., 1950" "96b3a2e207e76f4725132034d7d0bde1" "CoL" 2 "40713003"
"B_MYCBC_VACC" "Mycobacterium vaccae" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "vaccae" "" "species" "Bonicke et al., 1964" "adbc928aba39beadc25b2ba7e8214c91" "CoL" 2 "54925005"

16
data-raw/reproduction_of_microorganisms.R

@ -920,6 +920,22 @@ testthat::test_file("tests/testthat/test-data.R") @@ -920,6 +920,22 @@ testthat::test_file("tests/testthat/test-data.R")
testthat::test_file("tests/testthat/test-mo.R")
testthat::test_file("tests/testthat/test-mo_property.R")
# edit 2020-05-28
# Not sure why it now says M. tuberculosis was renamed to M. africanum (B_MYCBC_AFRC), but that's not true.
# Restore M. tuberculosis as a record of its own: copy the M. africanum row,
# overwrite its identifying fields, and append it to the data set.
microorganisms <- microorganisms %>%
bind_rows(microorganisms %>%
filter(mo == "B_MYCBC_AFRC") %>%
# both SNOMED codes are stored together as a single list element
mutate(mo = "B_MYCBC_TBRC", snomed = list(c("113861009", "113858008")),
ref = "Lehmann et al., 2018",species_id = "778540",
source = "DSMZ", species = "tuberculosis",
fullname = "Mycobacterium tuberculosis")) %>%
# keep the data set sorted by full name after appending the new row
arrange(fullname)
# re-apply the 'mo' class to the ID column (presumably it does not survive the
# pipeline above -- TODO confirm)
class(microorganisms$mo) <- c("mo", "character")
# the name is valid again, so it must no longer be listed among the old names
microorganisms.old <- microorganisms.old %>% filter(fullname != "Mycobacterium tuberculosis")
# overwrite the packaged data files with the patched data sets
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
# OLD CODE ----------------------------------------------------------------

682
data-raw/reproduction_of_microorganisms_new.R

@ -1,682 +0,0 @@ @@ -1,682 +0,0 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
# ---------------------------------------------------------------------------------
# Reproduction of the `microorganisms` data set
# ---------------------------------------------------------------------------------
# Data retrieved from:
#
# [1] Catalogue of Life (CoL) through the Encyclopaedia of Life
# https://opendata.eol.org/dataset/catalogue-of-life/
# * Download the resource file with a name like "Catalogue of Life yyyy-mm-dd"
# * Extract "taxon.tab"
#
# [2] Global Biodiversity Information Facility (GBIF)
# https://doi.org/10.15468/39omei
# * Extract "Taxon.tsv"
#
# [3] Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ)
# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html
# * Download the latest "Complete List" as xlsx file (DSMZ_bactnames.xlsx)
# ---------------------------------------------------------------------------------
library(dplyr)
library(AMR)
data_col <- data.table::fread("Documents/taxon.tab")
data_gbif <- data.table::fread("Documents/Taxon.tsv")
# read the xlsx file from DSMZ (only around 2.5 MB):
data_dsmz <- readxl::read_xlsx("Downloads/DSMZ_bactnames.xlsx")
# the CoL data is over 3.7M rows:
data_col %>% freq(kingdom)
# Item Count Percent Cum. Count Cum. Percent
# --- ---------- ---------- -------- ----------- -------------
# 1 Animalia 2,225,627 59.1% 2,225,627 59.1%
# 2 Plantae 1,177,412 31.3% 3,403,039 90.4%
# 3 Fungi 290,145 7.7% 3,693,184 98.1%
# 4 Chromista 47,126 1.3% 3,740,310 99.3%
# 5 Bacteria 14,478 0.4% 3,754,788 99.7%
# 6 Protozoa 6,060 0.2% 3,760,848 99.9%
# 7 Viruses 3,827 0.1% 3,764,675 100.0%
# 8 Archaea 610 0.0% 3,765,285 100.0%
# the GBIF data is over 5.8M rows:
data_gbif %>% freq(kingdom)
# Item Count Percent Cum. Count Cum. Percent
# --- --------------- ---------- -------- ----------- -------------
# 1 Animalia 3,264,138 55.7% 3,264,138 55.7%
# 2 Plantae 1,814,962 31.0% 5,079,100 86.7%
# 3 Fungi 538,086 9.2% 5,617,186 95.9%
# 4 Chromista 181,374 3.1% 5,798,560 99.0%
# 5 Bacteria 24,048 0.4% 5,822,608 99.4%
# 6 Protozoa 15,138 0.3% 5,837,746 99.7%
# 7 incertae sedis 9,995 0.2% 5,847,741 99.8%
# 8 Viruses 9,630 0.2% 5,857,371 100.0%
# 9 Archaea 771 0.0% 5,858,142 100.0%
# Clean up helper function ------------------------------------------------
#' Normalise author references and make every column ASCII-only
#'
#' Shared by `clean_new()` and `clean_old()`, which previously each carried an
#' identical copy of these steps.
#'
#' @param x A data frame with at least a `ref` column (authorship text).
#' @return A tibble in which every column is character, `ref` is rewritten to
#'   "<first author> et al., <year>" style where possible, and the helper
#'   columns `authors2`, `lastyear` and `authors` are added.
clean_refs <- function(x) {
  x %>%
    mutate(
      authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"),
      # remove leading and trailing brackets
      authors2 = gsub("^[(](.*)[)]$", "\\1", authors2),
      # only take part after brackets if there's a name
      authors2 = ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
                        gsub(".*[)] (.*)", "\\1", authors2),
                        authors2),
      # get year from last 4 digits (text without a trailing year becomes NA)
      lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)),
      # can never be later than now
      lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
                        NA,
                        lastyear),
      # get authors without last year
      authors = gsub("(.*)[0-9]{4}$", "\\1", authors2),
      # remove nonsense characters from names
      authors = gsub("[^a-zA-Z,'& -]", "", authors),
      # remove trailing and leading spaces
      authors = trimws(authors),
      # only keep first author and replace all others by 'et al'
      authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors),
      # et al. always with ending dot
      authors = gsub(" et al\\.?", " et al.", authors),
      authors = gsub(" ?,$", "", authors),
      # don't start with 'sensu' or 'ehrenb'
      authors = gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE),
      # no initials, only surname
      authors = gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE),
      # combine author and year if year is available
      ref = ifelse(!is.na(lastyear),
                   paste0(authors, ", ", lastyear),
                   authors),
      # fix beginning and ending
      ref = gsub(", $", "", ref),
      ref = gsub("^, ", "", ref)) %>%
    # remove text if it contains 'Not assigned' like phylum in viruses
    mutate_all(~gsub("Not assigned", "", .)) %>%
    # Remove non-ASCII characters (these are not allowed by CRAN)
    lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
    # as_tibble() never turns strings into factors, so no extra argument is needed
    as_tibble()
}

#' Clean the currently accepted names of a taxonomic source
#'
#' Keeps the rows that do not point to a newer name, restricts them to the
#' microorganisms of interest, normalises the references and builds `fullname`.
#'
#' @param new A data frame with the columns `col_id_new`, `kingdom`, `phylum`,
#'   `class`, `order`, `family`, `genus`, `species`, `subspecies`, `rank` and
#'   `ref`.
#' @return A tibble of cleaned, currently accepted names.
clean_new <- function(new) {
  # not all fungi: Aspergillus, Candida, Trichophyton and Pneumocystis are the
  # most important, so only these orders are kept from the fungi
  fungal_orders <- c("Eurotiales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales")
  # genera we found in our hospitals last decades (Northern Netherlands, 2002-2018);
  # these are always kept, whatever their kingdom
  hospital_genera <- c("Absidia", "Acremonium", "Actinotignum", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Anisakis", "Apophysomyces",
                       "Arachnia", "Ascaris", "Aureobacterium", "Aureobasidium", "Balantidum", "Bilophilia", "Branhamella", "Brochontrix",
                       "Brugia", "Calymmatobacterium", "Catabacter", "Chilomastix", "Chryseomonas", "Cladophialophora", "Cladosporium",
                       "Clonorchis", "Cordylobia", "Curvularia", "Demodex", "Dermatobia", "Diphyllobothrium", "Dracunculus", "Echinococcus",
                       "Enterobius", "Euascomycetes", "Exophiala", "Fasciola", "Fusarium", "Hendersonula", "Hymenolepis", "Kloeckera",
                       "Koserella", "Larva", "Leishmania", "Lelliottia", "Loa", "Lumbricus", "Malassezia", "Metagonimus", "Molonomonas",
                       "Mucor", "Nattrassia", "Necator", "Novospingobium", "Onchocerca", "Opistorchis", "Paragonimus", "Paramyxovirus",
                       "Pediculus", "Phoma", "Phthirus", "Pityrosporum", "Pseudallescheria", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula",
                       "Salinococcus", "Sanguibacteroides", "Schistosoma", "Scopulariopsis", "Scytalidium", "Sporobolomyces", "Stomatococcus",
                       "Strongyloides", "Syncephalastraceae", "Taenia", "Torulopsis", "Trichinella", "Trichobilharzia", "Trichomonas",
                       "Trichosporon", "Trichuris", "Trypanosoma", "Wuchereria")
  new %>%
    # only the ones that have no new ID to refer to a newer name
    filter(is.na(col_id_new)) %>%
    filter(
      (
        # we only want all MICROorganisms and no viruses
        !kingdom %in% c("Animalia", "Chromista", "Plantae", "Viruses")
        & !(kingdom == "Fungi" & !order %in% fungal_orders)
      )
      | genus %in% hospital_genera) %>%
    clean_refs() %>%
    # higher ranks are named after their own taxon, everything else is
    # "genus species subspecies"
    mutate(fullname = trimws(case_when(rank == "family" ~ family,
                                       rank == "order" ~ order,
                                       rank == "class" ~ class,
                                       rank == "phylum" ~ phylum,
                                       rank == "kingdom" ~ kingdom,
                                       TRUE ~ paste(genus, species, subspecies))))
}

#' Clean the superseded names of a taxonomic source
#'
#' @param old A data frame with at least `col_id_new`, `fullname` and `ref`.
#' @param new The result of `clean_new()` for the same source; its `col_id` and
#'   `fullname` columns are used to look up the current name.
#' @return A tibble with the columns `fullname` (old name, stripped of its
#'   authorship), `ref` and `fullname_new`, limited to rows where old and new
#'   name differ.
clean_old <- function(old, new) {
  old %>%
    # only the ones that exist in the new data set
    filter(col_id_new %in% new$col_id) %>%
    clean_refs() %>%
    select(col_id_new, fullname, ref, authors2) %>%
    left_join(new %>% select(col_id, fullname_new = fullname), by = c(col_id_new = "col_id")) %>%
    # strip the authorship text, anything from an opening bracket onwards and
    # the rank markers 'var.', 'f.' and 'subsp.' from the old name
    mutate(fullname = trimws(
      gsub("(.*)[(].*", "\\1",
           stringr::str_replace(
             string = fullname,
             pattern = stringr::fixed(authors2),
             replacement = "")) %>%
        gsub(" (var|f|subsp)[.]", "", .))) %>%
    select(-c("col_id_new", "authors2")) %>%
    filter(!is.na(fullname), !is.na(fullname_new)) %>%
    filter(fullname != fullname_new, !fullname %like% "^[?]")
}
# clean CoL and GBIF ----
# clean data_col: keep only the columns we need under our own names and tag
# every row with its source
data_col <- as_tibble(data_col)
data_col <- transmute(data_col,
                      col_id = taxonID,
                      col_id_new = acceptedNameUsageID,
                      fullname = scientificName,
                      kingdom,
                      phylum,
                      class,
                      order,
                      family,
                      genus,
                      species = specificEpithet,
                      subspecies = infraspecificEpithet,
                      rank = taxonRank,
                      ref = scientificNameAuthorship,
                      species_id = furtherInformationURL,
                      source = "CoL")
# split into old and new; the old names are resolved against the new ones
data_col.new <- clean_new(data_col)
data_col.old <- clean_old(data_col, new = data_col.new)
# the raw table is no longer needed
rm(data_col)
# clean data_gbif: drop uncertain taxonomic placements, keep only the columns
# we need under our own names and tag every row with its source
data_gbif <- as_tibble(data_gbif)
data_gbif <- filter(data_gbif,
                    taxonRemarks != "doubtful",
                    kingdom != "incertae sedis",
                    taxonRank != "unranked")
data_gbif <- transmute(data_gbif,
                       col_id = taxonID,
                       col_id_new = acceptedNameUsageID,
                       fullname = scientificName,
                       kingdom,
                       phylum,
                       class,
                       order,
                       family,
                       genus,
                       species = specificEpithet,
                       subspecies = infraspecificEpithet,
                       rank = taxonRank,
                       ref = scientificNameAuthorship,
                       species_id = as.character(parentNameUsageID),
                       source = "GBIF")
# split into old and new; the old names are resolved against the new ones
data_gbif.new <- clean_new(data_gbif)
data_gbif.old <- clean_old(data_gbif, new = data_gbif.new)
# the raw table is no longer needed
rm(data_gbif)
# put CoL and GBIF together ----
MOs.new <- bind_rows(data_col.new, data_gbif.new)
# when a name occurs more than once, keep the record with the longest
# taxonomic tree text: sort on that length first, then deduplicate on name
MOs.new <- mutate(MOs.new,
                  taxonomic_tree_length = nchar(trimws(paste(kingdom, phylum, class, order, family, genus, species, subspecies))))
MOs.new <- arrange(MOs.new, desc(taxonomic_tree_length))
MOs.new <- distinct(MOs.new, fullname, .keep_all = TRUE)
# drop the helper columns and sort alphabetically
MOs.new <- select(MOs.new, -c("col_id_new", "authors2", "authors", "lastyear", "taxonomic_tree_length"))
MOs.new <- arrange(MOs.new, fullname)

# the old names: first occurrence of every name, sorted alphabetically
MOs.old <- bind_rows(data_col.old, data_gbif.old)
MOs.old <- distinct(MOs.old, fullname, .keep_all = TRUE)
MOs.old <- arrange(MOs.old, fullname)
# clean up DSMZ ---
# bring the DSMZ list into the same column layout as the CoL/GBIF data;
# kingdom, phylum, class, order and family are not set here but are joined in
# below from `ref_taxonomy`
data_dsmz <- data_dsmz %>%
  as_tibble() %>%
  transmute(col_id = NA_integer_,
            col_id_new = NA_integer_,
            fullname = "",
            genus = ifelse(is.na(GENUS), "", GENUS),
            species = ifelse(is.na(SPECIES), "", SPECIES),
            subspecies = ifelse(is.na(SUBSPECIES), "", SUBSPECIES),
            rank = ifelse(species == "", "genus", "species"),
            ref = AUTHORS,
            species_id = as.character(RECORD_NO),
            source = "DSMZ")

# DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col
ref_taxonomy <- MOs.new %>%
  # filter BEFORE deduplicating: otherwise a genus whose first record has an
  # empty family is lost, even if another record of that genus has one
  filter(family != "",
         genus %in% data_dsmz$genus) %>%
  # one taxonomy row per genus, so the join below cannot multiply rows
  distinct(genus, .keep_all = TRUE) %>%
  select(kingdom, phylum, class, order, family, genus)

data_dsmz <- data_dsmz %>%
  left_join(ref_taxonomy, by = "genus") %>%
  # everything in the DSMZ list is labelled Bacteria, matched or not
  mutate(kingdom = "Bacteria")

data_dsmz.new <- data_dsmz %>%
  clean_new() %>%
  distinct(fullname, .keep_all = TRUE) %>%
  # same columns, in the same order, as the combined CoL/GBIF data
  select(colnames(MOs.new)) %>%
  arrange(fullname)
# combine everything ----
# CoL/GBIF records come first, so they win over DSMZ when a name is in both
MOs <- bind_rows(MOs.new, data_dsmz.new)
MOs <- distinct(MOs, fullname, .keep_all = TRUE)
# not the ones that are old
MOs <- filter(MOs, !fullname %in% MOs.old$fullname)
MOs <- arrange(MOs, fullname)
# only records that came from CoL keep their col_id
MOs <- mutate(MOs, col_id = ifelse(source != "CoL", NA_integer_, col_id))
MOs <- filter(MOs, fullname != "")

# remove all intermediate objects in one go
rm(data_col.new,
   data_col.old,
   data_gbif.new,
   data_gbif.old,
   data_dsmz,
   data_dsmz.new,
   ref_taxonomy,
   MOs.new)

# keep a backup of the combined data before it is modified further
MOs.bak <- MOs
# Trichomonas trick ----
# for species in Trypanosoma and Trichomonas we observe a lot of taxonomic info missing
MOs %>% filter(genus %in% c("Trypanosoma", "Trichomonas")) %>% View()
# give every record of these two genera the higher taxonomy of the record
# whose full name is the genus itself
for (genus_name in c("Trypanosoma", "Trichomonas")) {
  genus_members <- which(MOs$genus == genus_name)
  genus_record <- which(MOs$fullname == genus_name)
  for (taxon_rank in c("kingdom", "phylum", "class", "order", "family")) {
    MOs[genus_members, taxon_rank] <- MOs[genus_record, ][[taxon_rank]]
  }
}

# fill taxonomic properties that are missing (NA or empty text)
for (taxon_rank in c("phylum", "class", "order", "family")) {
  is_blank <- MOs[[taxon_rank]] %in% c(NA, "")
  MOs[[taxon_rank]][is_blank] <- paste0("(unknown ", taxon_rank, ")")
}
# leave no loop variables behind in the workspace
rm(genus_name, genus_members, genus_record, taxon_rank, is_blank)
# Abbreviations ----
# Add abbreviations so we can easily know which ones are which ones.
# These will become valid and unique microbial IDs for the AMR package.

# abbreviation of a higher taxon or genus name
abbreviate_taxon <- function(x, minlength) {
  abbreviate(x,
             minlength = minlength,
             use.classes = TRUE,
             method = "both.sides",
             strict = FALSE)
}
# abbreviation of a species or subspecies epithet (title-cased first)
abbreviate_epithet <- function(x) {
  abbreviate(stringr::str_to_title(x),
             minlength = 3,
             use.classes = FALSE,
             method = "both.sides")
}

MOs <- MOs %>%
  arrange(kingdom, fullname) %>%
  # abbreviations of higher taxa and genera are determined per kingdom,
  # because the kingdom is part of the final code
  group_by(kingdom) %>%
  mutate(abbr_other = case_when(
    rank == "family" ~ paste0("[FAM]_", abbreviate_taxon(family, minlength = 8)),
    rank == "order" ~ paste0("[ORD]_", abbreviate_taxon(order, minlength = 8)),
    rank == "class" ~ paste0("[CLS]_", abbreviate_taxon(class, minlength = 8)),
    rank == "phylum" ~ paste0("[PHL]_", abbreviate_taxon(phylum, minlength = 8)),
    rank == "kingdom" ~ paste0("[KNG]_", kingdom),
    TRUE ~ NA_character_
  )) %>%
  mutate(abbr_genus = abbreviate_taxon(genus, minlength = 7)) %>%
  # species abbreviations may be the same between genera
  # because the genus abbreviation is part of the abbreviation
  group_by(genus) %>%
  mutate(abbr_species = abbreviate_epithet(species)) %>%
  # likewise, subspecies abbreviations are determined per species
  group_by(genus, species) %>%
  mutate(abbr_subspecies = abbreviate_epithet(subspecies)) %>%
  ungroup() %>%
  # build the code and remove trailing underscores
  mutate(mo = gsub("_+$", "",
                   toupper(paste(
                     # first character: kingdom (two characters for animals and plants)
                     ifelse(kingdom %in% c("Animalia", "Plantae"),
                            substr(kingdom, 1, 2),
                            substr(kingdom, 1, 1)),
                     # next: genus, species, subspecies - or the higher taxon
                     ifelse(is.na(abbr_other),
                            paste(abbr_genus, abbr_species, abbr_subspecies, sep = "_"),
                            abbr_other),
                     sep = "_")))) %>%
  mutate(
    # these one or two must be unique too
    mo = ifelse(duplicated(.$mo), paste0(mo, "1"), mo),
    # records without a full name get one from their genus/species/subspecies
    fullname = ifelse(fullname == "",
                      trimws(paste(genus, species, subspecies)),
                      fullname)) %>%
  # put `mo` in front, followed by the rest
  select(mo, everything(), -abbr_other, -abbr_genus, -abbr_species, -abbr_subspecies)
# add non-taxonomic entries

# one manually added record with an entirely unknown higher taxonomy
manual_unknown <- function(mo, fullname, kingdom, genus, rank) {
  data.frame(mo = mo,
             col_id = NA_integer_,
             fullname = fullname,
             kingdom = kingdom,
             phylum = "(unknown phylum)",
             class = "(unknown class)",
             order = "(unknown order)",
             family = "(unknown family)",
             genus = genus,
             species = "(unknown species)",
             subspecies = "(unknown subspecies)",
             rank = rank,
             ref = NA_character_,
             species_id = "",
             source = "manually added",
             stringsAsFactors = FALSE)
}

# a manually added Staphylococcus group, based on the genus-level record
manual_staphylococcus <- function(suffix, new_species, new_fullname) {
  MOs %>%
    filter(genus == "Staphylococcus", species == "") %>% .[1, ] %>%
    mutate(mo = paste(mo, suffix, sep = "_"),
           rank = "species",
           col_id = NA_integer_,
           species = new_species,
           fullname = new_fullname,
           ref = NA_character_,
           species_id = "",
           source = "manually added")
}

# code of the genus-level Streptococcus record; all group codes extend it
streptococcus_mo <- MOs[MOs$fullname == "Streptococcus", ]$mo

# a Streptococcus group that keeps all other details of its template species
streptococcus_group <- function(template_species, suffix, new_species, new_fullname) {
  MOs %>%
    filter(genus == "Streptococcus", species == template_species) %>% .[1, ] %>%
    mutate(mo = paste(streptococcus_mo, suffix, sep = "_"),
           species = new_species,
           fullname = new_fullname)
}

# a Streptococcus group that only borrows the taxonomy of its template species
# and is marked as manually added
manual_streptococcus <- function(template_species, suffix, new_species, new_fullname) {
  MOs %>%
    filter(genus == "Streptococcus", species == template_species) %>% .[1, ] %>%
    mutate(mo = paste(streptococcus_mo, suffix, sep = "_"),
           col_id = NA_integer_,
           species = new_species,
           fullname = new_fullname,
           ref = NA_character_,
           species_id = "",
           source = "manually added")
}

MOs <- MOs %>%
  bind_rows(
    # Unknowns
    manual_unknown(mo = "UNKNOWN",
                   fullname = "(unknown name)",
                   kingdom = "(unknown kingdom)",
                   genus = "(unknown genus)",
                   rank = "(unknown rank)"),
    manual_unknown(mo = "B_GRAMN",
                   fullname = "(unknown Gram-negatives)",
                   kingdom = "Bacteria",
                   genus = "(unknown Gram-negatives)",
                   rank = "species"),
    manual_unknown(mo = "B_GRAMP",
                   fullname = "(unknown Gram-positives)",
                   kingdom = "Bacteria",
                   genus = "(unknown Gram-positives)",
                   rank = "species"),
    # CoNS
    manual_staphylococcus("CNS", "coagulase-negative", "Coagulase-negative Staphylococcus (CoNS)"),
    # CoPS
    manual_staphylococcus("CPS", "coagulase-positive", "Coagulase-positive Staphylococcus (CoPS)"),
    # Streptococci groups A, B, C, D, F, G, H, K
    # we can keep all other details, since S. pyogenes is the only member of group A
    streptococcus_group("pyogenes", "GRA", "group A", "Streptococcus group A"),
    # we can keep all other details, since S. agalactiae is the only member of group B
    streptococcus_group("agalactiae", "GRB", "group B", "Streptococcus group B"),
    manual_streptococcus("dysgalactiae", "GRC", "group C", "Streptococcus group C"),
    manual_streptococcus("agalactiae", "GRD", "group D", "Streptococcus group D"),
    manual_streptococcus("agalactiae", "GRF", "group F", "Streptococcus group F"),
    manual_streptococcus("agalactiae", "GRG", "group G", "Streptococcus group G"),
    manual_streptococcus("agalactiae", "GRH", "group H", "Streptococcus group H"),
    manual_streptococcus("agalactiae", "GRK", "group K", "Streptococcus group K"),
    # Beta-haemolytic Streptococci
    manual_streptococcus("agalactiae", "HAE", "beta-haemolytic", "Beta-haemolytic Streptococcus")
  )
# everything distinct? (should print 0)
sum(duplicated(MOs$mo))
colnames(MOs)

# set prevalence per species: 1 for the first group below, 2 for the second,
# 3 for everything else
prevalence_1_genera <- c("Enterococcus", "Staphylococcus", "Streptococcus")
prevalence_1_codes <- c("UNKNOWN", "B_GRAMN", "B_GRAMP")
prevalence_2_phyla <- c("Proteobacteria",
                        "Firmicutes",
                        "Actinobacteria",
                        "Sarcomastigophora")
prevalence_2_genera <- c("Aspergillus",
                         "Bacteroides",
                         "Candida",
                         "Capnocytophaga",
                         "Chryseobacterium",
                         "Cryptococcus",
                         "Elisabethkingia",
                         "Flavobacterium",
                         "Fusobacterium",
                         "Giardia",
                         "Leptotrichia",
                         "Mycoplasma",
                         "Prevotella",
                         "Rhodotorula",
                         "Treponema",
                         "Trichophyton",
                         "Trichomonas",
                         "Ureaplasma")
prevalence_2_ranks <- c("kingdom", "phylum", "class", "order", "family")

MOs <- MOs %>%
  mutate(prevalence = case_when(
    class == "Gammaproteobacteria"
    | genus %in% prevalence_1_genera
    | mo %in% prevalence_1_codes
    ~ 1,
    phylum %in% prevalence_2_phyla
    | genus %in% prevalence_2_genera
    | rank %in% prevalence_2_ranks
    ~ 2,
    TRUE ~ 3
  ))
# sort on full name, then turn both tables into plain data.frames
MOs <- MOs %>%
  arrange(fullname) %>%
  as.data.frame(stringsAsFactors = FALSE)
MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
# store col_id as integer and give the code column the class "mo"
MOs$col_id <- as.integer(MOs$col_id)
MOs$mo <- structure(MOs$mo, class = "mo")
# compare the MO codes in this data with those of the installed package version

# MO_diff: package rows whose "<mo> <fullname>" combination does not occur in MOs;
# the join on that same key attaches mo_new/fullname_new where a match exists
MO_diff <- AMR::microorganisms %>%
  mutate(pastedtext = paste(mo, fullname)) %>%
  filter(!pastedtext %in% paste(MOs$mo, MOs$fullname)) %>%
  select(mo_old = mo, fullname, pastedtext) %>%
  left_join(
    MOs %>%
      mutate(pastedtext = paste(mo, fullname)) %>%
      select(mo_new = mo, fullname_new = fullname, pastedtext),
    by = "pastedtext"
  ) %>%
  select(mo_old, mo_new, fullname_new)

# mo_diff2: names that exist in both tables but carry a different code
# (old codes containing "[" are left out)
mo_diff2 <- AMR::microorganisms %>%
  select(mo, fullname) %>%
  left_join(
    select(MOs, mo, fullname),
    by = "fullname",
    suffix = c("_old", "_new")
  ) %>%
  filter(
    mo_old != mo_new,
    #!mo_new %in% mo_old,
    !mo_old %like% "\\["
  )

# mo_diff3: the package's translation table (names -> values), extended with the
# full name of each value in the package data and the code that name has in MOs
mo_diff3 <- tibble(
  previous_old = names(AMR:::make_trans_tbl()),
  previous_new = AMR:::make_trans_tbl()
) %>%
  left_join(
    select(AMR::microorganisms, mo, fullname),
    by = c(previous_new = "mo")
  ) %>%
  left_join(
    select(MOs, mo_new = mo, fullname),
    by = "fullname"
  )
# gained: genus frequencies of full names in MOs that the package version lacks
MOs %>%
  filter(!fullname %in% AMR::microorganisms[["fullname"]]) %>%
  freq(genus)
# lost: genus frequencies of package names (kingdom Chromista excluded) that are
# found neither in MOs nor in MOs.old
AMR::microorganisms %>%
  filter(
    kingdom != "Chromista",
    !fullname %in% MOs$fullname,
    !fullname %in% MOs.old$fullname
  ) %>%
  freq(genus)
# write both tables as RDS files (relative paths, so into the working directory)
saveRDS(object = MOs, file = "microorganisms.rds")
saveRDS(object = MOs.old, file = "microorganisms.old.rds")
# on the server, do:
# NOTE(review): `microorganisms` and `microorganisms.old` are not created in the
# visible part of this script; presumably they are loaded from the RDS files
# first -- confirm before running
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
# clean up the objects that were just stored
rm(microorganisms, microorganisms.old)
# TO DO AFTER THIS
# * Update the year and dim()s in R/data.R
# * Rerun data-raw/reproduction_of_rsi_translation.R
# * Run unit tests

BIN
data/microorganisms.old.rda

Binary file not shown.

BIN
data/microorganisms.rda

Binary file not shown.

2
docs/404.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9021</span>
</span>
</div>

2
docs/LICENSE-text.html

@ -81,7 +81,7 @@ @@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9021</span>
</span>
</div>

426
docs/articles/AMR.html

@ -39,7 +39,7 @@ @@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9021</span>
</span>
</div>
@ -186,7 +186,7 @@ @@ -186,7 +186,7 @@
<h1 data-toc-skip>How to conduct AMR analysis</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">25 May 2020</h4>
<h4 class="date">28 May 2020</h4>
<small class="dont-index">Source: <a href="https://gitlab.com/msberends/AMR/blob/master/vignettes/AMR.Rmd"><code>vignettes/AMR.Rmd</code></a></small>
<div class="hidden name"><code>AMR.Rmd</code></div>
@ -195,7 +195,7 @@ @@ -195,7 +195,7 @@
<p><strong>Note:</strong> values on this page will change with every website update since they are based on randomly created values and the page was written in <a href="https://rmarkdown.rstudio.com/">R Markdown</a>. However, the methodology remains unchanged. This page was generated on 25 May 2020.</p>
<p><strong>Note:</strong> values on this page will change with every website update since they are based on randomly created values and the page was written in <a href="https://rmarkdown.rstudio.com/">R Markdown</a>. However, the methodology remains unchanged. This page was generated on 28 May 2020.</p>
<div id="introduction" class="section level1">
<h1 class="hasAnchor">
<a href="#introduction" class="anchor"></a>Introduction</h1>
@ -226,21 +226,21 @@ @@ -226,21 +226,21 @@
</tr></thead>
<tbody>
<tr class="odd">
<td align="center">2020-05-25</td>
<td align="center">2020-05-28</td>
<td align="center">abcd</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">S</td>
</tr>
<tr class="even">
<td align="center">2020-05-25</td>
<td align="center">2020-05-28</td>
<td align="center">abcd</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">R</td>
</tr>
<tr class="odd">
<td align="center">2020-05-25</td>
<td align="center">2020-05-28</td>
<td align="center">efgh</td>
<td align="center">Escherichia coli</td>
<td align="center">R</td>
@ -336,10 +336,10 @@ @@ -336,10 +336,10 @@
</tr></thead>
<tbody>
<tr class="odd">
<td align="center">2015-03-17</td>
<td align="center">U1</td>
<td align="center">Hospital D</td>
<td align="center">Escherichia coli</td>
<td align="center">2015-07-23</td>
<td align="center">Z5</td>
<td align="center">Hospital B</td>
<td align="center">Streptococcus pneumoniae</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
@ -347,59 +347,59 @@ @@ -347,59 +347,59 @@
<td align="center">F</td>
</tr>
<tr class="even">
<td align="center">2017-08-02</td>
<td align="center">P6</td>
<td align="center">Hospital B</td>
<td align="center">Streptococcus pneumoniae</td>
<td align="center">R</td>
<td align="center">2017-01-21</td>
<td align="center">D6</td>
<td align="center">Hospital C</td>
<td align="center">Staphylococcus aureus</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">F</td>
<td align="center">S</td>
<td align="center">M</td>
</tr>
<tr class="odd">
<td align="center">2017-06-24</td>
<td align="center">E4</td>
<td align="center">Hospital C</td>
<td align="center">2014-05-20</td>
<td align="center">Y10</td>
<td align="center">Hospital A</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">I</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">M</td>
<td align="center">F</td>
</tr>
<tr class="even">
<td align="center">2011-02-12</td>
<td align="center">I10</td>
<td align="center">2017-11-02</td>
<td align="center">M10</td>
<td align="center">Hospital D</td>
<td align="center">Streptococcus pneumoniae</td>
<td align="center">R</td>
<td align="center">Staphylococcus aureus</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">M</td>
</tr>
<tr class="odd">
<td align="center">2010-03-17</td>
<td align="center">Q3</td>
<td align="center">2014-08-26</td>
<td align="center">B4</td>
<td align="center">Hospital C</td>
<td align="center">Staphylococcus aureus</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">F</td>
<td align="center">S</td>
<td align="center">M</td>
</tr>
<tr class="even">
<td align="center">2010-08-19</td>
<td align="center">A7</td>
<td align="center">Hospital D</td>
<td align="center">2013-05-29</td>
<td align="center">R3</td>
<td align="center">Hospital B</td>
<td align="center">Escherichia coli</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">M</td>
<td align="center">F</td>
</tr>
</tbody>
</table>
@ -432,16 +432,16 @@ Longest: 1</p> @@ -432,16 +432,16 @@ Longest: 1</p>
<tr class="odd">
<td align="left">1</td>
<td align="left">M</td>
<td align="right">10,403</td>
<td align="right">52.02%</td>
<td align="right">10,403</td>
<td align="right">52.02%</td>
<td align="right">10,518</td>
<td align="right">52.59%</td>
<td align="right">10,518</td>
<td align="right">52.59%</td>
</tr>
<tr class="even">
<td align="left">2</td>
<td align="left">F</td>
<td align="right">9,597</td>
<td align="right">47.99%</td>
<td align="right">9,482</td>
<td align="right">47.41%</td>
<td align="right">20,000</td>
<td align="right">100.00%</td>
</tr>
@ -481,7 +481,7 @@ Longest: 1</p> @@ -481,7 +481,7 @@ Longest: 1</p>
<span class="co"># NOTE: Using column `bacteria` as input for `col_mo`.</span>
<span class="co"># NOTE: Using column `date` as input for `col_date`.</span>
<span class="co"># NOTE: Using column `patient_id` as input for `col_patient_id`.</span></pre></body></html></div>
<p>So only 28.2% is suitable for resistance analysis! We can now filter on it with the <code><a href="https://dplyr.tidyverse.org/reference/filter.html">filter()</a></code> function, also from the <code>dplyr</code> package:</p>
<p>So only 28.5% is suitable for resistance analysis! We can now filter on it with the <code><a href="https://dplyr.tidyverse.org/reference/filter.html">filter()</a></code> function, also from the <code>dplyr</code> package:</p>
<div class="sourceCode" id="cb16"><html><body><pre class="r"><span class="no">data_1st</span> <span class="kw">&lt;-</span> <span class="no">data</span> <span class="kw">%&gt;%</span>
<span class="fu"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(<span class="no">first</span> <span class="kw">==</span> <span class="fl">TRUE</span>)</pre></body></html></div>
<p>For future use, the above two syntaxes can be shortened with the <code><a href="../reference/first_isolate.html">filter_first_isolate()</a></code> function:</p>
@ -491,7 +491,7 @@ Longest: 1</p> @@ -491,7 +491,7 @@ Longest: 1</p>
<div id="first-weighted-isolates" class="section level2">
<h2 class="hasAnchor">
<a href="#first-weighted-isolates" class="anchor"></a>First <em>weighted</em> isolates</h2>
<p>We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all isolates of patient K4, sorted on date:</p>
<p>We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all isolates of patient P10, sorted on date:</p>
<table class="table">
<thead><tr class="header">
<th align="center">isolate</th>
@ -507,52 +507,52 @@ Longest: 1</p> @@ -507,52 +507,52 @@ Longest: 1</p>
<tbody>
<tr class="odd">
<td align="center">1</td>
<td align="center">2010-01-01</td>
<td align="center">K4</td>
<td align="center">2010-01-22</td>
<td align="center">P10</td>
<td align="center">B_ESCHR_COLI</td>
<td align="center">R</td>
<td align="center">R</td>
<td align="center">I</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">TRUE</td>
</tr>
<tr class="even">
<td align="center">2</td>
<td align="center">2010-02-09</td>
<td align="center">K4</td>
<td align="center">2010-02-24</td>
<td align="center">P10</td>
<td align="center">B_ESCHR_COLI</td>
<td align="center">S</td>
<td align="center">I</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">FALSE</td>
</tr>
<tr class="odd">
<td align="center">3</td>
<td align="center">2010-03-03</td>
<td align="center">K4</td>
<td align="center">2010-03-10</td>
<td align="center">P10</td>
<td align="center">B_ESCHR_COLI</td>
<td align="center">I</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">R</td>
<td align="center">FALSE</td>
</tr>
<tr class="even">
<td align="center">4</td>
<td align="center">2010-04-25</td>
<td align="center">K4</td>
<td align="center">2010-03-25</td>
<td align="center">P10</td>
<td align="center">B_ESCHR_COLI</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">FALSE</td>
</tr>
<tr class="odd">
<td align="center">5</td>
<td align="center">2010-07-04</td>
<td align="center">K4</td>
<td align="center">2010-04-30</td>
<td align="center">P10</td>
<td align="center">B_ESCHR_COLI</td>
<td align="center">S</td>
<td align="center">S</td>
@ -562,62 +562,62 @@ Longest: 1</p> @@ -562,62 +562,62 @@ Longest: 1</p>
</tr>
<tr class="even">
<td align="center">6</td>
<td align="center">2010-09-04</td>
<td align="center">K4</td>
<td align="center">2010-05-05</td>
<td align="center">P10</td>
<td align="center">B_ESCHR_COLI</td>
<td align="center">S</td>
<td align="center">I</td>
<td align="center">S</td>
<td align="center">R</td>
<td align="center">S</td>
<td align="center">FALSE</td>
</tr>
<tr class="odd">
<td align="center">7</td>
<td align="center">2010-10-01</td>
<td align="center">K4</td>