(v1.0.1.9000) first PCA implementation

v1.8.2
parent f444c24ed3
commit fa0d9c58d9
  1. 4
      DESCRIPTION
  2. 5
      NAMESPACE
  3. 6
      NEWS.md
  4. 10
      R/age.R
  5. 349
      R/ggplot_pca.R
  6. 62
      R/ggplot_rsi.R
  7. 2
      R/lifecycle.R
  8. 128
      R/pca.R
  9. 15
      _pkgdown.yml
  10. 6
      data-raw/country_analysis.R
  11. 9
      docs/404.html
  12. 9
      docs/LICENSE-text.html
  13. 13
      docs/articles/EUCAST.html
  14. 345
      docs/articles/PCA.html
  15. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png
  16. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png
  17. BIN
      docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png
  18. 124
      docs/articles/benchmarks.html
  19. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png
  20. BIN
      docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png
  21. 10
      docs/articles/index.html
  22. 9
      docs/authors.html
  23. 9
      docs/index.html
  24. 63
      docs/news/index.html
  25. 1
      docs/pkgdown.yml
  26. 11
      docs/reference/age.html
  27. 17
      docs/reference/age_groups.html
  28. 415
      docs/reference/ggplot_pca.html
  29. 69
      docs/reference/index.html
  30. 11
      docs/reference/lifecycle.html
  31. 362
      docs/reference/pca.html
  32. 9
      docs/sitemap.xml
  33. 2
      man/age.Rd
  34. 8
      man/age_groups.Rd
  35. 119
      man/ggplot_pca.Rd
  36. 2
      man/lifecycle.Rd
  37. 87
      man/pca.Rd
  38. 4
      vignettes/EUCAST.Rmd
  39. 91
      vignettes/PCA.Rmd
  40. 6
      vignettes/benchmarks.Rmd

@ -1,6 +1,6 @@
Package: AMR
Version: 1.0.1
Date: 2020-02-22
Version: 1.0.1.9000
Date: 2020-03-07
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),

@ -37,6 +37,7 @@ S3method(pillar_shaft,rsi)
S3method(plot,mic)
S3method(plot,resistance_predict)
S3method(plot,rsi)
S3method(prcomp,data.frame)
S3method(print,ab)
S3method(print,bug_drug_combinations)
S3method(print,catalogue_of_life_version)
@ -120,6 +121,7 @@ export(g.test)
export(geom_rsi)
export(get_locale)
export(get_mo_source)
export(ggplot_pca)
export(ggplot_rsi)
export(ggplot_rsi_predict)
export(guess_ab_col)
@ -169,6 +171,7 @@ export(mrgn)
export(n_rsi)
export(p.symbol)
export(p_symbol)
export(pca)
export(portion_I)
export(portion_IR)
export(portion_R)
@ -224,6 +227,7 @@ exportMethods(kurtosis.default)
exportMethods(kurtosis.matrix)
exportMethods(plot.mic)
exportMethods(plot.rsi)
exportMethods(prcomp.data.frame)
exportMethods(print.ab)
exportMethods(print.bug_drug_combinations)
exportMethods(print.catalogue_of_life_version)
@ -318,6 +322,7 @@ importFrom(pillar,pillar_shaft)
importFrom(pillar,type_sum)
importFrom(rlang,as_label)
importFrom(rlang,enquos)
importFrom(rlang,eval_tidy)
importFrom(stats,complete.cases)
importFrom(stats,glm)
importFrom(stats,lm)

@ -1,3 +1,9 @@
# AMR 1.0.1.9000
### New
* Support for easy principal component analysis for AMR, using the new `pca()` function
* Plotting biplots for principal component analysis using the new `ggplot_pca()` function
# AMR 1.0.1
### Changed

@ -27,7 +27,7 @@
#' @param reference reference date(s) (defaults to today), will be coerced with [as.POSIXlt()] and cannot be lower than `x`
#' @param exact a logical to indicate whether age calculation should be exact, i.e. with decimals. It divides the number of days of [year-to-date](https://en.wikipedia.org/wiki/Year-to-date) (YTD) of `x` by the number of days in the year of `reference` (either 365 or 366).
#' @param na.rm a logical to indicate whether missing values should be removed
#' @return An integer (no decimals) if `exact = FALSE`, a double (with decimals) otherwise
#' @return An [integer] (no decimals) if `exact = FALSE`, a [double] (with decimals) otherwise
#' @seealso To split ages into groups, use the [age_groups()] function.
#' @importFrom dplyr if_else
#' @inheritSection AMR Read more on our website!
@ -95,8 +95,8 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) {
#' @inheritSection lifecycle Stable lifecycle
#' @param x age, e.g. calculated with [age()]
#' @param split_at values to split `x` at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See Details.
#' @param na.rm a logical to indicate whether missing values should be removed
#' @details To split ages, the input can be:
#' @param na.rm a [logical] to indicate whether missing values should be removed
#' @details To split ages, the input for the `split_at` parameter can be:
#'
#' * A numeric vector. A vector of e.g. `c(10, 20)` will split on 0-9, 10-19 and 20+. A value of only `50` will split on 0-49 and 50+.
#' The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).
@ -104,8 +104,8 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) {
#' - `"children"` or `"kids"`, equivalent of: `c(0, 1, 2, 4, 6, 13, 18)`. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.
#' - `"elderly"` or `"seniors"`, equivalent of: `c(65, 75, 85)`. This will split on 0-64, 65-74, 75-84, 85+.
#' - `"fives"`, equivalent of: `1:20 * 5`. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.
#' - `"tens"`, equivalent of: `1:10 * 10`. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.
#' @return Ordered [`factor`]
#' - `"tens"`, equivalent of: `1:10 * 10`. This will split on 0-9, 10-19, 20-29, ..., 80-89, 90-99, 100+.
#' @return Ordered [factor]
#' @seealso To determine ages, based on one or more reference dates, use the [age()] function.
#' @export
#' @inheritSection AMR Read more on our website!

@ -0,0 +1,349 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
#' PCA biplot with `ggplot2`
#'
#' This function is to produce a `ggplot2` variant of a so-called [biplot](https://en.wikipedia.org/wiki/Biplot) for PCA (principal component analysis), but is more flexible and more appealing than the base \R [biplot()] function.
#' @inheritSection lifecycle Maturing lifecycle
#' @param x an object returned by [pca()], [prcomp()] or [princomp()]
#' @inheritParams stats::biplot.prcomp
#' @param labels an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the [pca()] function as input for `x`, this will be determined automatically based on the attribute `non_numeric_cols`, see [pca()].
#' @param labels_textsize the size of the text used for the labels
#' @param labels_text_placement adjustment factor for the placement of the variable names (`>=1` means further away from the arrow head)
#' @param groups an optional vector of groups for the labels, with the same length as `labels`. If set, the points and labels will be coloured according to these groups. When using the [pca()] function as input for `x`, this will be determined automatically based on the attribute `non_numeric_cols`, see [pca()].
#' @param ellipse a logical to indicate whether a normal data ellipse should be drawn for each group (set with `groups`)
#' @param ellipse_prob statistical size of the ellipse in normal probability
#' @param ellipse_size the size of the ellipse line
#' @param ellipse_alpha the alpha (transparency) of the ellipse line
#' @param points_size the size of the points
#' @param points_alpha the alpha (transparency) of the points
#' @param arrows a logical to indicate whether arrows should be drawn
#' @param arrows_colour the colour of the arrow and their text
#' @param arrows_size the size (thickness) of the arrow lines
#' @param arrows_textsize the size of the text for the variable names at the end of the arrows
#' @param arrows_alpha the alpha (transparency) of the arrows and their text
#' @param base_textsize the text size for all plot elements except the labels and arrows
#' @param ... Parameters passed on to functions
#' @source The [ggplot_pca()] function is based on the [ggbiplot()] function from the `ggbiplot` package by Vince Vu, as found on GitHub: <https://github.com/vqv/ggbiplot> (retrieved: 2 March 2020, their latest commit: [`7325e88`](https://github.com/vqv/ggbiplot/commit/7325e880485bea4c07465a0304c470608fffb5d9); 12 February 2015).
#'
#' As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were:
#' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid`
#' 2. Parametrised more options, like arrow and ellipse settings
#' 3. Added total amount of explained variance as a caption in the plot
#' 4. Cleaned all syntax based on the `lintr` package
#' 5. Updated documentation
#' @details The default colours for labels and points are set with [scale_colour_viridis_d()], but these can be changed by adding another scale for colour, like [scale_colour_brewer()].
#' @rdname ggplot_pca
#' @export
#' @examples
#' # `example_isolates` is a dataset available in the AMR package.
#' # See ?example_isolates.
#'
#' # See ?pca for more info about Principal Component Analysis (PCA).
#' library(dplyr)
#' pca_model <- example_isolates %>%
#' filter(mo_genus(mo) == "Staphylococcus") %>%
#' group_by(species = mo_shortname(mo)) %>%
#' summarise_if (is.rsi, resistance) %>%
#' pca(FLC, AMC, CXM, GEN, TOB, TMP, SXT, CIP, TEC, TCY, ERY)
#'
#' # old
#' biplot(pca_model)
#'
#' # new
#' ggplot_pca(pca_model)
ggplot_pca <- function(x,
                       choices = 1:2,
                       scale = TRUE,
                       labels = NULL,
                       labels_textsize = 3,
                       labels_text_placement = 1.5,
                       groups = NULL,
                       ellipse = FALSE,
                       ellipse_prob = 0.68,
                       ellipse_size = 0.5,
                       ellipse_alpha = 0.25,
                       points_size = 2,
                       points_alpha = 0.25,
                       arrows = TRUE,
                       arrows_colour = "darkblue",
                       arrows_size = 0.5,
                       arrows_textsize = 3,
                       arrows_alpha = 0.75,
                       base_textsize = 10,
                       ...) {
  stopifnot_installed_package("ggplot2")
  # a biplot is two-dimensional: fail early, before any calculation is done
  stopifnot(length(choices) == 2)

  # missing() has to be evaluated here, since it cannot be determined anymore
  # inside the helper; it tells whether groups/labels were set by the user
  calculations <- pca_calculations(pca_model = x,
                                   groups = groups,
                                   groups_missing = missing(groups),
                                   labels = labels,
                                   labels_missing = missing(labels),
                                   choices = choices,
                                   scale = scale,
                                   ellipse_prob = ellipse_prob,
                                   labels_text_placement = labels_text_placement)
  choices <- calculations$choices
  df.u <- calculations$df.u       # observation scores (xvar, yvar)
  df.v <- calculations$df.v       # variable directions (arrows) and text placement
  ell <- calculations$ell         # ellipse outlines per group, or NULL
  groups <- calculations$groups
  group_name <- calculations$group_name
  labels <- calculations$labels

  # Append the proportion of explained variance to the axis labels
  axis_prefix <- if (as.integer(scale) == 1) "Standardised PC" else "PC"
  explained_var <- x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)
  u.axis.labs <- paste(paste0(axis_prefix, choices),
                       paste0("\n(explained var: ", percentage(explained_var), ")"))

  # Score Labels
  if (!is.null(labels)) {
    df.u$labels <- labels
  }
  # Grouping variable
  if (!is.null(groups)) {
    df.u$groups <- groups
  }
  has_labels <- !is.null(df.u$labels)
  has_groups <- !is.null(df.u$groups)

  # Base plot
  g <- ggplot2::ggplot(data = df.u,
                       ggplot2::aes(x = xvar, y = yvar)) +
    ggplot2::xlab(u.axis.labs[1]) +
    ggplot2::ylab(u.axis.labs[2]) +
    ggplot2::expand_limits(x = c(-1.15, 1.15),
                           y = c(-1.15, 1.15))

  # Draw the points, with their labels (if any) slightly below them;
  # colour is only mapped when there is a grouping variable
  if (has_groups) {
    g <- g +
      ggplot2::geom_point(ggplot2::aes(colour = groups),
                          alpha = points_alpha,
                          size = points_size)
    if (has_labels) {
      g <- g +
        ggplot2::geom_text(ggplot2::aes(label = labels, colour = groups),
                           nudge_y = -0.05,
                           size = labels_textsize)
    }
    g <- g +
      ggplot2::scale_colour_viridis_d() +
      ggplot2::labs(colour = group_name)
  } else {
    g <- g +
      ggplot2::geom_point(alpha = points_alpha,
                          size = points_size)
    if (has_labels) {
      g <- g +
        ggplot2::geom_text(ggplot2::aes(label = labels),
                           nudge_y = -0.05,
                           size = labels_textsize)
    }
  }

  # Overlay a concentration ellipse if there are groups
  if (has_groups && isTRUE(ellipse) && !is.null(ell)) {
    g <- g +
      ggplot2::geom_path(data = ell,
                         ggplot2::aes(colour = groups, group = groups),
                         size = ellipse_size,
                         # this used to be points_alpha, which silently
                         # ignored the ellipse_alpha parameter
                         alpha = ellipse_alpha)
  }

  # Label the variable axes
  if (isTRUE(as.logical(arrows))) {
    g <- g +
      ggplot2::geom_segment(data = df.v,
                            ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
                            arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
                                                   angle = 20,
                                                   ends = "last",
                                                   type = "open"),
                            colour = arrows_colour,
                            size = arrows_size,
                            alpha = arrows_alpha) +
      ggplot2::geom_text(data = df.v,
                         ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust),
                         colour = arrows_colour,
                         size = arrows_textsize,
                         alpha = arrows_alpha)
  }

  # Add caption label about total explained variance
  g <- g + ggplot2::labs(caption = paste0("Total explained variance: ",
                                          percentage(sum(explained_var))))

  # mark-up nicely
  g <- g + ggplot2::theme_minimal(base_size = base_textsize) +
    ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "grey85"),
                   panel.grid.minor = ggplot2::element_blank(),
                   # centre title and subtitle
                   plot.title = ggplot2::element_text(hjust = 0.5),
                   plot.subtitle = ggplot2::element_text(hjust = 0.5))

  g
}
# Internal helper of ggplot_pca(): recovers the singular value decomposition
# from a fitted model and derives everything that is needed to draw a biplot.
#
# pca_model              object of class prcomp, princomp, PCA or lda
# groups, labels         optional vectors for colouring/labelling observations
# groups_missing,
# labels_missing         TRUE when the caller did not supply groups/labels; they
#                        are then taken from the first and second column of the
#                        `non_numeric_cols` attribute of `pca_model` (if any)
# choices                the two components to plot
# scale                  coerced to integer; scales the variables by d ^ scale
#                        and the observations by d ^ (1 - scale)
# ellipse_prob           probability mass of the per-group data ellipse
# labels_text_placement  adjustment factor for the hjust of the variable names
#
# Returns a list with nobs.factor, d, u, v, choices, df.u (observation scores),
# df.v (variable directions plus varname/angle/hjust), r, ell (ellipse outlines
# with columns xvar, yvar, groups; NULL when there are no groups), groups,
# group_name and labels.
#
# The ellipses are bound with base rbind(); the tag below is kept so that the
# generated NAMESPACE does not change.
#' @importFrom dplyr bind_rows
pca_calculations <- function(pca_model,
                             groups = NULL,
                             groups_missing = TRUE,
                             labels = NULL,
                             labels_missing = TRUE,
                             choices = 1:2,
                             scale = 1,
                             ellipse_prob = 0.68,
                             labels_text_placement = 1.5) {
  non_numeric_cols <- attr(pca_model, "non_numeric_cols", exact = TRUE)

  # must always exist: it is part of the returned list, also when the groups
  # were supplied by the user (in which case there is no column name to use)
  group_name <- NULL
  if (groups_missing) {
    groups <- tryCatch(non_numeric_cols[[1]],
                       error = function(e) NULL)
    group_name <- tryCatch(colnames(non_numeric_cols[1]),
                           error = function(e) NULL)
  }
  if (labels_missing) {
    labels <- tryCatch(non_numeric_cols[[2]],
                       error = function(e) NULL)
  }
  if (!is.null(groups) && is.null(labels)) {
    # turn them around
    labels <- groups
    groups <- NULL
    group_name <- NULL
  }

  # Recover the SVD
  if (inherits(pca_model, "prcomp")) {
    nobs.factor <- sqrt(nrow(pca_model$x) - 1)
    d <- pca_model$sdev
    u <- sweep(pca_model$x, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- pca_model$rotation
  } else if (inherits(pca_model, "princomp")) {
    nobs.factor <- sqrt(pca_model$n.obs)
    d <- pca_model$sdev
    u <- sweep(pca_model$scores, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- pca_model$loadings
  } else if (inherits(pca_model, "PCA")) {
    nobs.factor <- sqrt(nrow(pca_model$call$X))
    d <- unlist(sqrt(pca_model$eig)[1])
    u <- sweep(pca_model$ind$coord, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- sweep(pca_model$var$coord, 2, sqrt(pca_model$eig[seq_len(ncol(pca_model$var$coord)), 1]), FUN = "/")
  } else if (inherits(pca_model, "lda")) {
    nobs.factor <- sqrt(pca_model$N)
    d <- pca_model$svd
    u <- stats::predict(pca_model)$x / nobs.factor
    v <- pca_model$scaling
  } else {
    stop("Expected a object of class prcomp, princomp, PCA, or lda")
  }

  # Scores
  choices <- pmin(choices, ncol(u))
  obs.scale <- 1 - as.integer(scale)
  df.u <- as.data.frame(sweep(u[, choices], 2, d[choices] ^ obs.scale, FUN = "*"))

  # Directions
  v <- sweep(v, 2, d ^ as.integer(scale), FUN = "*")
  df.v <- as.data.frame(v[, choices])

  names(df.u) <- c("xvar", "yvar")
  names(df.v) <- names(df.u)

  df.u <- df.u * nobs.factor

  # Scale the radius of the correlation circle so that it corresponds to
  # a data ellipse for the standardized PC scores
  circle_prob <- 0.69
  r <- sqrt(stats::qchisq(circle_prob, df = 2)) * prod(colMeans(df.u ^ 2)) ^ (0.25)

  # Scale directions
  v.scale <- rowSums(v ^ 2)
  df.v <- r * df.v / sqrt(max(v.scale))

  # Grouping variable
  if (!is.null(groups)) {
    df.u$groups <- groups
  }

  df.v$varname <- rownames(v)

  # Variables for text label placement
  df.v$angle <- with(df.v, (180 / pi) * atan(yvar / xvar))
  df.v$hjust <- with(df.v, (1 - labels_text_placement * sign(xvar)) / 2)

  if (!is.null(df.u$groups)) {
    # unit circle, kept local (no `<<-`) so nothing leaks out of this function
    theta <- c(seq(-pi, pi, length.out = 50), seq(pi, -pi, length.out = 50))
    circle <- cbind(cos(theta), sin(theta))
    ed <- sqrt(stats::qchisq(ellipse_prob, df = 2))

    # outline of the data ellipse of one group; NULL if the group has too few
    # observations (<= 2) to estimate a covariance matrix from
    group_ellipse <- function(g) {
      members <- df.u[which(df.u$groups == g), , drop = FALSE]
      if (nrow(members) <= 2) {
        return(NULL)
      }
      sigma <- stats::var(cbind(members$xvar, members$yvar))
      mu <- c(mean(members$xvar), mean(members$yvar))
      outline <- sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+")
      data.frame(xvar = outline[, 1],
                 yvar = outline[, 2],
                 groups = members$groups[1],
                 stringsAsFactors = FALSE)
    }

    # lapply() always returns a list; sapply() could simplify equally shaped
    # data.frames into a matrix of lists that cannot be row-bound
    ellipses <- lapply(unique(df.u$groups), group_ellipse)
    ellipses <- ellipses[!vapply(ellipses, is.null, logical(1))]
    if (length(ellipses) > 0) {
      ell <- do.call(rbind, ellipses)
    } else {
      # no group was large enough: return an empty outline so nothing is drawn
      ell <- data.frame(xvar = numeric(0),
                        yvar = numeric(0),
                        groups = df.u$groups[0],
                        stringsAsFactors = FALSE)
    }
  } else {
    ell <- NULL
  }

  list(nobs.factor = nobs.factor,
       d = d,
       u = u,
       v = v,
       choices = choices,
       df.u = df.u,
       df.v = df.v,
       r = r,
       ell = ell,
       groups = groups,
       group_name = group_name,
       labels = labels
  )
}

@ -186,12 +186,12 @@ ggplot_rsi <- function(data,
x.title = "Antimicrobial",
y.title = "Proportion",
...) {
stopifnot_installed_package("ggplot2")
x <- x[1]
facet <- facet[1]
# we work with aes_string later on
x_deparse <- deparse(substitute(x))
if (x_deparse != "x") {
@ -210,16 +210,16 @@ ggplot_rsi <- function(data,
if (facet %in% c("NULL", "")) {
facet <- NULL
}
if (is.null(position)) {
position <- "fill"
}
p <- ggplot2::ggplot(data = data) +
geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab,
combine_SI = combine_SI, combine_IR = combine_IR, ...) +
theme_rsi()
if (fill == "interpretation") {
# set RSI colours
if (isFALSE(colours) & missing(datalabels.colour)) {
@ -228,12 +228,12 @@ ggplot_rsi <- function(data,
}
p <- p + scale_rsi_colours(colours = colours)
}
if (identical(position, "fill")) {
# proportions, so use y scale with percentage
p <- p + scale_y_percent(breaks = breaks, limits = limits)
}
if (datalabels == TRUE) {
p <- p + labels_rsi_count(position = position,
x = x,
@ -243,17 +243,17 @@ ggplot_rsi <- function(data,
datalabels.size = datalabels.size,
datalabels.colour = datalabels.colour)
}
if (!is.null(facet)) {
p <- p + facet_rsi(facet = facet, nrow = nrow)
}
p <- p + ggplot2::labs(title = title,
subtitle = subtitle,
caption = caption,
x = x.title,
y = y.title)
p
}
@ -267,24 +267,24 @@ geom_rsi <- function(position = NULL,
combine_SI = TRUE,
combine_IR = FALSE,
...) {
stopifnot_installed_package("ggplot2")
if (is.data.frame(position)) {
stop("`position` is invalid. Did you accidentally use '%>%' instead of '+'?", call. = FALSE)
}
y <- "value"
if (missing(position) | is.null(position)) {
position <- "fill"
}
if (identical(position, "fill")) {
position <- ggplot2::position_fill(vjust = 0.5, reverse = TRUE)
}
x <- x[1]
# we work with aes_string later on
x_deparse <- deparse(substitute(x))
if (x_deparse != "x") {
@ -293,33 +293,33 @@ geom_rsi <- function(position = NULL,
if (x %like% '".*"') {
x <- substr(x, 2, nchar(x) - 1)
}
if (tolower(x) %in% tolower(c("ab", "abx", "antibiotics"))) {
x <- "antibiotic"
} else if (tolower(x) %in% tolower(c("SIR", "RSI", "interpretations", "result"))) {
x <- "interpretation"
}
ggplot2::layer(geom = "bar", stat = "identity", position = position,
mapping = ggplot2::aes_string(x = x, y = y, fill = fill),
params = list(...), data = function(x) {
rsi_df(data = x,
translate_ab = translate_ab,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR)
translate_ab = translate_ab,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR)
})
}
#' @rdname ggplot_rsi
#' @export
facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) {
stopifnot_installed_package("ggplot2")
facet <- facet[1]
# we work with aes_string later on
facet_deparse <- deparse(substitute(facet))
if (facet_deparse != "facet") {
@ -328,13 +328,13 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) {
if (facet %like% '".*"') {
facet <- substr(facet, 2, nchar(facet) - 1)
}
if (tolower(facet) %in% tolower(c("SIR", "RSI", "interpretations", "result"))) {
facet <- "interpretation"
} else if (tolower(facet) %in% tolower(c("ab", "abx", "antibiotics"))) {
facet <- "antibiotic"
}
ggplot2::facet_wrap(facets = facet, scales = "free_x", nrow = nrow)
}
@ -343,7 +343,7 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) {
#' @export
scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) {
stopifnot_installed_package("ggplot2")
if (all(breaks[breaks != 0] > 1)) {
breaks <- breaks / 100
}
@ -362,7 +362,7 @@ scale_rsi_colours <- function(colours = c(S = "#61a8ff",
stopifnot_installed_package("ggplot2")
# previous colour: palette = "RdYlGn"
# previous colours: values = c("#b22222", "#ae9c20", "#7cfc00")
if (!identical(colours, FALSE)) {
original_cols <- c(S = "#61a8ff",
SI = "#61a8ff",

@ -32,7 +32,7 @@
#' This page contains a section for every lifecycle (with text borrowed from the aforementioned `tidyverse` website), so they can be used in the manual pages of our functions.
#' @section Experimental lifecycle:
#' \if{html}{\figure{lifecycle_experimental.svg}{options: style=margin-bottom:5px} \cr}
#' The [lifecycle][AMR::lifecycle] of this function is **experimental**. An experimental function is in the very early stages of development. The unlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN.
#' The [lifecycle][AMR::lifecycle] of this function is **experimental**. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.
#' @section Maturing lifecycle:
#' \if{html}{\figure{lifecycle_maturing.svg}{options: style=margin-bottom:5px} \cr}
#' The [lifecycle][AMR::lifecycle] of this function is **maturing**. The underlying code of a maturing function has been roughed out, but finer details might still change. We will strive to maintain backward compatibility, but the function needs wider usage and more extensive testing in order to optimise the underlying code.

@ -0,0 +1,128 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
#' Principal Component Analysis (for AMR)
#'
#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
#' @inheritSection lifecycle Experimental lifecycle
#' @param x a [data.frame] containing numeric columns
#' @param ... columns of `x` to be selected for PCA
#' @inheritParams stats::prcomp
#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the R function [prcomp()].
#'
#' The result of the [pca()] function is a [`prcomp`] object, with an additional attribute `non_numeric_cols` which is a [data.frame] containing all columns that do not contain numeric values (and that are not entirely `NA`). These are probably the groups and labels, and will be used by [ggplot_pca()].
#' @rdname pca
#' @exportMethod prcomp.data.frame
#' @export
#' @examples
#' # `example_isolates` is a dataset available in the AMR package.
#' # See ?example_isolates.
#'
#' # calculate the resistance per group first
#' library(dplyr)
#' resistance_data <- example_isolates %>%
#' group_by(order = mo_order(mo), # group on anything, like order
#' genus = mo_genus(mo)) %>% # and genus as we do here
#' summarise_if(is.rsi, resistance) # then get resistance of all drugs
#'
#' # now conduct PCA for certain antimicrobial agents
#' pca_result <- resistance_data %>%
#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT)
#'
#' pca_result
#' summary(pca_result)
#' biplot(pca_result)
#' ggplot_pca(pca_result) # a new and convenient plot function
prcomp.data.frame <- function(x,
                              ...,
                              retx = TRUE,
                              center = TRUE,
                              scale. = TRUE,
                              tol = NULL,
                              rank. = NULL) {
  # `...` holds the columns to select; they are resolved against `x` by the
  # helper, which also removes rows with missing values
  x <- pca_transform_x(x = x, ...)

  # drop = FALSE keeps a data.frame (with column names and a row count) even
  # when only a single numeric column is left
  pca_data <- x[, vapply(x, is.numeric, logical(1)), drop = FALSE]

  message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
                      ".\n Total observations available: ", nrow(pca_data), ".")))

  # a matrix dispatches to the default method of the generic, so the unexported
  # stats:::prcomp.default does not have to be called directly
  stats::prcomp(as.matrix(pca_data),
                retx = retx,
                center = center,
                scale. = scale.,
                tol = tol,
                rank. = rank.)
}
#' @rdname pca
#' @export
pca <- function(x, ...) {
  # guard: anything that is not a data.frame is rejected up front
  if (!is.data.frame(x)) {
    stop("this function only takes a data.frame as input")
  }

  # fit the model through the prcomp() generic; `...` carries both the
  # selected columns and any further prcomp arguments
  model <- prcomp(x, ...)

  # run the same column selection again to find the columns that were not
  # used as numeric input: not numeric and not entirely NA
  prepared <- pca_transform_x(x = x, ...)
  is_descriptive <- sapply(prepared, function(column) {
    !(is.numeric(column) | all(is.na(column)))
  })

  # store them on the model, so they can be used as groups and labels later
  attr(model, "non_numeric_cols") <- prepared[, is_descriptive, drop = FALSE]
  model
}
# Internal helper of pca() and prcomp.data.frame(): prepares the input data.
#
# x    a data.frame (or an object that can be coerced to one)
# ...  columns of `x` (or expressions evaluated within `x`) to use for PCA. If
#      the first argument is a single character string naming a column, that
#      column is used. Any other argument of length 1 is assumed to be a
#      parameter meant for prcomp (like `center`) and is ignored.
#
# Returns a data.frame with the non-numeric columns of `x` (when a selection
# was made: followed by the selected numeric columns), without the rows that
# contain a missing value.
#' @importFrom dplyr ungroup %>% filter_all all_vars
#' @importFrom rlang enquos eval_tidy
pca_transform_x <- function(x, ...) {
  # unset data.table, tbl_df, etc.
  # also removes groups made by dplyr::group_by
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  x.bak <- x

  user_exprs <- enquos(...)
  if (length(user_exprs) > 0) {
    n_exprs <- length(user_exprs)
    selected <- vector("list", n_exprs)
    col_labels <- character(n_exprs)
    # tracks which expressions yield a column, so that the values and their
    # labels stay aligned when a parameter is skipped
    keep <- rep(TRUE, n_exprs)

    for (i in seq_len(n_exprs)) {
      value <- tryCatch(eval_tidy(user_exprs[[i]], data = x),
                        error = function(e) stop(e$message, call. = FALSE))
      col_labels[i] <- as_label(user_exprs[[i]])

      if (is.null(value)) {
        # nothing to add as a column
        keep[i] <- FALSE
        next
      }
      if (length(value) == 1) {
        if (i == 1) {
          # only for first item:
          if (is.character(value) && value %in% colnames(x)) {
            # this is to support: df %>% pca("mycol")
            # label the column with its name instead of the quoted string
            col_labels[i] <- value
            value <- x[, value]
          }
        } else {
          # skip item - it's a parameter like `center`
          keep[i] <- FALSE
          next
        }
      }
      # `[<-` with list() never shrinks the list, unlike `[[<-` with NULL
      selected[i] <- list(value)
    }

    x <- as.data.frame(selected[keep], stringsAsFactors = FALSE)
    if (any(vapply(x, function(y) !is.numeric(y), logical(1)))) {
      warning("Be sure to first calculate the resistance (or susceptibility) of variables with antimicrobial test results, since PCA works with numeric variables only. Please see Examples in ?pca.")
    }

    # set column names
    tryCatch(colnames(x) <- col_labels[keep],
             error = function(e) warning("column names could not be set"))

    # keep only numeric columns (as a data.frame, also if only one is left)
    x <- x[, vapply(x, is.numeric, logical(1)), drop = FALSE]

    # bind the data set with the non-numeric columns
    non_numeric <- vapply(x.bak, function(y) !is.numeric(y) && !all(is.na(y)), logical(1))
    x <- cbind(x.bak[, non_numeric, drop = FALSE], x)
  }

  x %>%
    ungroup() %>% # would otherwise select the grouping vars
    filter_all(all_vars(!is.na(.)))
}

@ -44,6 +44,9 @@ navbar:
- text: "Predict antimicrobial resistance"
icon: "fa-dice"
href: "articles/resistance_predict.html"
- text: "Conduct principal component analysis for AMR"
icon: "fa-compress"
href: "articles/PCA.html"
- text: "Determine multi-drug resistance (MDR)"
icon: "fa-skull-crossbones"
href: "articles/MDR.html"
@ -94,7 +97,6 @@ reference:
- "`guess_ab_col`"
- "`mo_source`"
- "`read.4D`"
- "`rsi_translation`"
- title: "Enhancing your data"
desc: >
Functions to add new data to your existing data, such as the determination
@ -117,28 +119,31 @@ reference:
Functions for conducting AMR analysis, like counting isolates, calculating
resistance or susceptibility, or make plots.
contents:
- "`proportion`"
- "`count`"
- "`availability`"
- "`bug_drug_combinations`"
- "`count`"
- "`resistance_predict`"
- "`pca`"
- "`filter_ab_class`"
- "`g.test`"
- "`ggplot_rsi`"
- "`ggplot_pca`"
- "`kurtosis`"
- "`portion`"
- "`resistance_predict`"
- "`skewness`"
- title: "Included data sets"
desc: >
Scientifically reliable references for microorganisms and
antibiotics, and example data sets to use for practise.
contents:
- "`microorganisms`"
- "`antibiotics`"
- "`antivirals`"
- "`example_isolates`"
- "`example_isolates_unclean`"
- "`rsi_translation`"
- "`microorganisms.codes`"
- "`microorganisms.old`"
- "`microorganisms`"
- "`WHONET`"
- title: "Background information"
desc: >

@ -154,7 +154,11 @@ data %>%
origin = 'iso2c',
destination = 'country.name')) %>%
summarise(first = min(timestamp_server)) %>%
arrange(desc(first))
arrange(desc(first)) %>%
mutate(frame = case_when(first <= as.POSIXct("2019-06-30") ~ "Q1-Q2 2019",
first <= as.POSIXct("2019-12-31") ~ "Q3-Q4 2019",
TRUE ~ "Q1-Q2 2020")) %>%
View()
#
# p1 <- data %>%
# group_by(country) %>%

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -75,6 +75,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
Conduct Principal Component Analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -179,7 +186,7 @@
<h1>How to apply EUCAST rules</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">23 February 2020</h4>
<h4 class="date">07 March 2020</h4>
<div class="hidden name"><code>EUCAST.Rmd</code></div>
@ -293,7 +300,7 @@
</tr>
</tbody>
</table>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw"><a href="../reference/eucast_rules.html">eucast_rules</a></span>(data, <span class="dt">info =</span> <span class="ot">FALSE</span>)</span></code></pre></div>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw"><a href="../reference/eucast_rules.html">eucast_rules</a></span>(data)</span></code></pre></div>
<table class="table">
<thead><tr class="header">
<th align="left">mo</th>

@ -0,0 +1,345 @@
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>How to conduct principal component analysis (PCA) for AMR • AMR (for R)</title>
<!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png">
<!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script><!-- Bootstrap --><link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.3.7/flatly/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous">
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/all.min.css" integrity="sha256-nAmazAk6vS34Xqo0BSrTb+abbtFlgsFK7NKSi6o7Y78=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/v4-shims.min.css" integrity="sha256-6qHlizsOWFskGlwVOKuns+D1nB6ssZrHQrNj1wGplHc=" crossorigin="anonymous">
<!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js" integrity="sha256-FiZwavyI2V6+EXO1U+xzLG3IKldpiTFf3153ea9zikQ=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/headroom.min.js" integrity="sha256-DJFC1kqIhelURkuza0AvYal5RxMtpzLjFhsnVIeuk+U=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script><meta property="og:title" content="How to conduct principal component analysis (PCA) for AMR">
<meta property="og:description" content="">
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png">
<meta name="twitter:card" content="summary">
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="container template-article">
<header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="../index.html">
<span class="fa fa-home"></span>
Home
</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
<span class="fa fa-question-circle"></span>
How to
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="../articles/AMR.html">
<span class="fa fa-directions"></span>
Conduct AMR analysis
</a>
</li>
<li>
<a href="../articles/resistance_predict.html">
<span class="fa fa-dice"></span>
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
Determine multi-drug resistance (MDR)
</a>
</li>
<li>
<a href="../articles/WHONET.html">
<span class="fa fa-globe-americas"></span>
Work with WHONET data
</a>
</li>
<li>
<a href="../articles/SPSS.html">
<span class="fa fa-file-upload"></span>
Import data from SPSS/SAS/Stata
</a>
</li>
<li>
<a href="../articles/EUCAST.html">
<span class="fa fa-exchange-alt"></span>
Apply EUCAST rules
</a>
</li>
<li>
<a href="../reference/mo_property.html">
<span class="fa fa-bug"></span>
Get properties of a microorganism
</a>
</li>
<li>
<a href="../reference/ab_property.html">
<span class="fa fa-capsules"></span>
Get properties of an antibiotic
</a>
</li>
<li>
<a href="../articles/benchmarks.html">
<span class="fa fa-shipping-fast"></span>
Other: benchmarks
</a>
</li>
</ul>
</li>
<li>
<a href="../reference/">
<span class="fa fa-book-open"></span>
Manual
</a>
</li>
<li>
<a href="../authors.html">
<span class="fa fa-users"></span>
Authors
</a>
</li>
<li>
<a href="../news/">
<span class="far fa far fa-newspaper"></span>
Changelog
</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://gitlab.com/msberends/AMR">
<span class="fab fa fab fa-gitlab"></span>
Source Code
</a>
</li>
<li>
<a href="../LICENSE-text.html">
<span class="fa fa-book"></span>
Licence
</a>
</li>
</ul>
</div>
<!--/.nav-collapse -->
</div>
<!--/.container -->
</div>
<!--/.navbar -->
</header><div class="row">
<div class="col-md-9 contents">
<div class="page-header toc-ignore">
<h1>How to conduct principal component analysis (PCA) for AMR</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">07 March 2020</h4>
<div class="hidden name"><code>PCA.Rmd</code></div>
</div>
<p><strong>NOTE: This page will be updated soon, as the <code>pca()</code> function is still under development.</strong></p>
<div id="introduction" class="section level1">
<h1 class="hasAnchor">
<a href="#introduction" class="anchor"></a>Introduction</h1>
</div>
<div id="transforming" class="section level1">
<h1 class="hasAnchor">
<a href="#transforming" class="anchor"></a>Transforming</h1>
<p>For PCA, we need to transform our AMR data first. This is what the <code>example_isolates</code> data set in this package looks like:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span>(AMR)</span>
<span id="cb1-2"><a href="#cb1-2"></a><span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span>(dplyr)</span>
<span id="cb1-3"><a href="#cb1-3"></a><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">glimpse</a></span>(example_isolates)</span>
<span id="cb1-4"><a href="#cb1-4"></a><span class="co"># Observations: 2,000</span></span>
<span id="cb1-5"><a href="#cb1-5"></a><span class="co"># Variables: 49</span></span>
<span id="cb1-6"><a href="#cb1-6"></a><span class="co"># $ date &lt;date&gt; 2002-01-02, 2002-01-03, 2002-01-07, 2002-01-07, 2002…</span></span>
<span id="cb1-7"><a href="#cb1-7"></a><span class="co"># $ hospital_id &lt;fct&gt; D, D, B, B, B, B, D, D, B, B, D, D, D, D, D, B, B, B,…</span></span>
<span id="cb1-8"><a href="#cb1-8"></a><span class="co"># $ ward_icu &lt;lgl&gt; FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, T…</span></span>
<span id="cb1-9"><a href="#cb1-9"></a><span class="co"># $ ward_clinical &lt;lgl&gt; TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, F…</span></span>
<span id="cb1-10"><a href="#cb1-10"></a><span class="co"># $ ward_outpatient &lt;lgl&gt; FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…</span></span>
<span id="cb1-11"><a href="#cb1-11"></a><span class="co"># $ age &lt;dbl&gt; 65, 65, 45, 45, 45, 45, 78, 78, 45, 79, 67, 67, 71, 7…</span></span>
<span id="cb1-12"><a href="#cb1-12"></a><span class="co"># $ gender &lt;chr&gt; "F", "F", "F", "F", "F", "F", "M", "M", "F", "F", "M"…</span></span>
<span id="cb1-13"><a href="#cb1-13"></a><span class="co"># $ patient_id &lt;chr&gt; "A77334", "A77334", "067927", "067927", "067927", "06…</span></span>
<span id="cb1-14"><a href="#cb1-14"></a><span class="co"># $ mo &lt;mo&gt; B_ESCHR_COLI, B_ESCHR_COLI, B_STPHY_EPDR, B_STPHY_EPDR…</span></span>
<span id="cb1-15"><a href="#cb1-15"></a><span class="co"># $ PEN &lt;rsi&gt; R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R,…</span></span>
<span id="cb1-16"><a href="#cb1-16"></a><span class="co"># $ OXA &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-17"><a href="#cb1-17"></a><span class="co"># $ FLC &lt;rsi&gt; NA, NA, R, R, R, R, S, S, R, S, S, S, NA, NA, NA, NA,…</span></span>
<span id="cb1-18"><a href="#cb1-18"></a><span class="co"># $ AMX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-19"><a href="#cb1-19"></a><span class="co"># $ AMC &lt;rsi&gt; I, I, NA, NA, NA, NA, S, S, NA, NA, S, S, I, I, R, I,…</span></span>
<span id="cb1-20"><a href="#cb1-20"></a><span class="co"># $ AMP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-21"><a href="#cb1-21"></a><span class="co"># $ TZP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-22"><a href="#cb1-22"></a><span class="co"># $ CZO &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-23"><a href="#cb1-23"></a><span class="co"># $ FEP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-24"><a href="#cb1-24"></a><span class="co"># $ CXM &lt;rsi&gt; I, I, R, R, R, R, S, S, R, S, S, S, S, S, NA, S, S, R…</span></span>
<span id="cb1-25"><a href="#cb1-25"></a><span class="co"># $ FOX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-26"><a href="#cb1-26"></a><span class="co"># $ CTX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S,…</span></span>
<span id="cb1-27"><a href="#cb1-27"></a><span class="co"># $ CAZ &lt;rsi&gt; NA, NA, R, R, R, R, R, R, R, R, R, R, NA, NA, NA, S, …</span></span>
<span id="cb1-28"><a href="#cb1-28"></a><span class="co"># $ CRO &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S,…</span></span>
<span id="cb1-29"><a href="#cb1-29"></a><span class="co"># $ GEN &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-30"><a href="#cb1-30"></a><span class="co"># $ TOB &lt;rsi&gt; NA, NA, NA, NA, NA, NA, S, S, NA, NA, NA, NA, S, S, N…</span></span>
<span id="cb1-31"><a href="#cb1-31"></a><span class="co"># $ AMK &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-32"><a href="#cb1-32"></a><span class="co"># $ KAN &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-33"><a href="#cb1-33"></a><span class="co"># $ TMP &lt;rsi&gt; R, R, S, S, R, R, R, R, S, S, NA, NA, S, S, S, S, S, …</span></span>
<span id="cb1-34"><a href="#cb1-34"></a><span class="co"># $ SXT &lt;rsi&gt; R, R, S, S, NA, NA, NA, NA, S, S, NA, NA, S, S, S, S,…</span></span>
<span id="cb1-35"><a href="#cb1-35"></a><span class="co"># $ NIT &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-36"><a href="#cb1-36"></a><span class="co"># $ FOS &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-37"><a href="#cb1-37"></a><span class="co"># $ LNZ &lt;rsi&gt; R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R…</span></span>
<span id="cb1-38"><a href="#cb1-38"></a><span class="co"># $ CIP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, S, S, NA, NA, NA, NA,…</span></span>
<span id="cb1-39"><a href="#cb1-39"></a><span class="co"># $ MFX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-40"><a href="#cb1-40"></a><span class="co"># $ VAN &lt;rsi&gt; R, R, S, S, S, S, S, S, S, S, NA, NA, R, R, R, R, R, …</span></span>
<span id="cb1-41"><a href="#cb1-41"></a><span class="co"># $ TEC &lt;rsi&gt; R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R…</span></span>
<span id="cb1-42"><a href="#cb1-42"></a><span class="co"># $ TCY &lt;rsi&gt; R, R, S, S, S, S, S, S, S, I, S, S, NA, NA, I, R, R, …</span></span>
<span id="cb1-43"><a href="#cb1-43"></a><span class="co"># $ TGC &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-44"><a href="#cb1-44"></a><span class="co"># $ DOX &lt;rsi&gt; NA, NA, S, S, S, S, S, S, S, NA, S, S, NA, NA, NA, R,…</span></span>
<span id="cb1-45"><a href="#cb1-45"></a><span class="co"># $ ERY &lt;rsi&gt; R, R, R, R, R, R, S, S, R, S, S, S, R, R, R, R, R, R,…</span></span>
<span id="cb1-46"><a href="#cb1-46"></a><span class="co"># $ CLI &lt;rsi&gt; NA, NA, NA, NA, NA, R, NA, NA, NA, NA, NA, NA, NA, NA…</span></span>
<span id="cb1-47"><a href="#cb1-47"></a><span class="co"># $ AZM &lt;rsi&gt; R, R, R, R, R, R, S, S, R, S, S, S, R, R, R, R, R, R,…</span></span>
<span id="cb1-48"><a href="#cb1-48"></a><span class="co"># $ IPM &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S,…</span></span>
<span id="cb1-49"><a href="#cb1-49"></a><span class="co"># $ MEM &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-50"><a href="#cb1-50"></a><span class="co"># $ MTR &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-51"><a href="#cb1-51"></a><span class="co"># $ CHL &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-52"><a href="#cb1-52"></a><span class="co"># $ COL &lt;rsi&gt; NA, NA, R, R, R, R, R, R, R, R, R, R, NA, NA, NA, R, …</span></span>
<span id="cb1-53"><a href="#cb1-53"></a><span class="co"># $ MUP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-54"><a href="#cb1-54"></a><span class="co"># $ RIF &lt;rsi&gt; R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R…</span></span></code></pre></div>
<p>Next, we transform this into a data set that contains only the resistance percentages per taxonomic order and genus:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>resistance_data &lt;-<span class="st"> </span>example_isolates <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb2-2"><a href="#cb2-2"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by</a></span>(<span class="dt">order =</span> <span class="kw"><a href="../reference/mo_property.html">mo_order</a></span>(mo), <span class="co"># group on anything, like order</span></span>
<span id="cb2-3"><a href="#cb2-3"></a> <span class="dt">genus =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(mo)) <span class="op">%&gt;%</span><span class="st"> </span><span class="co"># and genus as we do here</span></span>
<span id="cb2-4"><a href="#cb2-4"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/summarise_all.html">summarise_if</a></span>(is.rsi, resistance) <span class="op">%&gt;%</span><span class="st"> </span><span class="co"># then get resistance of all drugs</span></span>
<span id="cb2-5"><a href="#cb2-5"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(order, genus, AMC, CXM, CTX, </span>
<span id="cb2-6"><a href="#cb2-6"></a> CAZ, GEN, TOB, TMP, SXT) <span class="co"># and select only relevant columns</span></span>
<