Browse Source

diff for freq, fix for mo_shortname

v1.8.2
parent
commit
92c9cc2608
  1. 3
      NAMESPACE
  2. 23
      NEWS.md
  3. 42
      R/freq.R
  4. 10
      R/mo_property.R
  5. 10
      README.md
  6. BIN
      data/microorganisms.certe.rda
  7. BIN
      data/microorganisms.rda
  8. BIN
      data/septic_patients.rda
  9. 5
      man/freq.Rd
  10. 2
      tests/testthat/test-first_isolate.R
  11. 6
      tests/testthat/test-freq.R
  12. 4
      vignettes/freq.Rmd

3
NAMESPACE

@ -11,6 +11,7 @@ S3method(as.vector,frequency_tbl) @@ -11,6 +11,7 @@ S3method(as.vector,frequency_tbl)
S3method(as_tibble,frequency_tbl)
S3method(barplot,mic)
S3method(barplot,rsi)
S3method(diff,frequency_tbl)
S3method(format,frequency_tbl)
S3method(hist,frequency_tbl)
S3method(kurtosis,data.frame)
@ -133,6 +134,7 @@ exportMethods(as.vector.frequency_tbl) @@ -133,6 +134,7 @@ exportMethods(as.vector.frequency_tbl)
exportMethods(as_tibble.frequency_tbl)
exportMethods(barplot.mic)
exportMethods(barplot.rsi)
exportMethods(diff.frequency_tbl)
exportMethods(format.frequency_tbl)
exportMethods(hist.frequency_tbl)
exportMethods(kurtosis)
@ -171,6 +173,7 @@ importFrom(dplyr,case_when) @@ -171,6 +173,7 @@ importFrom(dplyr,case_when)
importFrom(dplyr,desc)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_vars)

23
NEWS.md

@ -7,15 +7,15 @@ @@ -7,15 +7,15 @@
* Semantic names: `mo_fullname`, `mo_shortname`
* Microbial properties: `mo_type`, `mo_gramstain`.
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale:
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
```r
mo_gramstain("E. coli")
# [1] "Gram negative"
mo_gramstain("E. coli", language = "de") # "de" = Deutsch / German
mo_gramstain("E. coli", language = "de") # "de" = German
# [1] "Gramnegativ"
mo_gramstain("E. coli", language = "es") # "es" = Español / Spanish
mo_gramstain("E. coli", language = "es") # "es" = Spanish
# [1] "Gram negativo"
mo_fullname("S. group A") # when run on a on a Portuguese system
mo_fullname("S. group A", language = "pt") # Portuguese
# [1] "Streptococcus grupo A"
```
@ -31,11 +31,11 @@ @@ -31,11 +31,11 @@
* Functions `as.mo` and `is.mo` as replacements for `as.bactid` and `is.bactid` (since the `microoganisms` data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The `as.mo` function determines microbial IDs using Artificial Intelligence (AI):
```r
as.mo("E. coli")
# [1] ESCCOL
# [1] B_ESCHR_COL
as.mo("MRSA")
# [1] STAAUR
# [1] B_STPHY_AUR
as.mo("S group A")
# [1] STCGRA
# [1] B_STRPTC_GRA
```
And with great speed too - on a quite regular Linux server from 2007 it takes us 0.009 seconds to transform 25,000 items:
```r
@ -54,7 +54,8 @@ @@ -54,7 +54,8 @@
* Function `ab_property` and its aliases: `ab_name`, `ab_tradenames`, `ab_certe`, `ab_umcg` and `ab_trivial_nl`
* Introduction to AMR as a vignette
* Removed clipbaord functions as it violated the CRAN policy
* Removed clipboard functions as it violated the CRAN policy
* Renamed `septic_patients$sex` to `septic_patients$gender`
#### Changed
* Added three antimicrobial agents to the `antibiotics` data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)
@ -92,14 +93,14 @@ @@ -92,14 +93,14 @@
* Added longest en shortest character length in the frequency table (`freq`) header of class `character`
* Support for types (classes) list and matrix for `freq`
```r
my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
freq(my_matrix)
```
For lists, subsetting is possible:
```r
my_list = list(age = septic_patients$age, sex = septic_patients$sex)
my_list = list(age = septic_patients$age, gender = septic_patients$gender)
my_list %>% freq(age)
my_list %>% freq(sex)
my_list %>% freq(gender)
```
#### Other

42
R/freq.R

@ -130,7 +130,7 @@ @@ -130,7 +130,7 @@
#' sort(septic_patients$age)) # TRUE
#'
#' # it also supports `table` objects:
#' table(septic_patients$sex,
#' table(septic_patients$gender,
#' septic_patients$age) %>%
#' freq(sep = " **sep** ")
#'
@ -502,6 +502,46 @@ top_freq <- function(f, n) { @@ -502,6 +502,46 @@ top_freq <- function(f, n) {
vect
}
#' @rdname freq
#' @exportMethod diff.frequency_tbl
#' @importFrom dplyr %>% full_join mutate
#' @export
diff.frequency_tbl <- function(x, y, ...) {
# check classes
if (!"frequency_tbl" %in% class(x)
| !"frequency_tbl" %in% class(y)) {
stop("Both x and y must be a frequency table.")
}
x.attr <- attributes(x)$opt
# only keep item and count
x <- x[, 1:2]
y <- y[, 1:2]
x <- x %>%
full_join(y,
by = colnames(x)[1],
suffix = c(".x", ".y")) %>%
mutate(
diff = case_when(
is.na(count.y) ~ -count.x,
is.na(count.x) ~ count.y,
TRUE ~ count.y - count.x)) %>%
mutate(
diff.percent = percent(
diff / count.x,
force_zero = TRUE))
print(
knitr::kable(x,
format = x.attr$tbl_format,
col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"),
align = "lrrrr",
padding = 1)
)
}
#' @rdname freq
#' @exportMethod print.frequency_tbl
#' @importFrom knitr kable

10
R/mo_property.R

@ -128,10 +128,12 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) @@ -128,10 +128,12 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
result <- as.character(res1)
} else {
x <- AMR::as.mo(x)
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
pull(shortname)
suppressWarnings(
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
pull(shortname)
)
}
mo_translate(result, language = language)
}

10
README.md

@ -120,9 +120,13 @@ help(package = "AMR") @@ -120,9 +120,13 @@ help(package = "AMR")
```
## ITIS
<img src="man/figures/logo_amr.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="man/figures/itis_logo.jpg" height="50px">
<img src="man/figures/itis_logo.jpg" height="100px">
This `AMR` package contains the **complete microbial taxonomic data** (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
This `AMR` package contains the **complete microbial taxonomic data** (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.
### New classes
This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`).
@ -150,7 +154,7 @@ plot(septic_patients$cipr) @@ -150,7 +154,7 @@ plot(septic_patients$cipr)
![example_1_rsi](man/figures/rsi_example1.png)
<img src="man/figures/logo_amr.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">
<<img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"> <img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">
Or use the `ggplot2` and `dplyr` packages to create more appealing plots:
```r

BIN
data/microorganisms.certe.rda

Binary file not shown.

BIN
data/microorganisms.rda

Binary file not shown.

BIN
data/septic_patients.rda

Binary file not shown.

5
man/freq.Rd

@ -4,6 +4,7 @@ @@ -4,6 +4,7 @@
\alias{freq}
\alias{frequency_tbl}
\alias{top_freq}
\alias{diff.frequency_tbl}
\alias{print.frequency_tbl}
\title{Frequency table}
\usage{
@ -17,6 +18,8 @@ freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), @@ -17,6 +18,8 @@ freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
top_freq(f, n)
\method{diff}{frequency_tbl}(x, y, ...)
\method{print}{frequency_tbl}(x, nmax = getOption("max.print.freq",
default = 15), ...)
}
@ -143,7 +146,7 @@ identical(septic_patients \%>\% @@ -143,7 +146,7 @@ identical(septic_patients \%>\%
sort(septic_patients$age)) # TRUE
# it also supports `table` objects:
table(septic_patients$sex,
table(septic_patients$gender,
septic_patients$age) \%>\%
freq(sep = " **sep** ")

2
tests/testthat/test-first_isolate.R

@ -114,7 +114,7 @@ test_that("first isolates work", { @@ -114,7 +114,7 @@ test_that("first isolates work", {
first_isolate(col_date = "date",
col_mo = "mo",
col_patient_id = "patient_id",
col_testcode = "sex",
col_testcode = "gender",
testcodes_exclude = "M"))
# errors

6
tests/testthat/test-freq.R

@ -32,17 +32,17 @@ test_that("frequency table works", { @@ -32,17 +32,17 @@ test_that("frequency table works", {
# factor
expect_output(print(freq(septic_patients$hospital_id)))
# table
expect_output(print(freq(table(septic_patients$sex, septic_patients$age))))
expect_output(print(freq(table(septic_patients$gender, septic_patients$age))))
# rsi
expect_output(print(freq(septic_patients$amcl)))
# hms
expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50))))))
# matrix
expect_output(print(freq(as.matrix(septic_patients$age))))
expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")]))))
expect_output(print(freq(as.matrix(septic_patients[, c("age", "gender")]))))
# list
expect_output(print(freq(list(age = septic_patients$age))))
expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex))))
expect_output(print(freq(list(age = septic_patients$age, gender = septic_patients$gender))))
library(dplyr)
expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())

4
vignettes/freq.Rmd vendored

@ -25,9 +25,9 @@ Frequency tables (or frequency distributions) are summaries of the distribution @@ -25,9 +25,9 @@ Frequency tables (or frequency distributions) are summaries of the distribution
## Frequencies of one variable
To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset:
To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `gender` variable of the `septic_patients` dataset:
```{r, echo = TRUE}
septic_patients %>% freq(sex)
septic_patients %>% freq(gender)
```
This immediately shows the class of the variable, its length and availability (i.e. the amount of `NA`), the amount of unique values and (most importantly) that among septic patients men are more prevalent than women.

Loading…
Cancel
Save