@ -193,19 +193,19 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
@@ -193,19 +193,19 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
require ( " AMR" )
# check onLoad() in R/zzz.R: data tables are created there.
}
# WHONET: xxx = no growth
x [tolower ( as.character ( paste0 ( x , " " ) ) ) %in% c ( " " , " xxx" , " na" , " nan" ) ] <- NA_character_
uncertainty_level <- translate_allow_uncertain ( allow_uncertain )
# mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history))
if ( mo_source_isvalid ( reference_df )
& isFALSE ( Becker )
& isFALSE ( Lancefield )
& ! is.null ( reference_df )
& all ( x %in% reference_df [ , 1 ] [ [1 ] ] ) ) {
# has valid own reference_df
# (data.table not faster here)
reference_df <- reference_df %>% filter ( ! is.na ( mo ) )
@ -225,18 +225,18 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
@@ -225,18 +225,18 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
left_join ( reference_df , by = " x" ) %>%
pull ( " mo" )
)
} else if ( all ( x %in% AMR :: microorganisms $ mo )
& isFALSE ( Becker )
& isFALSE ( Lancefield ) ) {
y <- x
# } else if (!any(is.na(mo_hist))
# & isFALSE(Becker)
# & isFALSE(Lancefield)) {
# # check previously found results
# y <- mo_hist
} else if ( all ( tolower ( x ) %in% microorganismsDT $ fullname_lower )
& isFALSE ( Becker )
& isFALSE ( Lancefield ) ) {
@ -257,7 +257,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
@@ -257,7 +257,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
}
# save them to history
set_mo_history ( x , y , 0 , force = isTRUE ( list ( ... ) $ force_mo_history ) )
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate ( x = x , property = " mo" ,
@ -266,8 +266,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
@@ -266,8 +266,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
force_mo_history = isTRUE ( list ( ... ) $ force_mo_history ) ,
... )
}
to_class_mo ( y )
}
@ -286,6 +286,7 @@ is.mo <- function(x) {
@@ -286,6 +286,7 @@ is.mo <- function(x) {
#' @importFrom crayon magenta red blue silver italic
# param property a column name of AMR::microorganisms
# param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too
# param dyslexia_mode logical - also check for characters that resemble others
# param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
# param debug logical - show different lookup texts while searching
exec_as.mo <- function ( x ,
@ -295,23 +296,24 @@ exec_as.mo <- function(x,
@@ -295,23 +296,24 @@ exec_as.mo <- function(x,
reference_df = get_mo_source ( ) ,
property = " mo" ,
initial_search = TRUE ,
dyslexia_mode = FALSE ,
force_mo_history = FALSE ,
debug = FALSE ) {
if ( ! " AMR" %in% base :: .packages ( ) ) {
require ( " AMR" )
# check onLoad() in R/zzz.R: data tables are created there.
}
# WHONET: xxx = no growth
x [tolower ( as.character ( paste0 ( x , " " ) ) ) %in% c ( " " , " xxx" , " na" , " nan" ) ] <- NA_character_
if ( initial_search == TRUE ) {
options ( mo_failures = NULL )
options ( mo_uncertainties = NULL )
options ( mo_renamed = NULL )
}
if ( NCOL ( x ) == 2 ) {
# support tidyverse selection like: df %>% select(colA, colB)
# paste these columns together
@ -325,20 +327,20 @@ exec_as.mo <- function(x,
@@ -325,20 +327,20 @@ exec_as.mo <- function(x,
stop ( ' `x` can be 2 columns at most' , call. = FALSE )
}
x [is.null ( x ) ] <- NA
# support tidyverse selection like: df %>% select(colA)
if ( ! is.vector ( x ) & ! is.null ( dim ( x ) ) ) {
x <- pull ( x , 1 )
}
}
notes <- character ( 0 )
uncertainties <- data.frame ( input = character ( 0 ) ,
fullname = character ( 0 ) ,
mo = character ( 0 ) )
failures <- character ( 0 )
uncertainty_level <- translate_allow_uncertain ( allow_uncertain )
x_input <- x
# already strip leading and trailing spaces
x <- trimws ( x , which = " both" )
@ -350,7 +352,7 @@ exec_as.mo <- function(x,
@@ -350,7 +352,7 @@ exec_as.mo <- function(x,
& ! is.null ( x )
& ! identical ( x , " " )
& ! identical ( x , " xxx" ) ]
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if ( any ( x %like% " ^[BFP]_[A-Z]{3,7}" ) & ! all ( x %in% microorganisms $ mo ) ) {
leftpart <- gsub ( " ^([BFP]_[A-Z]{3,7}).*" , " \\1" , x )
@ -372,7 +374,7 @@ exec_as.mo <- function(x,
@@ -372,7 +374,7 @@ exec_as.mo <- function(x,
pull ( new )
}
}
# defined df to check for
if ( ! is.null ( reference_df ) ) {
if ( ! mo_source_isvalid ( reference_df ) ) {
@ -391,7 +393,7 @@ exec_as.mo <- function(x,
@@ -391,7 +393,7 @@ exec_as.mo <- function(x,
reference_df [ ] <- lapply ( reference_df , as.character )
)
}
# all empty
if ( all ( identical ( trimws ( x_input ) , " " ) | is.na ( x_input ) | length ( x ) == 0 ) ) {
if ( property == " mo" ) {
@ -399,7 +401,7 @@ exec_as.mo <- function(x,
@@ -399,7 +401,7 @@ exec_as.mo <- function(x,
} else {
return ( rep ( NA_character_ , length ( x_input ) ) )
}
} else if ( all ( x %in% reference_df [ , 1 ] [ [1 ] ] ) ) {
# all in reference df
colnames ( reference_df ) [1 ] <- " x"
@ -409,7 +411,7 @@ exec_as.mo <- function(x,
@@ -409,7 +411,7 @@ exec_as.mo <- function(x,
left_join ( AMR :: microorganisms , by = " mo" ) %>%
pull ( property )
)
} else if ( all ( x %in% AMR :: microorganisms $ mo ) ) {
# existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
y <- microorganismsDT [prevalence == 1 ] [data.table ( mo = x ) , on = " mo" , ..property ] [ [1 ] ]
@ -424,7 +426,7 @@ exec_as.mo <- function(x,
@@ -424,7 +426,7 @@ exec_as.mo <- function(x,
..property ] [ [1 ] ]
}
x <- y
} else if ( all ( x %in% read_mo_history ( uncertainty_level ,
force = force_mo_history ) $ x ) ) {
# previously found code
@ -432,7 +434,7 @@ exec_as.mo <- function(x,
@@ -432,7 +434,7 @@ exec_as.mo <- function(x,
uncertainty_level ,
force = force_mo_history ) ) ,
on = " mo" , ..property ] [ [1 ] ]
} else if ( all ( tolower ( x ) %in% microorganismsDT $ fullname_lower ) ) {
# we need special treatment for very prevalent full names, they are likely!
# e.g. as.mo("Staphylococcus aureus")
@ -448,30 +450,30 @@ exec_as.mo <- function(x,
@@ -448,30 +450,30 @@ exec_as.mo <- function(x,
..property ] [ [1 ] ]
}
x <- y
} else if ( all ( toupper ( x ) %in% AMR :: microorganisms.codes $ code ) ) {
# commonly used MO codes
y <- as.data.table ( AMR :: microorganisms.codes ) [data.table ( code = toupper ( x ) ) , on = " code" , ]
# save them to history
set_mo_history ( x , y $ mo , 0 , force = force_mo_history )
x <- microorganismsDT [data.table ( mo = y [ [ " mo" ] ] ) , on = " mo" , ..property ] [ [1 ] ]
} else if ( ! all ( x %in% AMR :: microorganisms [ , property ] ) ) {
strip_whitespace <- function ( x ) {
# all whitespaces (tab, new lines, etc.) should be one space
# and spaces before and after should be omitted
trimws ( gsub ( " [\\s]+" , " " , x , perl = TRUE ) , which = " both" )
}
x <- strip_whitespace ( x )
x_backup <- x
# remove spp and species
x <- gsub ( " +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)" , " " , x_backup , ignore.case = TRUE )
x <- strip_whitespace ( x )
x_backup_without_spp <- x
x_species <- paste ( x , " species" )
# translate to English for supported languages of mo_property
@ -490,7 +492,7 @@ exec_as.mo <- function(x,
@@ -490,7 +492,7 @@ exec_as.mo <- function(x,
# remove genus as first word
x <- gsub ( " ^Genus " , " " , x )
# allow characters that resemble others ----
if ( initial_search == FALS E) {
if ( dyslexia_mode == TRU E) {
x <- tolower ( x )
x <- gsub ( " [iy]+" , " [iy]+" , x )
x <- gsub ( " (c|k|q|qu|s|z|x|ks)+" , " (c|k|q|qu|s|z|x|ks)+" , x )
@ -512,7 +514,7 @@ exec_as.mo <- function(x,
@@ -512,7 +514,7 @@ exec_as.mo <- function(x,
x <- gsub ( " e\\+n(?![a-z[])" , " (e+n|u+(c|k|q|qu|s|z|x|ks)+)" , x , ignore.case = TRUE , perl = TRUE )
}
x <- strip_whitespace ( x )
x_trimmed <- x
x_trimmed_species <- paste ( x_trimmed , " species" )
x_trimmed_without_group <- gsub ( " gro.u.p$" , " " , x_trimmed , ignore.case = TRUE )
@ -526,7 +528,7 @@ exec_as.mo <- function(x,
@@ -526,7 +528,7 @@ exec_as.mo <- function(x,
x_withspaces_start_only <- paste0 ( ' ^' , x_withspaces )
x_withspaces_end_only <- paste0 ( x_withspaces , ' $' )
x_withspaces_start_end <- paste0 ( ' ^' , x_withspaces , ' $' )
if ( isTRUE ( debug ) ) {
cat ( paste0 ( ' x "' , x , ' "\n' ) )
cat ( paste0 ( ' x_species "' , x_species , ' "\n' ) )
@ -539,13 +541,13 @@ exec_as.mo <- function(x,
@@ -539,13 +541,13 @@ exec_as.mo <- function(x,
cat ( paste0 ( ' x_trimmed_species "' , x_trimmed_species , ' "\n' ) )
cat ( paste0 ( ' x_trimmed_without_group "' , x_trimmed_without_group , ' "\n' ) )
}
progress <- progress_estimated ( n = length ( x ) , min_time = 3 )
for ( i in 1 : length ( x ) ) {
progress $ tick ( ) $ print ( )
if ( initial_search == TRUE ) {
found <- microorganismsDT [mo == get_mo_history ( x_backup [i ] ,
uncertainty_level ,
@ -557,14 +559,14 @@ exec_as.mo <- function(x,
@@ -557,14 +559,14 @@ exec_as.mo <- function(x,
next
}
}
found <- microorganismsDT [mo == toupper ( x_backup [i ] ) , ..property ] [ [1 ] ]
# is a valid MO code
if ( length ( found ) > 0 ) {
x [i ] <- found [1L ]
next
}
found <- microorganismsDT [fullname_lower %in% tolower ( c ( x_backup [i ] , x_backup_without_spp [i ] ) ) , ..property ] [ [1 ] ]
# most probable: is exact match in fullname
if ( length ( found ) > 0 ) {
@ -574,7 +576,7 @@ exec_as.mo <- function(x,
@@ -574,7 +576,7 @@ exec_as.mo <- function(x,
}
next
}
found <- microorganismsDT [col_id == x_backup [i ] , ..property ] [ [1 ] ]
# is a valid Catalogue of Life ID
if ( NROW ( found ) > 0 ) {
@ -584,14 +586,14 @@ exec_as.mo <- function(x,
@@ -584,14 +586,14 @@ exec_as.mo <- function(x,
}
next
}
# WHONET: xxx = no growth
if ( tolower ( as.character ( paste0 ( x_backup_without_spp [i ] , " " ) ) ) %in% c ( " " , " xxx" , " na" , " nan" ) ) {
x [i ] <- NA_character_
next
}
if ( tolower ( x_backup_without_spp [i ] ) %in% c ( " other" , " none" , " unknown" ) ) {
# empty and nonsense values, ignore without warning
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
@ -600,7 +602,7 @@ exec_as.mo <- function(x,
@@ -600,7 +602,7 @@ exec_as.mo <- function(x,
}
next
}
# check for very small input, but ignore the O antigens of E. coli
if ( nchar ( gsub ( " [^a-zA-Z]" , " " , x_trimmed [i ] ) ) < 3
& ! x_backup_without_spp [i ] %like% " O?(26|103|104|104|111|121|145|157)" ) {
@ -629,7 +631,7 @@ exec_as.mo <- function(x,
@@ -629,7 +631,7 @@ exec_as.mo <- function(x,
}
next
}
if ( x_backup_without_spp [i ] %like% " virus" ) {
# there is no fullname like virus, so don't try to coerce it
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
@ -639,7 +641,7 @@ exec_as.mo <- function(x,
@@ -639,7 +641,7 @@ exec_as.mo <- function(x,
}
next
}
# translate known trivial abbreviations to genus + species ----
if ( ! is.na ( x_trimmed [i ] ) ) {
if ( toupper ( x_backup_without_spp [i ] ) %in% c ( ' MRSA' , ' MSSA' , ' VISA' , ' VRSA' ) ) {
@ -830,7 +832,7 @@ exec_as.mo <- function(x,
@@ -830,7 +832,7 @@ exec_as.mo <- function(x,
next
}
}
# FIRST TRY FULLNAMES AND CODES ----
# if only genus is available, return only genus
if ( all ( ! c ( x [i ] , x_trimmed [i ] ) %like% " " ) ) {
@ -854,7 +856,7 @@ exec_as.mo <- function(x,
@@ -854,7 +856,7 @@ exec_as.mo <- function(x,
}
# rest of genus only is in allow_uncertain part.
}
# TRY OTHER SOURCES ----
# WHONET and other common LIS codes
if ( toupper ( x_backup [i ] ) %in% AMR :: microorganisms.codes [ , 1 ] ) {
@ -879,7 +881,7 @@ exec_as.mo <- function(x,
@@ -879,7 +881,7 @@ exec_as.mo <- function(x,
}
}
}
# allow no codes less than 4 characters long, was already checked for WHONET above
if ( nchar ( x_backup_without_spp [i ] ) < 4 ) {
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
@ -889,7 +891,7 @@ exec_as.mo <- function(x,
@@ -889,7 +891,7 @@ exec_as.mo <- function(x,
}
next
}
check_per_prevalence <- function ( data_to_check ,
a.x_backup ,
b.x_trimmed ,
@ -898,19 +900,19 @@ exec_as.mo <- function(x,
@@ -898,19 +900,19 @@ exec_as.mo <- function(x,
e.x_withspaces_start_only ,
f.x_withspaces_end_only ,
g.x_backup_without_spp ) {
# try probable: trimmed version of fullname ----
found <- data_to_check [fullname_lower %in% tolower ( g.x_backup_without_spp ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
return ( found [1L ] )
}
# try any match keeping spaces ----
found <- data_to_check [fullname %like% d.x_withspaces_start_end , ..property ] [ [1 ] ]
if ( length ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
return ( found [1L ] )
}
# try any match keeping spaces, not ending with $ ----
found <- data_to_check [fullname %like% paste0 ( trimws ( e.x_withspaces_start_only ) , " " ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
@ -920,21 +922,21 @@ exec_as.mo <- function(x,
@@ -920,21 +922,21 @@ exec_as.mo <- function(x,
if ( length ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
return ( found [1L ] )
}
# try any match keeping spaces, not start with ^ ----
found <- data_to_check [fullname %like% paste0 ( " " , trimws ( f.x_withspaces_end_only ) ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
return ( found [1L ] )
}
# try a trimmed version
found <- data_to_check [fullname_lower %like% b.x_trimmed
| fullname_lower %like% c.x_trimmed_without_group , ..property ] [ [1 ] ]
if ( length ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
return ( found [1L ] )
}
# try splitting of characters in the middle and then find ID ----
# only when text length is 6 or lower
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
@ -949,18 +951,18 @@ exec_as.mo <- function(x,
@@ -949,18 +951,18 @@ exec_as.mo <- function(x,
return ( found [1L ] )
}
}
# try fullname without start and without nchar limit of >= 6 ----
# like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
found <- data_to_check [fullname %like% e.x_withspaces_start_only , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
return ( found [1L ] )
}
# didn't found any
return ( NA_character_ )
}
# FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ----
x [i ] <- check_per_prevalence ( data_to_check = microorganismsDT [prevalence == 1 ] ,
a.x_backup = x_backup [i ] ,
@ -1006,9 +1008,9 @@ exec_as.mo <- function(x,
@@ -1006,9 +1008,9 @@ exec_as.mo <- function(x,
}
next
}
# MISCELLANEOUS ----
# look for old taxonomic names ----
found <- microorganisms.oldDT [fullname_lower == tolower ( x_backup [i ] )
| fullname %like% x_withspaces_start_end [i ] , ]
@ -1032,7 +1034,7 @@ exec_as.mo <- function(x,
@@ -1032,7 +1034,7 @@ exec_as.mo <- function(x,
}
next
}
# check for uncertain results ----
uncertain_fn <- function ( a.x_backup ,
b.x_trimmed ,
@ -1040,17 +1042,22 @@ exec_as.mo <- function(x,
@@ -1040,17 +1042,22 @@ exec_as.mo <- function(x,
d.x_withspaces_start_only ,
f.x_withspaces_end_only ,
g.x_backup_without_spp ) {
if ( uncertainty_level == 0 ) {
# do not allow uncertainties
return ( NA_character_ )
}
if ( uncertainty_level >= 1 ) {
now_checks_for_uncertainty_level <- 1
# (1) look again for old taxonomic names, now for G. species ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 1] (1) look again for old taxonomic names, now for G. species\n" )
}
if ( isTRUE ( debug ) ) {
message ( " Running '" , c.x_withspaces_start_end , " ' and '" , d.x_withspaces_start_only , " '" )
}
found <- microorganisms.oldDT [fullname %like% c.x_withspaces_start_end
| fullname %like% d.x_withspaces_start_only ]
if ( NROW ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
@ -1068,7 +1075,7 @@ exec_as.mo <- function(x,
@@ -1068,7 +1075,7 @@ exec_as.mo <- function(x,
ref_new = microorganismsDT [col_id == found [1 , col_id_new ] , ref ] ,
mo = microorganismsDT [col_id == found [1 , col_id_new ] , mo ] )
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 1 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = found [1 , fullname ] ,
mo = paste ( " CoL" , found [1 , col_id ] ) ) )
@ -1077,18 +1084,26 @@ exec_as.mo <- function(x,
@@ -1077,18 +1084,26 @@ exec_as.mo <- function(x,
}
return ( x )
}
# (2) Try with misspelled input ----
# just rerun with initial_search = FALS E will used the extensive regex part above
# just rerun with dyslexia_mode = TRU E will used the extensive regex part above
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 1] (2) Try with misspelled input\n" )
}
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , a.x_backup , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 1 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1098,21 +1113,25 @@ exec_as.mo <- function(x,
@@ -1098,21 +1113,25 @@ exec_as.mo <- function(x,
return ( found [1L ] )
}
}
if ( uncertainty_level >= 2 ) {
now_checks_for_uncertainty_level <- 2
# (3) look for genus only, part of name ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 2] (3) look for genus only, part of name\n" )
}
if ( nchar ( g.x_backup_without_spp ) > 4 & ! b.x_trimmed %like% " " ) {
if ( ! grepl ( " ^[A-Z][a-z]+" , b.x_trimmed , ignore.case = FALSE ) ) {
if ( isTRUE ( debug ) ) {
message ( " Running '" , paste ( b.x_trimmed , " species" ) , " '" )
}
# not when input is like Genustext, because then Neospora would lead to Actinokineospora
found <- microorganismsDT [fullname_lower %like% paste ( b.x_trimmed , " species" ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
x [i ] <- found [1L ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found [1L ] , fullname ] [ [1 ] ] ,
mo = found [1L ] ) )
@ -1123,19 +1142,27 @@ exec_as.mo <- function(x,
@@ -1123,19 +1142,27 @@ exec_as.mo <- function(x,
}
}
}
# (4) strip values between brackets ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 2] (4) strip values between brackets\n" )
}
a.x_backup_stripped <- gsub ( " ( *[(].*[)] *)" , " " , a.x_backup )
a.x_backup_stripped <- trimws ( gsub ( " +" , " " , a.x_backup_stripped ) )
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup_stripped , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , a.x_backup_stripped , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup_stripped , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup_stripped , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) & nchar ( g.x_backup_without_spp ) >= 6 ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1144,7 +1171,7 @@ exec_as.mo <- function(x,
@@ -1144,7 +1171,7 @@ exec_as.mo <- function(x,
}
return ( found [1L ] )
}
# (5a) try to strip off half an element from end and check the remains ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 2] (5a) try to strip off half an element from end and check the remains\n" )
@ -1156,13 +1183,21 @@ exec_as.mo <- function(x,
@@ -1156,13 +1183,21 @@ exec_as.mo <- function(x,
lastword_half <- substr ( lastword , 1 , as.integer ( nchar ( lastword ) / 2 ) )
# remove last half of the second term
x_strip_collapsed <- paste ( c ( x_strip [1 : ( length ( x_strip ) - i ) ] , lastword_half ) , collapse = " " )
if ( nchar ( x_strip_collapsed ) >= 4 ) {
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( nchar ( x_strip_collapsed ) >= 4 & nchar ( lastword_half ) > 2 ) {
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1181,13 +1216,21 @@ exec_as.mo <- function(x,
@@ -1181,13 +1216,21 @@ exec_as.mo <- function(x,
if ( length ( x_strip ) > 1 ) {
for ( i in 1 : ( length ( x_strip ) - 1 ) ) {
x_strip_collapsed <- paste ( x_strip [1 : ( length ( x_strip ) - i ) ] , collapse = " " )
if ( nchar ( x_strip_collapsed ) >= 4 ) {
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( nchar ( x_strip_collapsed ) >= 6 ) {
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1208,7 +1251,7 @@ exec_as.mo <- function(x,
@@ -1208,7 +1251,7 @@ exec_as.mo <- function(x,
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1222,7 +1265,7 @@ exec_as.mo <- function(x,
@@ -1222,7 +1265,7 @@ exec_as.mo <- function(x,
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1239,14 +1282,22 @@ exec_as.mo <- function(x,
@@ -1239,14 +1282,22 @@ exec_as.mo <- function(x,
if ( length ( x_strip ) > 1 & nchar ( g.x_backup_without_spp ) >= 6 ) {
for ( i in 2 : ( length ( x_strip ) ) ) {
x_strip_collapsed <- paste ( x_strip [i : length ( x_strip ) ] , collapse = " " )
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found_result [1L ] , ..property ] [ [1 ] ]
# uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3)
if ( x_strip_collapsed %like% " " ) {
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1259,22 +1310,32 @@ exec_as.mo <- function(x,
@@ -1259,22 +1310,32 @@ exec_as.mo <- function(x,
}
}
}
if ( uncertainty_level >= 3 ) {
# (7) try to strip off one element from start and check the remains ----
now_checks_for_uncertainty_level <- 3
# (7a) try to strip off one element from start and check the remains (any text size) ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 3] (7) try to strip off one element from start and check the remains\n" )
cat ( " \n[UNCERTAINLY LEVEL 3] (7a ) try to strip off one element from start and check the remains (any text size) \n" )
}
x_strip <- a.x_backup %>% strsplit ( " " ) %>% unlist ( )
if ( length ( x_strip ) > 1 & nchar ( g.x_backup_without_spp ) >= 6 ) {
for ( i in 2 : ( length ( x_strip ) ) ) {
x_strip_collapsed <- paste ( x_strip [i : length ( x_strip ) ] , collapse = " " )
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 3 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1285,18 +1346,53 @@ exec_as.mo <- function(x,
@@ -1285,18 +1346,53 @@ exec_as.mo <- function(x,
}
}
}
# (7b) try to strip off one element from end and check the remains (any text size) ----
# (this is in fact 5b but without nchar limit of >=6)
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 3] (7b) try to strip off one element from end and check the remains (any text size)\n" )
}
if ( length ( x_strip ) > 1 ) {
for ( i in 1 : ( length ( x_strip ) - 1 ) ) {
x_strip_collapsed <- paste ( x_strip [1 : ( length ( x_strip ) - i ) ] , collapse = " " )
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
if ( initial_search == TRUE ) {
set_mo_history ( a.x_backup , get_mo_code ( found [1L ] , property ) , 2 , force = force_mo_history )
}
return ( found [1L ] )
}
}
}
# (8) part of a name (very unlikely match) ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 3] (8) part of a name (very unlikely match)\n" )
}
if ( isTRUE ( debug ) ) {
message ( " Running '" , f.x_withspaces_end_only , " '" )
}
found <- microorganismsDT [fullname %like% f.x_withspaces_end_only ]
if ( nrow ( found ) > 0 ) {
found_result <- found [ [ " mo" ] ]
if ( ! empty_result ( found_result ) & nchar ( g.x_backup_without_spp ) >= 6 ) {
found <- microorganismsDT [mo == found_result [1L ] , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 3 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1307,7 +1403,7 @@ exec_as.mo <- function(x,
@@ -1307,7 +1403,7 @@ exec_as.mo <- function(x,
}
}
}
# didn't found in uncertain results too
return ( NA_character_ )
}
@ -1321,7 +1417,7 @@ exec_as.mo <- function(x,
@@ -1321,7 +1417,7 @@ exec_as.mo <- function(x,
# no set_mo_history here - it is already set in uncertain_fn()
next
}
# no results found: make them UNKNOWN ----
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
if ( initial_search == TRUE ) {
@ -1330,7 +1426,7 @@ exec_as.mo <- function(x,
@@ -1330,7 +1426,7 @@ exec_as.mo <- function(x,
}
}
}
# handling failures ----
failures <- failures [ ! failures %in% c ( NA , NULL , NaN ) ]
if ( length ( failures ) > 0 & initial_search == TRUE ) {
@ -1355,7 +1451,7 @@ exec_as.mo <- function(x,
@@ -1355,7 +1451,7 @@ exec_as.mo <- function(x,
# handling uncertainties ----
if ( NROW ( uncertainties ) > 0 & initial_search == TRUE ) {
options ( mo_uncertainties = as.list ( distinct ( uncertainties , input , .keep_all = TRUE ) ) )
plural <- c ( " " , " it" )
if ( NROW ( uncertainties ) > 1 ) {
plural <- c ( " s" , " them" )
@ -1366,7 +1462,7 @@ exec_as.mo <- function(x,
@@ -1366,7 +1462,7 @@ exec_as.mo <- function(x,
call. = FALSE ,
immediate. = TRUE ) # thus will always be shown, even if >= warnings
}
# Becker ----
if ( Becker == TRUE | Becker == " all" ) {
# See Source. It's this figure:
@ -1391,11 +1487,11 @@ exec_as.mo <- function(x,
@@ -1391,11 +1487,11 @@ exec_as.mo <- function(x,
" pseudintermedius" , " pseudointermedius" ,
" schweitzeri" , " argenteus" )
| ( species == " schleiferi" & subspecies == " coagulans" ) , ..property ] [ [1 ] ]
# warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103)
post_Becker <- c ( " argensis" , " caeli" , " cornubiensis" , " edaphicus" )
if ( any ( x %in% MOs_staph [species %in% post_Becker , ..property ] [ [1 ] ] ) ) {
warning ( " Becker " , italic ( " et al." ) , " (2014, 2019) does not contain these species named after their publication: " ,
italic ( paste ( " S." ,
sort ( mo_species ( unique ( x [x %in% MOs_staph [species %in% post_Becker , ..property ] [ [1 ] ] ] ) ) ) ,
@ -1404,14 +1500,14 @@ exec_as.mo <- function(x,
@@ -1404,14 +1500,14 @@ exec_as.mo <- function(x,
call. = FALSE ,
immediate. = TRUE )
}
x [x %in% CoNS ] <- microorganismsDT [mo == ' B_STPHY_CNS' , ..property ] [ [1 ] ] [1L ]
x [x %in% CoPS ] <- microorganismsDT [mo == ' B_STPHY_CPS' , ..property ] [ [1 ] ] [1L ]
if ( Becker == " all" ) {
x [x %in% microorganismsDT [mo %like% ' ^B_STPHY_AUR' , ..property ] [ [1 ] ] ] <- microorganismsDT [mo == ' B_STPHY_CPS' , ..property ] [ [1 ] ] [1L ]
}
}
# Lancefield ----
if ( Lancefield == TRUE | Lancefield == " all" ) {
# group A - S. pyogenes
@ -1435,37 +1531,37 @@ exec_as.mo <- function(x,
@@ -1435,37 +1531,37 @@ exec_as.mo <- function(x,
# group K - S. salivarius
x [x == microorganismsDT [mo == ' B_STRPT_SAL' , ..property ] [ [1 ] ] [1L ] ] <- microorganismsDT [mo == ' B_STRPT_GRK' , ..property ] [ [1 ] ] [1L ]
}
# Wrap up ----------------------------------------------------------------
# comply to x, which is also unique and without empty values
x_input_unique_nonempty <- unique ( x_input [ ! is.na ( x_input )
& ! is.null ( x_input )
& ! identical ( x_input , " " )
& ! identical ( x_input , " xxx" ) ] )
# left join the found results to the original input values (x_input)
df_found <- data.frame ( input = as.character ( x_input_unique_nonempty ) ,
found = as.character ( x ) ,
stringsAsFactors = FALSE )
df_input <- data.frame ( input = as.character ( x_input ) ,
stringsAsFactors = FALSE )
suppressWarnings (
x <- df_input %>%
left_join ( df_found ,
by = " input" ) %>%
pull ( found )
)
if ( property == " mo" ) {
x <- to_class_mo ( x )
}
if ( length ( mo_renamed ( ) ) > 0 ) {
print ( mo_renamed ( ) )
}
x
}
@ -1494,7 +1590,7 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "")
@@ -1494,7 +1590,7 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "")
old_values <- gsub ( " et al." , italic ( " et al." ) , old_values )
new_values <- paste0 ( italic ( name_new ) , ref_new , mo )
new_values <- gsub ( " et al." , italic ( " et al." ) , new_values )
names ( new_values ) <- old_values
total <- c ( getOption ( " mo_renamed" ) , new_values )
options ( mo_renamed = total [order ( names ( total ) ) ] )
@ -1604,7 +1700,7 @@ print.mo_uncertainties <- function(x, ...) {
@@ -1604,7 +1700,7 @@ print.mo_uncertainties <- function(x, ...) {
" \n(1 = " , green ( " renamed/misspelled" ) ,
" , 2 = " , yellow ( " uncertain" ) ,
" , 3 = " , red ( " very uncertain" ) , " )\n" ) )
msg <- " "
for ( i in 1 : nrow ( x ) ) {
if ( x [i , " uncertainty" ] == 1 ) {
@ -1633,11 +1729,11 @@ mo_renamed <- function() {
@@ -1633,11 +1729,11 @@ mo_renamed <- function() {
if ( is.null ( items ) ) {
return ( NULL )
}
items <- strip_style ( items )
names ( items ) <- strip_style ( names ( items ) )
structure ( .Data = items ,
class = c ( " mo_renamed" , " character" ) )
class = c ( " mo_renamed" , " character" ) )
}
#' @exportMethod print.mo_renamed
@ -1666,7 +1762,7 @@ unregex <- function(x) {
@@ -1666,7 +1762,7 @@ unregex <- function(x) {
get_mo_code <- function ( x , property ) {
# don't use right now
return ( NULL )
if ( property == " mo" ) {
unique ( x )
} else {