@ -193,19 +193,19 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
require ( " AMR" )
# check onLoad() in R/zzz.R: data tables are created there.
}
# WHONET: xxx = no growth
x [tolower ( as.character ( paste0 ( x , " " ) ) ) %in% c ( " " , " xxx" , " na" , " nan" ) ] <- NA_character_
uncertainty_level <- translate_allow_uncertain ( allow_uncertain )
# mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history))
if ( mo_source_isvalid ( reference_df )
& isFALSE ( Becker )
& isFALSE ( Lancefield )
& ! is.null ( reference_df )
& all ( x %in% reference_df [ , 1 ] [ [1 ] ] ) ) {
# has valid own reference_df
# (data.table not faster here)
reference_df <- reference_df %>% filter ( ! is.na ( mo ) )
@ -225,18 +225,18 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
left_join ( reference_df , by = " x" ) %>%
pull ( " mo" )
)
} else if ( all ( x %in% AMR :: microorganisms $ mo )
& isFALSE ( Becker )
& isFALSE ( Lancefield ) ) {
y <- x
# } else if (!any(is.na(mo_hist))
# & isFALSE(Becker)
# & isFALSE(Lancefield)) {
# # check previously found results
# y <- mo_hist
} else if ( all ( tolower ( x ) %in% microorganismsDT $ fullname_lower )
& isFALSE ( Becker )
& isFALSE ( Lancefield ) ) {
@ -257,7 +257,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
}
# save them to history
set_mo_history ( x , y , 0 , force = isTRUE ( list ( ... ) $ force_mo_history ) )
} else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate ( x = x , property = " mo" ,
@ -266,8 +266,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
force_mo_history = isTRUE ( list ( ... ) $ force_mo_history ) ,
... )
}
to_class_mo ( y )
}
@ -286,6 +286,7 @@ is.mo <- function(x) {
#' @importFrom crayon magenta red blue silver italic
# param property a column name of AMR::microorganisms
# param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too
# param dyslexia_mode logical - also check for characters that resemble others
# param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
# param debug logical - show different lookup texts while searching
exec_as.mo <- function ( x ,
@ -295,23 +296,24 @@ exec_as.mo <- function(x,
reference_df = get_mo_source ( ) ,
property = " mo" ,
initial_search = TRUE ,
dyslexia_mode = FALSE ,
force_mo_history = FALSE ,
debug = FALSE ) {
if ( ! " AMR" %in% base :: .packages ( ) ) {
require ( " AMR" )
# check onLoad() in R/zzz.R: data tables are created there.
}
# WHONET: xxx = no growth
x [tolower ( as.character ( paste0 ( x , " " ) ) ) %in% c ( " " , " xxx" , " na" , " nan" ) ] <- NA_character_
if ( initial_search == TRUE ) {
options ( mo_failures = NULL )
options ( mo_uncertainties = NULL )
options ( mo_renamed = NULL )
}
if ( NCOL ( x ) == 2 ) {
# support tidyverse selection like: df %>% select(colA, colB)
# paste these columns together
@ -325,20 +327,20 @@ exec_as.mo <- function(x,
stop ( ' `x` can be 2 columns at most' , call. = FALSE )
}
x [is.null ( x ) ] <- NA
# support tidyverse selection like: df %>% select(colA)
if ( ! is.vector ( x ) & ! is.null ( dim ( x ) ) ) {
x <- pull ( x , 1 )
}
}
notes <- character ( 0 )
uncertainties <- data.frame ( input = character ( 0 ) ,
fullname = character ( 0 ) ,
mo = character ( 0 ) )
failures <- character ( 0 )
uncertainty_level <- translate_allow_uncertain ( allow_uncertain )
x_input <- x
# already strip leading and trailing spaces
x <- trimws ( x , which = " both" )
@ -350,7 +352,7 @@ exec_as.mo <- function(x,
& ! is.null ( x )
& ! identical ( x , " " )
& ! identical ( x , " xxx" ) ]
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if ( any ( x %like% " ^[BFP]_[A-Z]{3,7}" ) & ! all ( x %in% microorganisms $ mo ) ) {
leftpart <- gsub ( " ^([BFP]_[A-Z]{3,7}).*" , " \\1" , x )
@ -372,7 +374,7 @@ exec_as.mo <- function(x,
pull ( new )
}
}
# defined df to check for
if ( ! is.null ( reference_df ) ) {
if ( ! mo_source_isvalid ( reference_df ) ) {
@ -391,7 +393,7 @@ exec_as.mo <- function(x,
reference_df [ ] <- lapply ( reference_df , as.character )
)
}
# all empty
if ( all ( identical ( trimws ( x_input ) , " " ) | is.na ( x_input ) | length ( x ) == 0 ) ) {
if ( property == " mo" ) {
@ -399,7 +401,7 @@ exec_as.mo <- function(x,
} else {
return ( rep ( NA_character_ , length ( x_input ) ) )
}
} else if ( all ( x %in% reference_df [ , 1 ] [ [1 ] ] ) ) {
# all in reference df
colnames ( reference_df ) [1 ] <- " x"
@ -409,7 +411,7 @@ exec_as.mo <- function(x,
left_join ( AMR :: microorganisms , by = " mo" ) %>%
pull ( property )
)
} else if ( all ( x %in% AMR :: microorganisms $ mo ) ) {
# existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
y <- microorganismsDT [prevalence == 1 ] [data.table ( mo = x ) , on = " mo" , ..property ] [ [1 ] ]
@ -424,7 +426,7 @@ exec_as.mo <- function(x,
..property ] [ [1 ] ]
}
x <- y
} else if ( all ( x %in% read_mo_history ( uncertainty_level ,
force = force_mo_history ) $ x ) ) {
# previously found code
@ -432,7 +434,7 @@ exec_as.mo <- function(x,
uncertainty_level ,
force = force_mo_history ) ) ,
on = " mo" , ..property ] [ [1 ] ]
} else if ( all ( tolower ( x ) %in% microorganismsDT $ fullname_lower ) ) {
# we need special treatment for very prevalent full names, they are likely!
# e.g. as.mo("Staphylococcus aureus")
@ -448,30 +450,30 @@ exec_as.mo <- function(x,
..property ] [ [1 ] ]
}
x <- y
} else if ( all ( toupper ( x ) %in% AMR :: microorganisms.codes $ code ) ) {
# commonly used MO codes
y <- as.data.table ( AMR :: microorganisms.codes ) [data.table ( code = toupper ( x ) ) , on = " code" , ]
# save them to history
set_mo_history ( x , y $ mo , 0 , force = force_mo_history )
x <- microorganismsDT [data.table ( mo = y [ [ " mo" ] ] ) , on = " mo" , ..property ] [ [1 ] ]
} else if ( ! all ( x %in% AMR :: microorganisms [ , property ] ) ) {
strip_whitespace <- function ( x ) {
# all whitespaces (tab, new lines, etc.) should be one space
# and spaces before and after should be omitted
trimws ( gsub ( " [\\s]+" , " " , x , perl = TRUE ) , which = " both" )
}
x <- strip_whitespace ( x )
x_backup <- x
# remove spp and species
x <- gsub ( " +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)" , " " , x_backup , ignore.case = TRUE )
x <- strip_whitespace ( x )
x_backup_without_spp <- x
x_species <- paste ( x , " species" )
# translate to English for supported languages of mo_property
@ -490,7 +492,7 @@ exec_as.mo <- function(x,
# remove genus as first word
x <- gsub ( " ^Genus " , " " , x )
# allow characters that resemble others ----
if ( initial_search == FALS E) {
if ( dyslexia_mode == TRU E) {
x <- tolower ( x )
x <- gsub ( " [iy]+" , " [iy]+" , x )
x <- gsub ( " (c|k|q|qu|s|z|x|ks)+" , " (c|k|q|qu|s|z|x|ks)+" , x )
@ -512,7 +514,7 @@ exec_as.mo <- function(x,
x <- gsub ( " e\\+n(?![a-z[])" , " (e+n|u+(c|k|q|qu|s|z|x|ks)+)" , x , ignore.case = TRUE , perl = TRUE )
}
x <- strip_whitespace ( x )
x_trimmed <- x
x_trimmed_species <- paste ( x_trimmed , " species" )
x_trimmed_without_group <- gsub ( " gro.u.p$" , " " , x_trimmed , ignore.case = TRUE )
@ -526,7 +528,7 @@ exec_as.mo <- function(x,
x_withspaces_start_only <- paste0 ( ' ^' , x_withspaces )
x_withspaces_end_only <- paste0 ( x_withspaces , ' $' )
x_withspaces_start_end <- paste0 ( ' ^' , x_withspaces , ' $' )
if ( isTRUE ( debug ) ) {
cat ( paste0 ( ' x "' , x , ' "\n' ) )
cat ( paste0 ( ' x_species "' , x_species , ' "\n' ) )
@ -539,13 +541,13 @@ exec_as.mo <- function(x,
cat ( paste0 ( ' x_trimmed_species "' , x_trimmed_species , ' "\n' ) )
cat ( paste0 ( ' x_trimmed_without_group "' , x_trimmed_without_group , ' "\n' ) )
}
progress <- progress_estimated ( n = length ( x ) , min_time = 3 )
for ( i in 1 : length ( x ) ) {
progress $ tick ( ) $ print ( )
if ( initial_search == TRUE ) {
found <- microorganismsDT [mo == get_mo_history ( x_backup [i ] ,
uncertainty_level ,
@ -557,14 +559,14 @@ exec_as.mo <- function(x,
next
}
}
found <- microorganismsDT [mo == toupper ( x_backup [i ] ) , ..property ] [ [1 ] ]
# is a valid MO code
if ( length ( found ) > 0 ) {
x [i ] <- found [1L ]
next
}
found <- microorganismsDT [fullname_lower %in% tolower ( c ( x_backup [i ] , x_backup_without_spp [i ] ) ) , ..property ] [ [1 ] ]
# most probable: is exact match in fullname
if ( length ( found ) > 0 ) {
@ -574,7 +576,7 @@ exec_as.mo <- function(x,
}
next
}
found <- microorganismsDT [col_id == x_backup [i ] , ..property ] [ [1 ] ]
# is a valid Catalogue of Life ID
if ( NROW ( found ) > 0 ) {
@ -584,14 +586,14 @@ exec_as.mo <- function(x,
}
next
}
# WHONET: xxx = no growth
if ( tolower ( as.character ( paste0 ( x_backup_without_spp [i ] , " " ) ) ) %in% c ( " " , " xxx" , " na" , " nan" ) ) {
x [i ] <- NA_character_
next
}
if ( tolower ( x_backup_without_spp [i ] ) %in% c ( " other" , " none" , " unknown" ) ) {
# empty and nonsense values, ignore without warning
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
@ -600,7 +602,7 @@ exec_as.mo <- function(x,
}
next
}
# check for very small input, but ignore the O antigens of E. coli
if ( nchar ( gsub ( " [^a-zA-Z]" , " " , x_trimmed [i ] ) ) < 3
& ! x_backup_without_spp [i ] %like% " O?(26|103|104|104|111|121|145|157)" ) {
@ -629,7 +631,7 @@ exec_as.mo <- function(x,
}
next
}
if ( x_backup_without_spp [i ] %like% " virus" ) {
# there is no fullname like virus, so don't try to coerce it
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
@ -639,7 +641,7 @@ exec_as.mo <- function(x,
}
next
}
# translate known trivial abbreviations to genus + species ----
if ( ! is.na ( x_trimmed [i ] ) ) {
if ( toupper ( x_backup_without_spp [i ] ) %in% c ( ' MRSA' , ' MSSA' , ' VISA' , ' VRSA' ) ) {
@ -830,7 +832,7 @@ exec_as.mo <- function(x,
next
}
}
# FIRST TRY FULLNAMES AND CODES ----
# if only genus is available, return only genus
if ( all ( ! c ( x [i ] , x_trimmed [i ] ) %like% " " ) ) {
@ -854,7 +856,7 @@ exec_as.mo <- function(x,
}
# rest of genus only is in allow_uncertain part.
}
# TRY OTHER SOURCES ----
# WHONET and other common LIS codes
if ( toupper ( x_backup [i ] ) %in% AMR :: microorganisms.codes [ , 1 ] ) {
@ -879,7 +881,7 @@ exec_as.mo <- function(x,
}
}
}
# allow no codes less than 4 characters long, was already checked for WHONET above
if ( nchar ( x_backup_without_spp [i ] ) < 4 ) {
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
@ -889,7 +891,7 @@ exec_as.mo <- function(x,
}
next
}
check_per_prevalence <- function ( data_to_check ,
a.x_backup ,
b.x_trimmed ,
@ -898,19 +900,19 @@ exec_as.mo <- function(x,
e.x_withspaces_start_only ,
f.x_withspaces_end_only ,
g.x_backup_without_spp ) {
# try probable: trimmed version of fullname ----
found <- data_to_check [fullname_lower %in% tolower ( g.x_backup_without_spp ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
return ( found [1L ] )
}
# try any match keeping spaces ----
found <- data_to_check [fullname %like% d.x_withspaces_start_end , ..property ] [ [1 ] ]
if ( length ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
return ( found [1L ] )
}
# try any match keeping spaces, not ending with $ ----
found <- data_to_check [fullname %like% paste0 ( trimws ( e.x_withspaces_start_only ) , " " ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
@ -920,21 +922,21 @@ exec_as.mo <- function(x,
if ( length ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
return ( found [1L ] )
}
# try any match keeping spaces, not start with ^ ----
found <- data_to_check [fullname %like% paste0 ( " " , trimws ( f.x_withspaces_end_only ) ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
return ( found [1L ] )
}
# try a trimmed version
found <- data_to_check [fullname_lower %like% b.x_trimmed
| fullname_lower %like% c.x_trimmed_without_group , ..property ] [ [1 ] ]
if ( length ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
return ( found [1L ] )
}
# try splitting of characters in the middle and then find ID ----
# only when text length is 6 or lower
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
@ -949,18 +951,18 @@ exec_as.mo <- function(x,
return ( found [1L ] )
}
}
# try fullname without start and without nchar limit of >= 6 ----
# like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
found <- data_to_check [fullname %like% e.x_withspaces_start_only , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
return ( found [1L ] )
}
# didn't found any
return ( NA_character_ )
}
# FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ----
x [i ] <- check_per_prevalence ( data_to_check = microorganismsDT [prevalence == 1 ] ,
a.x_backup = x_backup [i ] ,
@ -1006,9 +1008,9 @@ exec_as.mo <- function(x,
}
next
}
# MISCELLANEOUS ----
# look for old taxonomic names ----
found <- microorganisms.oldDT [fullname_lower == tolower ( x_backup [i ] )
| fullname %like% x_withspaces_start_end [i ] , ]
@ -1032,7 +1034,7 @@ exec_as.mo <- function(x,
}
next
}
# check for uncertain results ----
uncertain_fn <- function ( a.x_backup ,
b.x_trimmed ,
@ -1040,17 +1042,22 @@ exec_as.mo <- function(x,
d.x_withspaces_start_only ,
f.x_withspaces_end_only ,
g.x_backup_without_spp ) {
if ( uncertainty_level == 0 ) {
# do not allow uncertainties
return ( NA_character_ )
}
if ( uncertainty_level >= 1 ) {
now_checks_for_uncertainty_level <- 1
# (1) look again for old taxonomic names, now for G. species ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 1] (1) look again for old taxonomic names, now for G. species\n" )
}
if ( isTRUE ( debug ) ) {
message ( " Running '" , c.x_withspaces_start_end , " ' and '" , d.x_withspaces_start_only , " '" )
}
found <- microorganisms.oldDT [fullname %like% c.x_withspaces_start_end
| fullname %like% d.x_withspaces_start_only ]
if ( NROW ( found ) > 0 & nchar ( g.x_backup_without_spp ) >= 6 ) {
@ -1068,7 +1075,7 @@ exec_as.mo <- function(x,
ref_new = microorganismsDT [col_id == found [1 , col_id_new ] , ref ] ,
mo = microorganismsDT [col_id == found [1 , col_id_new ] , mo ] )
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 1 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = found [1 , fullname ] ,
mo = paste ( " CoL" , found [1 , col_id ] ) ) )
@ -1077,18 +1084,26 @@ exec_as.mo <- function(x,
}
return ( x )
}
# (2) Try with misspelled input ----
# just rerun with initial_search = FALS E will used the extensive regex part above
# just rerun with dyslexia_mode = TRU E will used the extensive regex part above
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 1] (2) Try with misspelled input\n" )
}
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , a.x_backup , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 1 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1098,21 +1113,25 @@ exec_as.mo <- function(x,
return ( found [1L ] )
}
}
if ( uncertainty_level >= 2 ) {
now_checks_for_uncertainty_level <- 2
# (3) look for genus only, part of name ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 2] (3) look for genus only, part of name\n" )
}
if ( nchar ( g.x_backup_without_spp ) > 4 & ! b.x_trimmed %like% " " ) {
if ( ! grepl ( " ^[A-Z][a-z]+" , b.x_trimmed , ignore.case = FALSE ) ) {
if ( isTRUE ( debug ) ) {
message ( " Running '" , paste ( b.x_trimmed , " species" ) , " '" )
}
# not when input is like Genustext, because then Neospora would lead to Actinokineospora
found <- microorganismsDT [fullname_lower %like% paste ( b.x_trimmed , " species" ) , ..property ] [ [1 ] ]
if ( length ( found ) > 0 ) {
x [i ] <- found [1L ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found [1L ] , fullname ] [ [1 ] ] ,
mo = found [1L ] ) )
@ -1123,19 +1142,27 @@ exec_as.mo <- function(x,
}
}
}
# (4) strip values between brackets ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 2] (4) strip values between brackets\n" )
}
a.x_backup_stripped <- gsub ( " ( *[(].*[)] *)" , " " , a.x_backup )
a.x_backup_stripped <- trimws ( gsub ( " +" , " " , a.x_backup_stripped ) )
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup_stripped , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , a.x_backup_stripped , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup_stripped , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( a.x_backup_stripped , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) & nchar ( g.x_backup_without_spp ) >= 6 ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1144,7 +1171,7 @@ exec_as.mo <- function(x,
}
return ( found [1L ] )
}
# (5a) try to strip off half an element from end and check the remains ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 2] (5a) try to strip off half an element from end and check the remains\n" )
@ -1156,13 +1183,21 @@ exec_as.mo <- function(x,
lastword_half <- substr ( lastword , 1 , as.integer ( nchar ( lastword ) / 2 ) )
# remove last half of the second term
x_strip_collapsed <- paste ( c ( x_strip [1 : ( length ( x_strip ) - i ) ] , lastword_half ) , collapse = " " )
if ( nchar ( x_strip_collapsed ) >= 4 ) {
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( nchar ( x_strip_collapsed ) >= 4 & nchar ( lastword_half ) > 2 ) {
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1181,13 +1216,21 @@ exec_as.mo <- function(x,
if ( length ( x_strip ) > 1 ) {
for ( i in 1 : ( length ( x_strip ) - 1 ) ) {
x_strip_collapsed <- paste ( x_strip [1 : ( length ( x_strip ) - i ) ] , collapse = " " )
if ( nchar ( x_strip_collapsed ) >= 4 ) {
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( nchar ( x_strip_collapsed ) >= 6 ) {
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1208,7 +1251,7 @@ exec_as.mo <- function(x,
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1222,7 +1265,7 @@ exec_as.mo <- function(x,
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1239,14 +1282,22 @@ exec_as.mo <- function(x,
if ( length ( x_strip ) > 1 & nchar ( g.x_backup_without_spp ) >= 6 ) {
for ( i in 2 : ( length ( x_strip ) ) ) {
x_strip_collapsed <- paste ( x_strip [i : length ( x_strip ) ] , collapse = " " )
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found_result [1L ] , ..property ] [ [1 ] ]
# uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3)
if ( x_strip_collapsed %like% " " ) {
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 2 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1259,22 +1310,32 @@ exec_as.mo <- function(x,
}
}
}
if ( uncertainty_level >= 3 ) {
# (7) try to strip off one element from start and check the remains ----
now_checks_for_uncertainty_level <- 3
# (7a) try to strip off one element from start and check the remains (any text size) ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 3] (7) try to strip off one element from start and check the remains\n" )
cat ( " \n[UNCERTAINLY LEVEL 3] (7 a ) try to strip off one element from start and check the remains (any text size) \n")
}
x_strip <- a.x_backup %>% strsplit ( " " ) %>% unlist ( )
if ( length ( x_strip ) > 1 & nchar ( g.x_backup_without_spp ) >= 6 ) {
for ( i in 2 : ( length ( x_strip ) ) ) {
x_strip_collapsed <- paste ( x_strip [i : length ( x_strip ) ] , collapse = " " )
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 3 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1285,18 +1346,53 @@ exec_as.mo <- function(x,
}
}
}
# (7b) try to strip off one element from end and check the remains (any text size) ----
# (this is in fact 5b but without nchar limit of >=6)
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 3] (7b) try to strip off one element from end and check the remains (any text size)\n" )
}
if ( length ( x_strip ) > 1 ) {
for ( i in 1 : ( length ( x_strip ) - 1 ) ) {
x_strip_collapsed <- paste ( x_strip [1 : ( length ( x_strip ) - i ) ] , collapse = " " )
if ( isTRUE ( debug ) ) {
message ( " Running '" , x_strip_collapsed , " '" )
}
# first try without dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = FALSE , allow_uncertain = FALSE , debug = debug ) ) )
if ( empty_result ( found ) ) {
# then with dyslexia mode
found <- suppressMessages ( suppressWarnings ( exec_as.mo ( x_strip_collapsed , initial_search = FALSE , dyslexia_mode = TRUE , allow_uncertain = FALSE , debug = debug ) ) )
}
if ( ! empty_result ( found ) ) {
found_result <- found
found <- microorganismsDT [mo == found , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
if ( initial_search == TRUE ) {
set_mo_history ( a.x_backup , get_mo_code ( found [1L ] , property ) , 2 , force = force_mo_history )
}
return ( found [1L ] )
}
}
}
# (8) part of a name (very unlikely match) ----
if ( isTRUE ( debug ) ) {
cat ( " \n[UNCERTAINLY LEVEL 3] (8) part of a name (very unlikely match)\n" )
}
if ( isTRUE ( debug ) ) {
message ( " Running '" , f.x_withspaces_end_only , " '" )
}
found <- microorganismsDT [fullname %like% f.x_withspaces_end_only ]
if ( nrow ( found ) > 0 ) {
found_result <- found [ [ " mo" ] ]
if ( ! empty_result ( found_result ) & nchar ( g.x_backup_without_spp ) >= 6 ) {
found <- microorganismsDT [mo == found_result [1L ] , ..property ] [ [1 ] ]
uncertainties <<- rbind ( uncertainties ,
data.frame ( uncertainty = 3 ,
data.frame ( uncertainty = now_checks_for_uncertainty_level ,
input = a.x_backup ,
fullname = microorganismsDT [mo == found_result [1L ] , fullname ] [ [1 ] ] ,
mo = found_result [1L ] ) )
@ -1307,7 +1403,7 @@ exec_as.mo <- function(x,
}
}
}
# didn't found in uncertain results too
return ( NA_character_ )
}
@ -1321,7 +1417,7 @@ exec_as.mo <- function(x,
# no set_mo_history here - it is already set in uncertain_fn()
next
}
# no results found: make them UNKNOWN ----
x [i ] <- microorganismsDT [mo == " UNKNOWN" , ..property ] [ [1 ] ]
if ( initial_search == TRUE ) {
@ -1330,7 +1426,7 @@ exec_as.mo <- function(x,
}
}
}
# handling failures ----
failures <- failures [ ! failures %in% c ( NA , NULL , NaN ) ]
if ( length ( failures ) > 0 & initial_search == TRUE ) {
@ -1355,7 +1451,7 @@ exec_as.mo <- function(x,
# handling uncertainties ----
if ( NROW ( uncertainties ) > 0 & initial_search == TRUE ) {
options ( mo_uncertainties = as.list ( distinct ( uncertainties , input , .keep_all = TRUE ) ) )