diff --git a/R/replace_non_ascii.R b/R/replace_non_ascii.R
index 86878c6..3683a95 100644
--- a/R/replace_non_ascii.R
+++ b/R/replace_non_ascii.R
@@ -35,7 +35,7 @@
 replace_non_ascii <- function (x, replacement = '', remove.nonconverted = TRUE, ...) {
 
     x <- replace_curly_quote(x)
-    x <- stringi::stri_trans_general(x, "latin-ascii")
+    x <- stringi::stri_trans_general(x, "Any-Latin; Latin-ASCII")
     x <- iconv(as.character(x), "", "ASCII", "byte")
     Encoding(x) <- "latin1"
     x <- mgsub(x, ser, reps)
diff --git a/tests/testthat/test-replace_non_ascii.R b/tests/testthat/test-replace_non_ascii.R
new file mode 100755
index 0000000..443ee0b
--- /dev/null
+++ b/tests/testthat/test-replace_non_ascii.R
@@ -0,0 +1,35 @@
+# Tests for replace_non_ascii(), replace_non_ascii2() and replace_curly_quote().
+# Non-ASCII input is written with \u / \x escapes so this file stays pure ASCII
+# and parses identically regardless of the platform's native encoding.
+
+test_that("replace_non_ascii transliterates Latin and non-Latin scripts to ASCII", {
+  # Strings built from \u escapes are already marked UTF-8 by the parser.
+  x <- c(
+    "hei\u00df", # German: sharp s
+    "br\u00fbl\u00e9e", # French: circumflex and acute accents
+    "\u0414\u043e\u0440\u043e\u0433\u0430", # Cyrillic
+    "\u30ad\u30e3\u30f3\u30d1\u30b9", # Katakana
+    "\u092d\u094b\u091c\u0928" # Devanagari
+  )
+  expect_equal(
+    replace_non_ascii(x),
+    c("heiss", "brulee", "Doroga", "kyanpasu", "bhojana")
+  )
+})
+
+test_that("replace_non_ascii with remove.nonconverted = FALSE leaves ASCII input unchanged", {
+  # NOTE(review): ASCII-only input never has anything left unconverted; add a
+  # case with a non-convertible character once its expected output is confirmed.
+  expect_equal(replace_non_ascii("hello", remove.nonconverted = FALSE), "hello")
+})
+
+test_that("replace_non_ascii2 leaves ASCII input unchanged", {
+  expect_equal(replace_non_ascii2("hello world"), "hello world")
+})
+
+test_that("replace_curly_quote replaces curly quotes", {
+  # Bytes 0x93 / 0x94 (curly double quotes in Windows-1252), marked as latin1.
+  z <- '\x93Hello\x94'
+  Encoding(z) <- "latin1"
+  expect_equal(replace_curly_quote(z), '"Hello"')
+})