# Tests for NUSS::ngrams_segmentation().
#
# The fixture `ndict` is a small n-grams dictionary passed as the second
# argument to ngrams_segmentation(). Each row pairs an unsegmented string
# (`to_search`) with its space-separated form (`to_replace`), plus an `id`
# and a `points` value.
# NOTE(review): `points` is presumably the score used to rank candidate
# segmentations -- confirm against the NUSS documentation.

library(testthat)
library(NUSS)

ndict <- data.frame(
  to_search = c(
    "is", "science", "scienceis", "this", "thisis", "a", "approach",
    "ascientific", "ascientificapproach", "beauty", "beautyof",
    "beautyofscience", "everywhere", "isa", "isascientific",
    "isascientificapproach", "iseverywhere", "isscience", "of", "ofscience",
    "scienceiseverywhere", "scientific", "scientificapproach", "the",
    "thebeauty", "thebeautyof", "thebeautyofscience", "thisisa",
    "thisisascientific", "thisisascientificapproach", "thisisscience"
  ),
  to_replace = c(
    "is", "science", "science is", "this", "this is", "a", "approach",
    "a scientific", "a scientific approach", "beauty", "beauty of",
    "beauty of science", "everywhere", "is a", "is a scientific",
    "is a scientific approach", "is everywhere", "is science", "of",
    "of science", "science is everywhere", "scientific",
    "scientific approach", "the", "the beauty", "the beauty of",
    "the beauty of science", "this is a", "this is a scientific",
    "this is a scientific approach", "this is science"
  ),
  # Kept as literal doubles (not seq_len()) so the column type is unchanged.
  id = c(
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31
  ),
  points = c(
    4, 4, 2, 2, 2, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1
  )
)

test_that("ngrams_segmentation segments a single sequence", {
  result <- NUSS::ngrams_segmentation("thisisscience", ndict)

  # Only the first two columns are compared; any further columns of the
  # result are not part of this expectation.
  expect_equal(
    result[, 1:2],
    data.frame(
      sequence = "thisisscience",
      segmented = "this is science"
    )
  )
})

test_that("ngrams_segmentation segments multiple sequences", {
  sequences <- c("scienceiseverywhere", "thisisscience")
  result <- NUSS::ngrams_segmentation(sequences, ndict)

  # Expected rows are listed in the same order as the input sequences.
  expect_equal(
    result[, 1:2],
    data.frame(
      sequence = c("scienceiseverywhere", "thisisscience"),
      segmented = c("science is everywhere", "this is science")
    )
  )
})