# Integration tests for the llamaR bindings.
#
# Tests that need a model are skipped when MODEL_PATH does not exist; the LoRA
# tests that apply an adapter are additionally skipped when LORA_PATH is
# missing. One model and one context are loaded once and shared by most tests.

MODEL_PATH <- "/mnt/Data2/DS_projects/llm_models/tiny-mistral-test-Q2_K.gguf"
LORA_PATH <- "/mnt/Data2/DS_projects/llm_models/test-lora-adapter.gguf"

# ============================================================
# Shared fixtures: load model & context once
# ============================================================

HAS_MODEL <- file.exists(MODEL_PATH)

if (HAS_MODEL) {
  shared_model <- llama_load_model(MODEL_PATH)
  # withr::defer() runs handlers last-in-first-out. The model release is
  # registered first and the context release second, so at teardown the
  # context is freed before the model it was created from.
  withr::defer(llama_free_model(shared_model), teardown_env())
  shared_info <- llama_model_info(shared_model)
  shared_ctx <- llama_new_context(shared_model, n_ctx = 256L, n_threads = 2L)
  withr::defer(llama_free_context(shared_ctx), teardown_env())
}

# Skip the calling test when the test model file is not present.
skip_if_no_model <- function() {
  if (!HAS_MODEL) skip("test model not available")
}

# ============================================================
# Package load (no model required)
# ============================================================

test_that("package loads correctly", {
  # require() is used deliberately here: its logical return value is the
  # thing under test.
  expect_true(require(llamaR, quietly = TRUE))
})

# ============================================================
# Verbosity (no model required)
# ============================================================

test_that("verbosity can be set and retrieved", {
  old <- llama_get_verbosity()
  # Restore the previous level even if a call below throws.
  withr::defer(llama_set_verbosity(old))

  llama_set_verbosity(0L)
  expect_equal(llama_get_verbosity(), 0L)

  llama_set_verbosity(3L)
  expect_equal(llama_get_verbosity(), 3L)

  llama_set_verbosity(old)
  expect_equal(llama_get_verbosity(), old)
})

# ============================================================
# Hardware / System info (no model required)
# ============================================================

test_that("llama_supports_gpu returns logical", {
  result <- llama_supports_gpu()
  expect_true(is.logical(result))
  expect_equal(length(result), 1L)
})

test_that("system_info returns non-empty string", {
  info <- llama_system_info()
  expect_true(is.character(info))
  expect_true(nchar(info) > 0)
})

test_that("supports_mmap returns logical", {
  result <- llama_supports_mmap()
  expect_true(is.logical(result))
  expect_equal(length(result), 1L)
})

test_that("supports_mlock returns logical", {
  result <- llama_supports_mlock()
  expect_true(is.logical(result))
  expect_equal(length(result), 1L)
})

test_that("max_devices returns positive integer", {
  result <- llama_max_devices()
  expect_true(is.integer(result))
  expect_true(result >= 1L)
})

test_that("chat_builtin_templates returns character vector", {
  templates <- llama_chat_builtin_templates()
  expect_true(is.character(templates))
  expect_true(length(templates) > 0)
})

# ============================================================
# Model: load + info
# ============================================================

test_that("model loads and info is returned", {
  skip_if_no_model()
  expect_false(is.null(shared_model))
  expect_true(is.list(shared_info))
  expect_true(shared_info$n_vocab > 0)
  expect_true(shared_info$n_embd > 0)
  expect_true(shared_info$n_layer > 0)
  expect_true(shared_info$n_head > 0)
  expect_true(nchar(shared_info$desc) > 0)
})

test_that("model_info returns extended fields", {
  skip_if_no_model()
  expect_true(is.numeric(shared_info$size))
  expect_true(shared_info$size > 0)
  expect_true(is.numeric(shared_info$n_params))
  expect_true(shared_info$n_params > 0)
  expect_true(is.logical(shared_info$has_encoder))
  expect_true(is.logical(shared_info$has_decoder))
  expect_true(is.logical(shared_info$is_recurrent))
})

# ============================================================
# Model metadata
# ============================================================

test_that("model_meta returns named character vector", {
  skip_if_no_model()
  meta <- llama_model_meta(shared_model)
  expect_true(is.character(meta))
  expect_true(length(meta) > 0)
  expect_false(is.null(names(meta)))
})

test_that("model_meta_val retrieves values by key", {
  skip_if_no_model()
  arch <- llama_model_meta_val(shared_model, "general.architecture")
  expect_true(is.character(arch) || is.null(arch))

  # An unknown key is expected to yield NULL rather than an error.
  val <- llama_model_meta_val(shared_model, "nonexistent.key.12345")
  expect_null(val)
})

# ============================================================
# Vocabulary info
# ============================================================

test_that("vocab_info returns named integer vector", {
  skip_if_no_model()
  vocab <- llama_vocab_info(shared_model)
  expect_true(is.integer(vocab))
  expect_equal(length(vocab), 11L)
  expect_true(all(
    c(
      "bos", "eos", "eot", "sep", "nl", "pad",
      "fim_pre", "fim_suf", "fim_mid", "fim_rep", "fim_sep"
    ) %in% names(vocab)
  ))
})

# ============================================================
# Chat templates
# ============================================================

test_that("chat template can be retrieved from model", {
  skip_if_no_model()
  tmpl <- llama_chat_template(shared_model)
  expect_true(is.null(tmpl) || is.character(tmpl))
})

test_that("chat_apply_template formats messages", {
  skip_if_no_model()
  tmpl <- llama_chat_template(shared_model)
  if (is.null(tmpl)) skip("model has no built-in chat template")

  messages <- list(list(role = "user", content = "Hello"))
  prompt <- llama_chat_apply_template(messages, template = tmpl)
  expect_true(is.character(prompt))
  expect_true(nchar(prompt) > 0)
  expect_true(grepl("Hello", prompt, fixed = TRUE))
})

# ============================================================
# Context: create + config
# ============================================================

test_that("context can be created", {
  skip_if_no_model()
  expect_false(is.null(shared_ctx))
})

test_that("n_ctx returns correct context size", {
  skip_if_no_model()
  n <- llama_n_ctx(shared_ctx)
  expect_true(is.integer(n))
  # Must match the n_ctx the shared context was created with.
  expect_equal(n, 256L)
})

test_that("set_threads does not error", {
  skip_if_no_model()
  expect_no_error(llama_set_threads(shared_ctx, n_threads = 4L))
  expect_no_error(
    llama_set_threads(shared_ctx, n_threads = 2L, n_threads_batch = 4L)
  )
  # restore
  llama_set_threads(shared_ctx, n_threads = 2L)
})

test_that("set_causal_attn does not error", {
  skip_if_no_model()
  expect_no_error(llama_set_causal_attn(shared_ctx, FALSE))
  # Second call leaves the shared context with causal attention enabled.
  expect_no_error(llama_set_causal_attn(shared_ctx, TRUE))
})

# ============================================================
# Tokenize / Detokenize
# ============================================================

test_that("tokenize and detokenize are inverse operations", {
  skip_if_no_model()
  text <- "Hello, world!"

  tokens <- llama_tokenize(shared_ctx, text)
  expect_true(is.integer(tokens))
  expect_true(length(tokens) > 0)

  recovered <- llama_detokenize(shared_ctx, tokens)
  expect_true(is.character(recovered))
  expect_equal(recovered, text)
})

# ============================================================
# Generation
# ============================================================

test_that("generation produces non-empty output", {
  skip_if_no_model()
  result <- llama_generate(
    shared_ctx, "The capital of France is",
    max_new_tokens = 20L, temp = 0.1
  )
  expect_true(is.character(result))
  expect_true(nchar(result, type = "bytes") > 0)
})

test_that("greedy generation is deterministic", {
  skip_if_no_model()
  r1 <- llama_generate(
    shared_ctx, "Once upon a time",
    max_new_tokens = 30L, temp = 0.0
  )
  r2 <- llama_generate(
    shared_ctx, "Once upon a time",
    max_new_tokens = 30L, temp = 0.0
  )
  expect_equal(r1, r2)
})

# ============================================================
# Advanced sampling
# ============================================================

test_that("generation with min_p produces output", {
  skip_if_no_model()
  result <- llama_generate(
    shared_ctx, "Hello",
    max_new_tokens = 10L, temp = 0.8, min_p = 0.05
  )
  expect_true(is.character(result))
  expect_true(nchar(result, type = "bytes") > 0)
})

test_that("generation with repeat_penalty produces output", {
  skip_if_no_model()
  result <- llama_generate(
    shared_ctx, "Hello",
    max_new_tokens = 10L, temp = 0.8,
    repeat_penalty = 1.1, repeat_last_n = 32L
  )
  expect_true(is.character(result))
  expect_true(nchar(result, type = "bytes") > 0)
})

test_that("generation with mirostat v2 produces output", {
  skip_if_no_model()
  result <- llama_generate(
    shared_ctx, "Hello",
    max_new_tokens = 10L,
    mirostat = 2L, mirostat_tau = 5.0, mirostat_eta = 0.1
  )
  expect_true(is.character(result))
  expect_true(nchar(result, type = "bytes") > 0)
})

test_that("generation with typical_p produces output", {
  skip_if_no_model()
  result <- llama_generate(
    shared_ctx, "Hello",
    max_new_tokens = 10L, temp = 0.8, typical_p = 0.9
  )
  expect_true(is.character(result))
  expect_true(nchar(result, type = "bytes") > 0)
})

# ============================================================
# Embeddings
# ============================================================

test_that("embeddings have correct dimensionality", {
  skip_if_no_model()
  emb <- llama_embeddings(shared_ctx, "Hello")
  expect_true(is.numeric(emb))
  expect_equal(length(emb), shared_info$n_embd)
  expect_true(any(emb != 0))
})

# ============================================================
# Logits
# ============================================================

test_that("get_logits returns numeric vector of n_vocab length", {
  skip_if_no_model()
  # Run one generation step first so the context has logits to read.
  llama_generate(shared_ctx, "Hello", max_new_tokens = 1L, temp = 0)
  logits <- llama_get_logits(shared_ctx)
  expect_true(is.numeric(logits))
  expect_equal(length(logits), shared_info$n_vocab)
  expect_true(any(logits != 0))
})

# ============================================================
# KV Cache operations
# ============================================================

test_that("memory_clear works", {
  skip_if_no_model()
  llama_generate(shared_ctx, "Hello", max_new_tokens = 5L, temp = 0)
  expect_no_error(llama_memory_clear(shared_ctx))
})

test_that("memory_seq_rm works", {
  skip_if_no_model()
  llama_generate(shared_ctx, "Hello", max_new_tokens = 5L, temp = 0)
  result <- llama_memory_seq_rm(shared_ctx, seq_id = 0L, p0 = -1L, p1 = -1L)
  expect_true(is.logical(result))
})

test_that("memory_seq_keep works", {
  skip_if_no_model()
  llama_generate(shared_ctx, "Hello", max_new_tokens = 5L, temp = 0)
  expect_no_error(llama_memory_seq_keep(shared_ctx, seq_id = 0L))
})

test_that("memory_seq_pos_range returns named integer", {
  skip_if_no_model()
  range <- llama_memory_seq_pos_range(shared_ctx, seq_id = 0L)
  expect_true(is.integer(range))
  expect_equal(length(range), 2L)
  expect_true(all(c("min", "max") %in% names(range)))
})

test_that("memory_can_shift returns logical", {
  skip_if_no_model()
  result <- llama_memory_can_shift(shared_ctx)
  expect_true(is.logical(result))
  expect_equal(length(result), 1L)
})

# ============================================================
# State save/load
# ============================================================

test_that("state save and load round-trip", {
  skip_if_no_model()
  llama_generate(shared_ctx, "Hello world", max_new_tokens = 5L, temp = 0)

  state_file <- tempfile(fileext = ".bin")
  withr::defer(unlink(state_file))

  result <- llama_state_save(shared_ctx, state_file)
  expect_true(result)
  expect_true(file.exists(state_file))
  expect_true(file.info(state_file)$size > 0)

  ctx2 <- llama_new_context(shared_model, n_ctx = 256L, n_threads = 2L)
  # Registered right after creation so the context is released even when
  # llama_state_load() throws.
  withr::defer(llama_free_context(ctx2))

  result2 <- llama_state_load(ctx2, state_file)
  expect_true(result2)
})

test_that("state_load errors on non-existent file", {
  skip_if_no_model()
  expect_error(llama_state_load(shared_ctx, "nonexistent_state.bin"))
})

# ============================================================
# Performance counters
# ============================================================

test_that("perf returns named list with expected fields", {
  skip_if_no_model()
  llama_generate(shared_ctx, "Hello", max_new_tokens = 5L, temp = 0)

  perf <- llama_perf(shared_ctx)
  expect_true(is.list(perf))
  expect_true(all(
    c(
      "t_load_ms", "t_p_eval_ms", "t_eval_ms",
      "n_p_eval", "n_eval", "n_reused"
    ) %in% names(perf)
  ))
  expect_true(perf$n_eval > 0)

  expect_no_error(llama_perf_reset(shared_ctx))
})

# ============================================================
# LoRA adapters (separate model load — LoRA modifies model)
# ============================================================

test_that("lora_load returns handle or errors on missing file", {
  skip_if_no_model()
  expect_error(llama_lora_load(shared_model, "nonexistent.gguf"))

  # NOTE(review): this loads the adapter against the shared model, unlike the
  # tests below which load a private model -- confirm that loading alone
  # (without applying) leaves the shared model usable for other tests.
  if (file.exists(LORA_PATH)) {
    lora <- llama_lora_load(shared_model, LORA_PATH)
    expect_false(is.null(lora))
  }
})

test_that("lora_apply and lora_remove work on context", {
  skip_if_no_model()
  if (!file.exists(LORA_PATH)) skip("test LoRA adapter not available")

  # Deferred handlers run last-in-first-out: the context (registered second)
  # is freed before the model (registered first), also on failure.
  model <- llama_load_model(MODEL_PATH)
  withr::defer(llama_free_model(model))
  lora <- llama_lora_load(model, LORA_PATH)
  ctx <- llama_new_context(model, n_ctx = 128L, n_threads = 2L)
  withr::defer(llama_free_context(ctx))

  expect_no_error(llama_lora_apply(ctx, lora, scale = 1.0))

  result <- llama_lora_remove(ctx, lora)
  expect_equal(result, 0L)
})

test_that("lora_clear works on context", {
  skip_if_no_model()
  if (!file.exists(LORA_PATH)) skip("test LoRA adapter not available")

  # Same cleanup ordering as above: context first, then model.
  model <- llama_load_model(MODEL_PATH)
  withr::defer(llama_free_model(model))
  lora <- llama_lora_load(model, LORA_PATH)
  ctx <- llama_new_context(model, n_ctx = 128L, n_threads = 2L)
  withr::defer(llama_free_context(ctx))

  llama_lora_apply(ctx, lora, scale = 0.5)
  expect_no_error(llama_lora_clear(ctx))

  # After a clear the adapter is no longer attached, so removing it is
  # expected to report -1.
  result <- llama_lora_remove(ctx, lora)
  expect_equal(result, -1L)
})