test_that("normalize.numeric works with a regular numeric vector", {
	vec <- c(3, 4)
	result <- normalize(vec)

	# Expected result
	expected <- vec / sqrt(sum(vec^2))

	expect_equal(result, expected)
	expect_equal(sqrt(sum(result^2)), 1)
})

test_that("normalize.numeric handles zero vector correctly", {
	vec <- c(0, 0, 0)
	expect_warning(result <- normalize(vec), "NaNs produced")
	expect_true(all(is.nan(result)))
})

test_that("normalize.numeric handles NA values", {
	vec <- c(NA, 4)

	result <- normalize(vec)
	expect_true(any(is.na(result)))
})

test_that("normalize.numeric handles Inf values", {
	vec <- c(Inf, 4)

	expect_warning(result <- normalize(vec), "NaNs produced")
	expect_true(any(is.na(result)))
})

test_that("normalize.numeric works with negative values", {
	vec <- c(-3, 4)
	result <- normalize(vec)

	expected <- vec / sqrt(sum(vec^2))
	expect_equal(result, expected)
	expect_equal(sqrt(sum(result^2)), 1)
})

test_that("normalize.numeric works with single element vector", {
	vec <- c(5)
	result <- normalize(vec)

	expected <- vec / sqrt(sum(vec^2))
	expect_equal(result, expected)
	expect_equal(abs(result), 1)
})

test_that("normalize.embeddings works with embeddings object", {
	embeddings <- embeddings(c(3, 4, 0, 0, 0, 5, 8, 15, 0), nrow = 3, , byrow = TRUE, dimnames = list(paste0("token", 1:3), paste0("dim", 1:3)))

	result <- normalize(embeddings)
	expect_true(is.embeddings(result))

	# Check that each row has magnitude 1
	magnitudes <- sqrt(rowSums(result^2))
	expect_equal(as.numeric(magnitudes), c(1, 1, 1))
})

test_that("normalize.embeddings handles zero vectors", {
	embeddings <- embeddings(c(0, 0, 0, 0, 0, 0), nrow = 2)

	expect_warning(result <- normalize(embeddings), "NaNs produced")
	expect_true(all(is.nan(result)))
})

test_that("normalize.embeddings handles NA values", {
	embeddings <- embeddings(c(3, 4, 0, 0, 0, 5, 8, 15, 0), nrow = 3, , byrow = TRUE, dimnames = list(paste0("token", 1:3), paste0("dim", 1:3)))
	embeddings[1, 1] <- NA

	result <- normalize(embeddings)
	expect_true(any(is.na(result)))
})

test_that("normalize.default works with a list of numeric vectors", {
	vec_list <- list(c(3, 4), c(5, 12), c(0, 0))
	expect_warning(result <- normalize(vec_list), "NaNs produced")

	expected <- lapply(vec_list, function(vec) {
		vec / sqrt(sum(vec^2))
	})

	expect_equal(result, expected)
	expect_equal(sqrt(sum(result[[1]]^2)), 1)
	expect_equal(sqrt(sum(result[[2]]^2)), 1)
	expect_true(all(is.nan(result[[3]])))
})

test_that("normalize.default works with a list of embeddings", {
	embeddings1 <- embeddings(c(3, 4, 0, 0, 0, 5, 8, 15, 0), nrow = 3, , byrow = TRUE, dimnames = list(paste0("token", 1:3), paste0("dim", 1:3)))
	embeddings2 <- embeddings1 * 2
	embeddings_list <- list(embeddings1, embeddings2)

	result <- normalize(embeddings_list)

	# Check that each embedding in the list is normalized
	for (emb in result) {
		magnitudes <- sqrt(rowSums(emb^2))
		expect_equal(as.numeric(magnitudes), c(1, 1, 1))
	}
})

test_that("normalize.default throws error with invalid input", {
	expect_error(normalize("invalid input"), "x must be a numeric vector or an embeddings object")

	df <- data.frame(a = 1:3, b = 4:6)
	expect_error(normalize(df), "x must be a numeric vector or an embeddings object\nFor data frames and matrices, use normalize_rows().")
})

test_that("normalize_rows works with a data frame", {
	df <- data.frame(dim1 = c(3, 0, 5), dim2 = c(4, 0, 12))
	expect_warning(result <- normalize_rows(df), "NaNs produced")

	expected <- apply(df, 1, function(row) {
		row / sqrt(sum(row^2))
	})
	expected <- t(expected)

	expect_equal(as.matrix(result), expected)
})

test_that("normalize_rows works with a matrix", {
	mat <- matrix(c(3, 4, 0, 0, 5, 12), nrow = 3, byrow = TRUE)
	expect_warning(result <- normalize_rows(mat), "NaNs produced")

	expected <- apply(mat, 1, function(row) {
		row / sqrt(sum(row^2))
	})
	expected <- t(expected)

	expect_equal(result, expected)
})

test_that("normalize_rows throws error with invalid input", {
	expect_error(normalize_rows("invalid input"), "x must be a dataframe or matrix")

	vec <- c(1, 2, 3)
	expect_error(normalize_rows(vec), "x must be a dataframe or matrix")
})

test_that("normalize_rows.data.frame works with tidyselect columns", {
	df <- tibble(
		id = c("a", "b", "c"),
		dim1 = c(3, 0, 5),
		dim2 = c(4, 0, 12),
		other = c("x", "y", "z")
	)
	expect_warning(result <- normalize_rows(df, cols = starts_with("dim")), "NaNs produced")

	# Expected normalized columns
	normalized_dims <- apply(df %>% dplyr::select(starts_with("dim")), 1, function(row) {
		row / sqrt(sum(row^2))
	})
	normalized_dims <- t(normalized_dims)

	expected <- dplyr::bind_cols(
		df %>% dplyr::select(-starts_with("dim")),
		as_tibble(normalized_dims)
	)

	expect_equal(result, expected)
})

test_that("normalize_rows.data.frame handles no columns selected", {
	df <- tibble(
		id = c("a", "b", "c"),
		other = c("x", "y", "z")
	)

	result <- normalize_rows(df, cols = starts_with("dim"))

	expect_equal(result, df)
})

test_that("normalize_rows handles rows with zero vectors", {
	mat <- matrix(c(0, 0, 3, 4, 0, 0), nrow = 3, byrow = TRUE)

	expect_warning(result <- normalize_rows(mat), "NaNs produced")
	expect_true(all(is.nan(result[1, ])))
	expect_true(all(is.nan(result[3, ])))
	expect_equal(sqrt(sum(result[2, ]^2)), 1)
})

test_that("normalize_rows handles NA values", {
	mat <- matrix(c(3, 4, NA, NA), nrow = 2)
	result <- normalize_rows(mat)

	expect_true(any(is.na(result)))
})

test_that("normalize.embeddings works even if token_index is missing", {
	embeddings <- embeddings(c(3, 4, 0, 0, 0, 5, 8, 15, 0), nrow = 3, , byrow = TRUE, dimnames = list(paste0("token", 1:3), paste0("dim", 1:3)))
	attr(embeddings, "token_index") <- NULL

	result <- normalize(embeddings)

	expect_true(is.embeddings(result))
	expect_true(!is.null(attr(result, "token_index")))
	magnitudes <- sqrt(rowSums(result^2))
	expect_equal(as.numeric(magnitudes), c(1, 1, 1))
})

test_that("normalize.default handles empty list", {
	result <- normalize(list())
	expect_equal(result, list())
})

test_that("normalize.default handles list with mixed types", {
	vec <- c(3, 4)
	embeddings <- embeddings(c(3, 4, 0, 0, 0, 5, 8, 15, 0), nrow = 3, , byrow = TRUE, dimnames = list(paste0("token", 1:3), paste0("dim", 1:3)))
	lst <- list(vec, embeddings)

	result <- normalize(lst)

	expect_equal(length(result), 2)
	expect_equal(result[[1]], normalize(vec))
	expect_true(is.embeddings(result[[2]]))
})

test_that("normalize_rows.data.frame handles no numeric columns selected", {
	df <- data.frame(
		id = c("a", "b"),
		other = c("x", "y"),
		stringsAsFactors = FALSE
	)

	result <- normalize_rows(df, cols = where(is.numeric))

	expect_equal(result, df)
})

test_that("normalize works with high-dimensional embeddings", {
	embeddings <- embeddings(runif(1000), nrow = 10)

	result <- normalize(embeddings)

	magnitudes <- sqrt(rowSums(result^2))
	expect_equal(as.numeric(magnitudes), rep(1, 10))
})

test_that("normalize.numeric throws error with multi-dimensional array", {
	arr <- array(1:8, dim = c(2, 2, 2))

	expect_error(normalize(arr), "x must be a numeric vector, an embeddings object, or a dataframe with one embedding per row")
})

test_that("normalize.default suggests normalize_rows for data frames", {
	df <- data.frame(a = 1:3, b = 4:6)

	expect_error(normalize(df), "x must be a numeric vector or an embeddings object\nFor data frames and matrices, use normalize_rows().")
})

test_that("normalize_rows.data.frame works with tidyselect helpers", {
	df <- tibble(
		id = c("a", "b"),
		dim1 = c(3, 5),
		dim2 = c(4, 12),
		other = c("x", "y")
	)

	result <- normalize_rows(df, cols = c(dim1, dim2))

	expected_dims <- apply(df %>% dplyr::select(dim1, dim2), 1, function(row) {
		row / sqrt(sum(row^2))
	})
	expected_dims <- t(expected_dims)

	expected <- dplyr::bind_cols(
		df %>% dplyr::select(-dim1, -dim2),
		as_tibble(expected_dims)
	)

	expect_equal(result, expected)
})