# Unit tests for calc_cohesion().
#
# Each test builds a small survey data frame with the columns `person_id`,
# `question_concept_id`, and `answer_concept_id`, calls calc_cohesion(), and
# checks the `cohesion` column of the result.
#
# Wherever more than one person is expected in the output, the result is
# sorted by `person_id` before the positional comparison so the tests do not
# depend on the row order calc_cohesion() happens to return. The expected
# data frames are always written in ascending `person_id` order.

test_that("calc_cohesion works with valid input", {
  # Valid input: 4 responses for each person.
  survey_df <- data.frame(
    person_id = c(1, 1, 1, 1, 2, 2, 2, 2),
    question_concept_id = c(
      40192463, 40192411, 40192499, 40192417,
      40192463, 40192411, 40192499, 40192417
    ),
    answer_concept_id = c(
      40192514, 40192455, 40192524, 40192408,
      40192514, 40192455, 40192422, 40192408
    )
  )

  # Expected cohesion scores (per-person means of the scored answers).
  expected_output <- data.frame(
    person_id = c(1, 2),
    cohesion = c(3.5, 3.0)
  )

  result <- calc_cohesion(survey_df)

  # Sort by person so the positional comparison below is order-independent.
  result <- result[order(result$person_id), ]

  expect_equal(result$cohesion, expected_output$cohesion)
})

test_that("calc_cohesion handles incomplete responses", {
  # Incomplete responses: person 2 has only 3 of the 4 answers.
  survey_df <- data.frame(
    person_id = c(1, 1, 1, 1, 2, 2, 2),
    question_concept_id = c(
      40192463, 40192411, 40192499, 40192417,
      40192463, 40192411, 40192499
    ),
    answer_concept_id = c(
      40192514, 40192455, 40192524, 40192408,
      40192514, 40192455, 40192422
    )
  )

  # Person 1 gets a cohesion score; person 2 gets NA.
  expected_output <- data.frame(
    person_id = c(1, 2),
    cohesion = c(3.5, NA_real_)
  )

  result <- calc_cohesion(survey_df)

  # Sort by person so the positional comparison below is order-independent.
  result <- result[order(result$person_id), ]

  expect_equal(result$cohesion, expected_output$cohesion)
})

test_that("calc_cohesion handles missing responses", {
  # Person 1's answers all use an answer concept id that the test treats as
  # invalid (99999999); person 2's answers are valid.
  survey_df <- data.frame(
    person_id = c(1, 1, 1, 1, 2, 2, 2, 2),
    question_concept_id = c(
      40192463, 40192411, 40192499, 40192417,
      40192463, 40192411, 40192499, 40192417
    ),
    answer_concept_id = c(
      99999999, 99999999, 99999999, 99999999, # invalid answers for person 1
      40192514, 40192455, 40192422, 40192408  # valid answers for person 2
    )
  )

  # Person 1 has no valid answers, so NA; person 2 gets a cohesion score.
  expected_output <- data.frame(
    person_id = c(1, 2),
    cohesion = c(NA_real_, 3.0)
  )

  result <- calc_cohesion(survey_df)

  # Sort by person so the positional comparison below is order-independent.
  result <- result[order(result$person_id), ]

  expect_equal(result$cohesion, expected_output$cohesion)
})

test_that("calc_cohesion handles empty input", {
  # Zero-row input that still has the three expected columns.
  survey_df <- data.frame(
    person_id = integer(0),
    question_concept_id = integer(0),
    answer_concept_id = integer(0)
  )

  result <- calc_cohesion(survey_df)

  # Expect the result to be an empty data frame.
  expect_equal(nrow(result), 0)
})

test_that("calc_cohesion handles invalid column names", {
  # Input whose columns do not use the expected names.
  bad_survey_df <- data.frame(
    wrong_person_id = c(1, 1, 1, 1),
    wrong_question_id = c(40192463, 40192411, 40192499, 40192417),
    wrong_answer_id = c(40192514, 40192455, 40192524, 40192408)
  )

  # NOTE(review): the pattern below pins the exact wording of the error raised
  # when `question_concept_id` is looked up; it will need updating if
  # calc_cohesion() gains explicit input validation with its own message.
  expect_error(
    calc_cohesion(bad_survey_df),
    "object 'question_concept_id' not found"
  )
})

test_that("calc_cohesion returns NA for participants without any valid answers", {
  # Every answer for every participant uses the invalid concept id.
  survey_df <- data.frame(
    person_id = c(1, 1, 1, 1, 2, 2, 2, 2),
    question_concept_id = c(
      40192463, 40192411, 40192499, 40192417,
      40192463, 40192411, 40192499, 40192417
    ),
    answer_concept_id = c(
      99999999, 99999999, 99999999, 99999999,
      99999999, 99999999, 99999999, 99999999
    )
  )

  # Both persons should have NA scores as none of the answers are valid.
  expected_output <- data.frame(
    person_id = c(1, 2),
    cohesion = c(NA_real_, NA_real_)
  )

  result <- calc_cohesion(survey_df)

  # Sort by person so the positional comparison below is order-independent.
  result <- result[order(result$person_id), ]

  expect_equal(result$cohesion, expected_output$cohesion)
})