library("testthat")
library("pomdp")

## context("solve_POMDP")

data("Tiger")
sol <- solve_POMDP(Tiger)
expect_identical(nrow(sol$solution$pg[[1]]), 5L)
plot_policy_graph(sol)

sol <- solve_POMDP(Tiger, horizon = 3, method = "incprune")
expect_identical(length(sol$solution$pg), 3L)

reward(sol)
reward(sol, belief = c(0,1))
reward(sol, belief = c(0,1), epoch = 3)

## context("solve_POMDP with terminal values")

# solve 10 epochs
sol <- solve_POMDP(Tiger, discount = 1, horizon = 10, method = "enum")
alpha_horizon <- sol$solution$alpha[[1]]
pg_horizon <- sol$solution$pg[[1]]

# compare with 10 times 1 episode with the last episode as the terminal values
sol <- solve_POMDP(Tiger, discount = 1, horizon = 1, method = "enum")
for(i in 2:10)
  sol <- solve_POMDP(Tiger, discount = 1, horizon = 1, method = "enum", terminal_values = sol$solution$alpha[[1]])
alpha_stepwise <- sol$solution$alpha[[1]]
pg_stepwise <- sol$solution$pg[[1]]


expect_equal(alpha_horizon, alpha_stepwise)
expect_equal(pg_horizon$action, pg_stepwise$action) # transitions do not work


## context("solve_POMDP and model files")

sol <- solve_POMDP(system.file("examples/shuttle.95.POMDP", package = "pomdp"), 
                   parameter = list(fg_points = 10))
plot_policy_graph(sol, show_belief = FALSE)
policy(sol)

## test with some online problems from http://www.pomdp.org/examples/

#problem <- read_POMDP("http://www.pomdp.org/examples/1d.POMDP", parse_matrices = "no")
#sol <- solve_POMDP(problem)

sol <- solve_POMDP("http://www.pomdp.org/examples/1d.POMDP")
plot_policy_graph(sol, show_belief = FALSE)
policy(sol)

sol <- solve_POMDP("http://www.pomdp.org/examples/cheese.95.POMDP")
plot_policy_graph(sol, show_belief = FALSE)
policy(sol)


sol <- solve_POMDP("http://www.pomdp.org/examples/stand-tiger.95.POMDP")
plot_policy_graph(sol, show_belief = FALSE)
policy(sol)

## clean up
unlink("Rplots.pdf")

## test with finite horizon and terminal values
sol <- solve_POMDP(model = Tiger,
                   horizon = 3, discount = 1,  
                   method = "incprune",
                   terminal_values = c(0, 1000)
)
sol

expect_gt(reward(sol), 100)
expect_gt(simulate_POMDP(sol)$avg_reward, 100)
expect_gt(simulate_POMDP(sol, engine = "r", n = 100)$avg_reward, 100)

# grid does not converge to a valid value function
expect_warning(sol <- solve_POMDP(model = Tiger,
                                  horizon = 3, discount = 1,  
                                  method = "grid",
                                  terminal_values = c(0, 1000)
))
sol

expect_warning(reward_node_action(sol))
expect_warning(expect_gt(reward(sol), 100))
expect_gt(simulate_POMDP(sol)$avg_reward, 100)
expect_gt(simulate_POMDP(sol, engine = "r", n = 100)$avg_reward, 100)