R Under development (unstable) (2024-05-04 r86521 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## Testing the capability of optimizing parameters > require(apsimx) Loading required package: apsimx APSIM(X) not found. If APSIM(X) is installed in an alternative location, set paths manually using 'apsimx_options' or 'apsim_options'. You can still try as the package will look into the registry (under Windows) > require(ggplot2) Loading required package: ggplot2 > apsimx_options(warn.versions = FALSE) > > ## 1. Simulate data from a model with known parameters > ## 2. Change the parameter values and some some random "noise" > ## 3. Run the optimization algorithm > ## 4. Compare optimized parameter values with original ones > > extd.dir <- system.file("extdata", package = "apsimx") > > ## extd.dir <- "~/Dropbox/apsimx/inst/extdata" > > ## I won't routinely run this > #### First set of tests ---- > if(FALSE){ + + ## One model run takes ~4 seconds + ## The original values for these were 1.5 for RUE and 90 for BasePhyllochron + system.time(sim0 <- apsimx("Wheat-opt-ex.apsimx", src.dir = extd.dir)) + + pp1 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + node = "Wheat", + node.child = "Leaf", + node.subchild = "Photosynthesis", + node.subsubchild = "RUE", + parm = "FixedValue", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + pp2 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + node = "Wheat", + node.child = "Cultivars", + node.subchild = "USA", + node.subsubchild = "Yecora", + parm = "BasePhyllochron", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + ## edit_apsimx_replacement("Wheat-opt-ex.apsimx") + + ## Visualize the data + ggplot(sim0, aes(Date, Wheat.AboveGround.Wt)) + + geom_line() + ggplot(sim0, aes(Date, Wheat.Leaf.LAI)) + + geom_line() + ggplot(sim0, aes(Date, Wheat.Phenology.Stage)) + + geom_line() + + ## Create 'observed' data + sample.dates <- seq(from = as.Date("2016-10-01"), to = as.Date("2017-06-01"), length.out = 10) + + set.seed(12345) + obs0 <- sim0[sim0$Date %in% sample.dates, c("Date","Wheat.Phenology.Stage","Wheat.Leaf.LAI","Wheat.AboveGround.Wt")] + obs0$Wheat.Phenology.Stage <- obs0$Wheat.Phenology.Stage + rnorm(10, 0, sd = 0.1) + obs0$Wheat.Leaf.LAI[5:10] <- obs0$Wheat.Leaf.LAI[5:10] + rnorm(6, 0, sd = 0.5) + obs0$Wheat.AboveGround.Wt[3:10] <- obs0$Wheat.AboveGround.Wt[3:10] + rnorm(8, 0, sd = 3) + + obsWheat <- obs0 + save(obsWheat, file = "obsWheat.rda") + + data(obsWheat) + ## We will try to optimize only 2 parameters + ## RUE: default 1.5 + ## Phyllochron for cultivar Yecora Fixed Value 90 + inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + node = "Wheat", + node.child = "Leaf", + node.subchild = "Photosynthesis", + node.subsubchild = "RUE", + parm = "FixedValue", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + node = "Wheat", + node.child = "Cultivars", + node.subchild = "USA", + node.subsubchild = "Yecora", + parm = "BasePhyllochron", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + ## Run the model with incorrect parameters + ## RUE = 1.2 + ## Phyllochron = 120 + tmp <- tempdir() + setwd(tmp) + file.copy(file.path(extd.dir, "Ames.met"), ".") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + system.time(sim.b4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp)) ## This takes 4-5 seconds + + ## write.csv(sim.b4, "wheat-sim-b4-opt.csv", row.names = FALSE) + + sim.b4.s <- subset(sim.b4, Date > as.Date("2016-09-30") & Date < as.Date("2017-07-01")) + ## phenology + ggplot() + + geom_point(data = obsWheat, aes(x = Date, y = Wheat.Phenology.Stage)) + + geom_line(data = sim.b4.s, aes(x = Date, y = Wheat.Phenology.Stage)) + + ggtitle("Phenology") + ## LAI + ggplot() + + geom_point(data = obsWheat, aes(x = Date, y = Wheat.Leaf.LAI)) + + geom_line(data = sim.b4.s, aes(x = Date, y = Wheat.Leaf.LAI)) + + ggtitle("LAI") + ## Biomass + ggplot() + + geom_point(data = obsWheat, aes(x = Date, y = Wheat.AboveGround.Wt)) + + geom_line(data = sim.b4.s, aes(x = Date, y = Wheat.AboveGround.Wt)) + + ggtitle("Biomass (g/m2)") + + ## Optimization + pp1 <- "Wheat.Leaf.Photosynthesis.RUE.FixedValue" + pp2 <- "Wheat.Cultivars.USA.Yecora.BasePhyllochron" + + # edit_apsimx_replacement("Wheat-b4-opt.apsimx", + # src.dir = ".", + # overwrite = TRUE, + # node = "Wheat", node.child = "Cultivars", + # node.subchild = "USA", node.subsubchild = "Yecora", + # parm = "BasePhyllochron", value = 110) + + ## This takes about ~7.5 minutes + start <- Sys.time() + wop <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = c(pp1, pp2), + data = obsWheat, + weights = "mean", + replacement = c(TRUE, TRUE), + initial.values = c(1.2, 120)) + end <- Sys.time() + + ## This took 8.8 minutes + start <- Sys.time() + wop.h <- optim_apsimx("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + parm.paths = c(pp1, pp2), + data = obsWheat, + weights = "mean", + replacement = c(TRUE, TRUE), + initial.values = c(1.2, 120), + hessian = TRUE) + end <- Sys.time() + + sim.opt <- apsimx("Wheat-opt-ex.apsimx", src.dir = extd.dir, value = "report") + + sim.opt.s <- subset(sim.opt, Date > as.Date("2016-09-30") & Date < as.Date("2017-07-01")) + + ## For vignette + ## write.csv(sim.opt.s, file = "wheat-sim-opt.csv", row.names = FALSE) + + ## phenology + ggplot() + + geom_point(data = obsWheat, aes(x = Date, y = Wheat.Phenology.Stage)) + + geom_line(data = sim.opt.s, aes(x = Date, y = Wheat.Phenology.Stage)) + + ggtitle("Phenology") + ## LAI + ggplot() + + geom_point(data = obsWheat, aes(x = Date, y = Wheat.Leaf.LAI)) + + geom_line(data = sim.opt.s, aes(x = Date, y = Wheat.Leaf.LAI)) + + ggtitle("LAI") + ## Biomass + ggplot() + + geom_point(data = obsWheat, aes(x = Date, y = Wheat.AboveGround.Wt)) + + geom_line(data = sim.opt.s, aes(x = Date, y = Wheat.AboveGround.Wt)) + + ggtitle("Biomass (g/m2)") + + ## What about nloptr? + start <- Sys.time() + wop.n <- optim_apsimx("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + parm.paths = c(pp1, pp2), + data = obsWheat, + weights = "mean", + replacement = c(TRUE, TRUE), + initial.values = c(1.2, 120), + type = "nloptr", + opts = list("algorithm" = "NLOPT_LN_NELDERMEAD")) + end <- Sys.time() + + ## Test BayesianTools??? Would need to embbed it inside optim + ## How long does this take in Windows laptop? 500 iter, 9 chains: 3464 seconds ~ 1 hour + ## 500 iter, 3 chains: 18 minutes + ## 5000 iter, 3 chains: 4 hours, but still not good enough + start <- Sys.time() + wop.mcmc <- optim_apsimx("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + parm.paths = c(pp1, pp2), + data = obsWheat, + weights = "mean", + replacement = c(TRUE, TRUE), + initial.values = c(1.2, 120), + type = "mcmc", + parallel = FALSE, + settings = list(iterations = 5000)) + end <- Sys.time() + + } > > #### Classic #### > if(FALSE){ + + tmp <- tempdir() + setwd(tmp) + + apsim_options(warn.versions = FALSE) + + ## Testing optimization for an APSIM Classic example + ## Maize + if(file.exists("Maize.apsim")) file.remove("Maize.apsim") + if(file.exists("Maize.xml")) file.remove("Maize.xml") + if(file.exists("Maize.out")) file.remove("Maize.out") + if(file.exists("Maize.sum")) file.remove("Maize.sum") + + extd.dir <- system.file("extdata", package = "apsimx") + file.copy(file.path(extd.dir, "Maize-op.apsim"), "./Maize.apsim") + file.copy("C:/Program Files (x86)/APSIM710-r4207/Model/Maize.xml", ".") + + inspect_apsim("Maize.apsim") + + inspect_apsim("Maize.apsim", node = "Crop", + parm = list("Sow", 3)) + + edit_apsim("Maize.apsim", node = "Clock", + parm = "start_date", value = "01/01/1987", + overwrite = TRUE) + + sim0 <- apsim("Maize.apsim") + + obsMaize <- data.frame(Date = as.Date(c("1988-02-23", "1989-02-25")), + biomass = c(4600, 5000), + yield = c(350, 570)) + + pp1 <- inspect_apsim_xml("Maize.xml", parm = "sc401/rue") + pp2 <- inspect_apsim_xml("Maize.xml", parm = "sc401/GNmaxCoef") + + ## Optimize a single parameter + op01 <- optim_apsim("Maize.apsim", + crop.file = "Maize.xml", + parm.paths = pp2, + data = obsMaize, + hessian = TRUE, + weights = "mean") + + + op1 <- optim_apsim("Maize.apsim", + crop.file = "Maize.xml", + parm.paths = c(pp1, pp2), + data = obsMaize, + weights = "mean") + + file.remove("Maize.xml") + file.copy("C:/Program Files (x86)/APSIM710-r4207/Model/Maize.xml", ".") + + op2 <- optim_apsim("Maize.apsim", + crop.file = "Maize.xml", + parm.paths = c(pp1, pp2), + data = obsMaize, + hessian = TRUE, + weights = "mean") + + # op2.u <- optim_apsim("Maize.apsim", + # crop.file = "Maize.xml", + # parm.paths = c(pp1, pp2), + # type = "ucminf", + # data = obsMaize, + # hessian = TRUE, + # weights = "mean") + + op2nl <- optim_apsim("Maize.apsim", + crop.file = "Maize.xml", + parm.paths = c(pp1, pp2), + data = obsMaize, + type = "nloptr", + weights = "mean", + opts = list("algorithm" = "NLOPT_LN_NELDERMEAD")) + + sim2 <- apsim("Maize.apsim") + + ## Can I also optimize management? + pp3 <- inspect_apsim("Maize.apsim", node = "Crop", + parm = list("Sow", 17), + print.path = TRUE) + + pp4 <- inspect_apsim("Maize.apsim", node = "Crop", + parm = list("Sow", 28), + print.path = TRUE) + + ## The goal of this is to show that you + ## can try to optimize parameters which are in the + ## .apsim and .xml file + op3 <- optim_apsim("Maize.apsim", + crop.file = "Maize.xml", + parm.paths = c(pp2, pp4), + xml.parm = c(TRUE, FALSE), + data = obsMaize, + weights = "mean", + hessian = TRUE) + + inspect_apsim("Maize.apsim", node = "Crop", + parm = list("Sow", 17), + print.path = TRUE) + + inspect_apsim("Maize.apsim", node = "Crop", + parm = list("Sow", 28), + print.path = TRUE) + + + if(file.exists("Maize.apsim")) file.remove("Maize.apsim") + if(file.exists("Maize.xml")) file.remove("Maize.xml") + if(file.exists("Maize.out")) file.remove("Maize.out") + if(file.exists("Maize.sum")) file.remove("Maize.sum") + } > > ## Testing the optimization of a single parameter > ## comparing methods > if(FALSE){ + + tmp <- tempdir() + setwd(tmp) + + extd.dir <- system.file("extdata", package = "apsimx") + + file.copy(file.path(extd.dir, "Ames.met"), ".") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + sim0 <- apsimx("Wheat-opt-ex.apsimx") + + pp1 <- "Wheat.Leaf.Photosynthesis.RUE.FixedValue" + pp2 <- "Wheat.Cultivars.USA.Yecora.BasePhyllochron" + + ## Testing the "unreliable" method + start <- Sys.time() + wop.1 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = pp1, + data = obsWheat, + replacement = TRUE, + initial.values = 1.2) + end <- Sys.time() ## It took 3.25 minutes + + ## Plus hessian + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + start <- Sys.time() + wop.h.1 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = pp1, + data = obsWheat, + replacement = TRUE, + hessian = TRUE, + initial.values = 1.2) + end <- Sys.time() ## It took 3.4 minutes + + ## Erase and try Brent + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + start <- Sys.time() + wop.b.1 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = pp1, + data = obsWheat, + method = "Brent", + lower = 0.5, upper = 2, + replacement = TRUE, + initial.values = 1.2) + end <- Sys.time() ## It took 2.18 minutes + ## Brent, naturally, will not provide the best solution if + ## the solution is outside the interval (lower, upper) + + ## Erase and try L-BFGS-B + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + start <- Sys.time() + wop.bfgs.1 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = pp1, + data = obsWheat, + method = "L-BFGS-B", + lower = 0.5, + hessian = TRUE, + replacement = TRUE, + initial.values = 1.2, + control = list(trace = 3)) + end <- Sys.time() ## It took 7.02 minutes + + ## Next step is to test whether weighting affects the estimate of the vcov + ## and how different it is to the Bayesian analysis + + ## Erase and try L-BFGS-B + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + start <- Sys.time() + wop.bfgs.2 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = pp1, + data = obsWheat, + weights = "mean", + method = "L-BFGS-B", + lower = 0.5, + hessian = TRUE, + replacement = TRUE, + initial.values = 1.2, + control = list(trace = 3)) + end <- Sys.time() ## It took 4.16 minutes + + ## Testing with two parameters + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + ## Testing with two parameters + start <- Sys.time() + wop.2 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = c(pp1, pp2), + data = obsWheat, + replacement = c(TRUE, TRUE), + hessian = TRUE, + initial.values = c(1.2, 120)) + end <- Sys.time() ## It took 15.7 minutes + + ## Testing with two parameters, without the hessian + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + ## Testing with two parameters + start <- Sys.time() + wop.nh.2 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = c(pp1, pp2), + data = obsWheat, + replacement = c(TRUE, TRUE), + initial.values = c(1.2, 120)) + end <- Sys.time() ## It took 13.97 minutes + + file.remove("Wheat-opt-ex.apsimx") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + ## Testing with two parameters + start <- Sys.time() + wop.3 <- optim_apsimx("Wheat-opt-ex.apsimx", + parm.paths = c(pp1, pp2), + data = obsWheat, + type = "ucminf", + weights = "mean", + replacement = c(TRUE, TRUE), + hessian = TRUE, + initial.values = c(1.2, 120)) + end <- Sys.time() ## It took 28.97 minutes and it did not find the right answer + } > > #### Testing grid method ---- > if(FALSE){ + + data(obsWheat) + + system.time(sim0 <- apsimx("Wheat-opt-ex.apsimx", src.dir = extd.dir)) + + cmp.b4 <- compare_apsim(obsWheat, sim0, labels = c("obs", "sim"), cRSS = TRUE) + ### pre-optimized log-RSS + log(cmp.b4$cRSS) + + ## Creating a grid for two parameters + pp1 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + node = "Wheat", + node.child = "Leaf", + node.subchild = "Photosynthesis", + node.subsubchild = "RUE", + parm = "FixedValue", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + pp2 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = extd.dir, + node = "Wheat", + node.child = "Cultivars", + node.subchild = "USA", + node.subsubchild = "Yecora", + parm = "BasePhyllochron", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + pgrd <- expand.grid(RUE = c(0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0), + BasePhyllochron = c(40, 60, 80, 100, 120, 140, 160)) + + tmp <- tempdir() + setwd(tmp) + file.copy(file.path(extd.dir, "Ames.met"), ".") + file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".") + + system.time(sim.b4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp)) ## This takes 4-5 seconds + xrgs <- xargs_apsimx(single.threaded = TRUE) + system.time(sim.b4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp, xargs = xrgs)) ## This takes 4-5 seconds + + + start <- Sys.time() + wop.grd <- optim_apsimx("Wheat-opt-ex.apsimx", + src.dir = tmp, + parm.paths = c(pp1, pp2), + data = obsWheat, + type = "grid", + replacement = c(TRUE, TRUE), + initial.values = list(RUE = 1.2, BasePhyllochron = 120), + verbose = TRUE, + grid = pgrd, + cores = 2) + end <- Sys.time() ## It took 6.1 minutes + ## Took 3.67 minutes using two cores and single-thread (Mac 2017) + + ggplot(wop.grd$res, aes(x = RUE, y = lrss)) + + facet_wrap(~BasePhyllochron) + + geom_point() + + geom_line() + + pp1 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = ".", + node = "Wheat", + node.child = "Leaf", + node.subchild = "Photosynthesis", + node.subsubchild = "RUE", + parm = "FixedValue", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + pp2 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", + src.dir = ".", + node = "Wheat", + node.child = "Cultivars", + node.subchild = "USA", + node.subsubchild = "Yecora", + parm = "BasePhyllochron", + print.path = TRUE, + display.available = FALSE, + verbose = FALSE) + + ## Compare to optim + start <- Sys.time() + wop.nm <- optim_apsimx("Wheat-opt-ex.apsimx", + src.dir = tmp, + parm.paths = c(pp1, pp2), + data = obsWheat, + replacement = c(TRUE, TRUE), + initial.values = list(RUE = 1.8, BasePhyllochron = 120)) + end <- Sys.time() ## It took 5.7 minutes + + ### Maybe we need a fine grid around the true values? + pgrd3 <- expand.grid(RUE = c(1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9), + BasePhyllochron = c(70, 75, 80, 85, 90, 95, 100)) + + start <- Sys.time() + wop.grd3 <- optim_apsimx("Wheat-opt-ex.apsimx", + src.dir = tmp, + parm.paths = c(pp1, pp2), + data = obsWheat, + type = "grid", + replacement = c(TRUE, TRUE), + initial.values = list(RUE = 1.5, BasePhyllochron = 90), + verbose = TRUE, + grid = pgrd3, + cores = 3) + end <- Sys.time() ## It took 6.63 minutes + + ggplot(wop.grd3$res, aes(x = RUE, y = lrss)) + + facet_wrap(~BasePhyllochron) + + geom_point() + + geom_line() + + system.time(sim.a4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp)) ## This takes 4-5 seconds + wop.grd3 + cmp <- compare_apsim(obsWheat, sim.a4, labels = c("obs", "sim"), cRSS = TRUE) + } > > proc.time() user system elapsed 1.06 0.17 1.21