################################################################################
##       R PROGRAM: PSE_get_estimates_2021-11-11.R
##
##         PROJECT: Evaluation of multiple-encounter population size
##                  estimators
##
##     DESCRIPTION: Extract and consolidate PSE estimates into a single
##                  data frame and save as both rds and csv.  This program
##                  updates the estimates with new LCM estimates obtained using
##                  gamma prior parameters of 0.25.
##
##           INPUT: ~data/AdditionalEstimates/*.Rdata files containing raw
##                     estimates
##                  ~data/PSE_estimates.rds
##
##          OUTPUT: ~data/PSE_estimates_2021-11-11.rds
##                  ~data/PSE_estimates_2021-11-11.csv
##                  ~output/PSE_estimate_counts_2021-11-11.rds
##                  ~output/PSE_incomplete_estimates_2021-11-11.rds
##                  ~output/Estimate_counts_plus_2021-11-11.csv
##
##      WRITTEN BY: Steve Gutreuter, CDC/CGH/DGHT Statistics, Estimation
##                                   and Modeling Team
##                  sgutreuter@cdc.gov
################################################################################
library(tidyverse)
library(data.table)
basepath <- file.path(Sys.getenv("PROJ"), "PSE/PSEsim")
workpath <- file.path(basepath, "R")
datapath <- file.path(basepath, "data")
newdatapath <- file.path(basepath, "data/Additional_Estimates")
outpath <- file.path(basepath, "output")
setwd(workpath)
source(file.path(workpath, "PSE_sim_functions.R"))
################################################################################
## Create vector of file names
################################################################################
## The "Additional estimates from May 2021"
est.obj1 <- c("BMA_Mb_estimates-a.Rdata", "BMA_Mb_estimates-b.Rdata",
              "BMA_Mbh_estimates-a.Rdata", "BMA_Mbh_estimates-b.Rdata",
              "BMA_Mbht_estimates-a.Rdata", "BMA_Mbht_estimates-b.Rdata",
              "BMA_Mh_estimates-a.Rdata", "BMA_Mh_estimates-b.Rdata",
              "BMA_Mht_estimates-a.Rdata", "BMA_Mht_estimates-b.Rdata",
              "BMA_Mt_estimates-a.Rdata", "BMA_Mt_estimates-b.Rdata",
              "LLM_estimates-1-a.Rdata", "LLM_estimates-2-a.Rdata",
              "LLM_estimates-3-a.Rdata")
## The new LCM estimates using the default prior specification
est.obj2 <- c("LCM_Mb_estimates.Rdata",
              "LCM_Mb_estimates-a.Rdata", "LCM_Mb_estimates-b.Rdata",
              "LCM_Mbh_estimates.Rdata",
              "LCM_Mbh_estimates-a.Rdata", "LCM_Mbh_estimates-b.Rdata",
              "LCM_Mbht_estimates.Rdata",
              "LCM_Mbht_estimates-a.Rdata", "LCM_Mbht_estimates-b.Rdata",
              "LCM_Mh_estimates.Rdata",
              "LCM_Mh_estimates-a.Rdata", "LCM_Mh_estimates-b.Rdata",
              "LCM_Mht_estimates.Rdata",
              "LCM_Mht_estimates-a.Rdata", "LCM_Mht_estimates-b.Rdata",
              "LCM_Mt_estimates.Rdata",
              "LCM_Mt_estimates-a.Rdata", "LCM_Mt_estimates-b.Rdata")
################################################################################
## Combine all estimates
################################################################################
combineRdata <- function(Rdatanames, datapath){
    combined <- data.frame(NULL)
    counts <- data.frame(NULL)
    for(i in seq_along(Rdatanames)){
        efname <- file.path(datapath, Rdatanames[i])
        attach(efname)
        dfnames <- cgwtools::lsdata(efname)
        for(j in seq_along(dfnames)){
            dfj <- get(dfnames[j])
            if(substring(dfnames[j], 1, 3) == "llm"){
                dfj$HPDIcov <- NA
                dfj$HPDCIwidth <- NA
                dfj$HPDlcl <- NA
                dfj$HPDucl <- NA
                dfj$ucl.truncated <- NULL}
            combined <- rbind(combined, dfj)
            x <- data.frame(with(dfj, table(Events)))
            counts <- rbind(counts,
                            cbind(rep(dfnames[j], nrow(x)), x))
        }
        detach()
    }
    names(counts) <- c("Estimates frame", "Events", "Estimates")
    invisible(list(combined = combined, counts = counts))
}
## Get the "Additional estimates from May 2021"
comb.list1 <- combineRdata(est.obj1, datapath = newdatapath)
comb1 <- comb.list1$combined
comb1$Bias <- NULL
## Get the LCM estimates obtained using the default prior
comb.list2 <- combineRdata(est.obj2, datapath = datapath)
comb2 <- comb.list2$combined
comb2$Bias <- NULL
## Combine the two
comb <- rbind(comb1, comb2)
################################################################################
## Recoding
################################################################################
## Code pmean = mean probability of detectection per observation event
Idx1 <- comb$gen.parms == "p = 0.025" |
    comb$gen.parms == "betaparms=(1.32448,51.6548)" |
    comb$gen.parms == "p0=0.025, frac=0.5" |
    comb$gen.parms == "betaparms=(1.32448,51.6548), frac=0.5"
Idx2 <- comb$gen.parms == "p = 0.05" |
    comb$gen.parms == "betaparms=(1.26488,24.0327)" |
    comb$gen.parms == "p0=0.05, frac=0.5" |
    comb$gen.parms == "betaparms=(1.26488,24.0327), frac=0.5"
Idx3 <- comb$gen.parms == "p = 0.1" |
    comb$gen.parms == "betaparms=(1.14567,10.3111)" |
    comb$gen.parms == "p0=0.1, frac=0.5" |
    comb$gen.parms == "betaparms=(1.14567,10.3111), frac=0.5"
Idx4 <- comb$gen.parms == "p = 0.15" |
    comb$gen.parms == "betaparms=(1.02647,5.81667)" |
    comb$gen.parms == "p0=0.15, frac=0.5" |
    comb$gen.parms == "betaparms=(1.02647,5.81667), frac=0.5"
Idx5 <- comb$gen.parms == "p = 0.20" |
    comb$gen.parms == "betaparms=(0.90727,3.62907)" |
    comb$gen.parms == "p0=0.2, frac=0.5" |
    comb$gen.parms == "betaparms=(0.90727,3.62907), frac=0.5"
comb$pmean[Idx1] <- "0.025"
comb$pmean[Idx2] <- "0.050"
comb$pmean[Idx3] <- "0.100"
comb$pmean[Idx4] <- "0.150"
comb$pmean[Idx5] <- "0.200"
## Drop "Poisson 2" from Model values
comb$Model <- str_split(comb$Model, pattern = " ", simplify = TRUE)[, 1]
## Correct Model name Mth to Mht
comb$Model[comb$Model == "Mth"] <- "Mht"            ## Fix Model name Mht
comb$Model[comb$Model == "LCMCR"] <- "LCM"
comb$ModelType <- comb$Model
comb$ModelType[!(comb$Model == "BMA" | comb$Model == "LCM")] <- "LLM"
## Code an indicator of estimation failure (Nest <= 0)
comb$Ifail <- ifelse((comb$Nest <= 0 | is.na(comb$Nest)), 1, 0)
################################################################################
## Get the original estimates for N = 10,000 and add to new estimates
################################################################################
old <- readRDS(file.path(datapath, "PSE_estimates.rds"))
## Delete the old LCM estimates
old <- old %>%
    filter(!Model == "LCM")
comb <- rbind(old, comb)
comb <- comb %>%
    filter(!(gen.modl == "trend" | gen.modl =="M0")) %>%
    arrange(Ntrue, ModelType, gen.modl, pmean, gen.parms, Events, Rep) %>%
    mutate(Model = as.character(Model),
           gen.modl = as.character(gen.modl),
           gen.parms = as.character(gen.parms))
attr(comb, "CreatedBy") <- "PSE_get_estimates_2021-11-11.R"
################################################################################
## Total number of estimates
################################################################################
cat("Total number of estimates: ", Nest_tot <- dim(comb)[1], "\n")
################################################################################
## Frequencies by population size, estimation model, data-generating model,
## encounter probability, generating parameters and number of observation events
################################################################################
EstimateCounts <- comb %>%
    dplyr::select(Ntrue, ModelType, gen.modl, pmean, gen.parms, Events) %>%
    group_by(Ntrue, ModelType, gen.modl, pmean, gen.parms, Events) %>%
    summarize(N_estimates = n()) %>%
    ungroup()
attr(EstimateCounts, "CreatedBy") <- "PSE_get_estimates_2021-11-11.R"
################################################################################
## Identify estimation failures
################################################################################
IncEsts <- EstimateCounts %>%
    filter(N_estimates < 400) %>%
    mutate(Fails = 400 - N_estimates)
attr(IncEsts, "CreatedBy") <- "PSE_get_estimates_2021-11-11.R"
cat("Total number of estimation failures: ",
    Nest_fail <- sum(400 - IncEsts$N_estimates), "\n")
################################################################################
## Estimation failures by ModelType
################################################################################
IncEsts %>%
    select(ModelType, N_estimates, Fails) %>%
    group_by(ModelType) %>%
    dplyr::summarize(EstimationFailures = sum(Fails))
################################################################################
## Frequencies by ModelType
################################################################################
with(comb, table(ModelType))
################################################################################
## Save the data
################################################################################
ctime <- as.character(Sys.time())
attr(comb, "Completion") <- ctime
attr(EstimateCounts, "Completion") <- ctime
attr(IncEsts, "Completion") <- ctime
saveRDS(comb, file = file.path(datapath, "PSE_estimates_2021-11-11.rds"))
fwrite(comb, file = file.path(datapath, "PSE_estimates_2021-11-11.csv"))
saveRDS(EstimateCounts, file = file.path(outpath,
                                         "PSE_estimate_counts_2021-11-11.rds"))
saveRDS(IncEsts, file = file.path(outpath,
                                  "PSE_incomplete_estimates_2021-11-11.rds"))
fwrite(comb.list$counts, file = file.path(outpath,
                                          "Estimate_counts_2021-11-11.csv"))
################################################################################
## Log session information
################################################################################
cat("Execution ended: ", ctime, "\n")
sessionInfo()
################################   END of FILE   ###############################
