paste0("Start time: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S %Z", sep = "\n"))
################################################################################
##     R CODE FILE: computeSummaries_2021-11-11.R
##
##         PROJECT: Evaluation of multiple-encounter population size
##                  estimators
##
##     DESCRIPTION: Compute summary measures of the performance of the estimators
##                  for each partition of the simulation.  This version includes
##                  the additional estimates for N = 1,000 and 20,000, and
##                  encounter probability 0.200.
##
##            NOTE: Estimates from the trend data-generating models are
##                  excluded because the values of pmean from those are not
##                  compatible with the other models.
##
##           INPUT: PSE_estimates_2021-11-11.rds
##
##          OUTPUT: Estimate_summaries_by_generator_2021-11-11.rds
##                  Estimate_summaries_by_generator_2021-11-11.csv
##                  Estimate_summaries_sansM0_by_generator_2021-11-11.rds
##                  Estimate_summaries_sansM0_by_generator_2021-11-11.csv
##                  Estimate_summaries_by_pmean_2021-11-11.rds
##                  Estimate_summaries_by_pmean_2021-11-11.csv
##                  Estimate_summaries_sansM0_by_pmean_2021-11-11.rds
##                  Estimate_summaries_sansM0_by_pmean_2021-11-11.csv
##                  Estimate_summaries_2021-11-11.rds
##                  Estimate_summaries_2021-11-11.csv
##                  Estimate_summaries_sansM0_2021-11-11.rds
##                  Estimate_summaries_sansM0_2021-11-11.csv
##
##      WRITTEN BY: Steve Gutreuter                  E-mail:  sgutreuter@cdc.gov
##                  Statistics, Estimation and Modeling Team
##                  Division of Global HIV & TB
##                  Center for Global Health
##                  Centers for Disease Control & Prevention
################################################################################

################################################################################
### Define file paths
################################################################################
basepath <- file.path(Sys.getenv("PROJ"), "PSE/PSEsim")
workpath <- file.path(basepath, "R")
datapath <- file.path(basepath, "data")
outpath <- file.path(basepath, "output")
setwd(workpath)
source(file.path(workpath, "PSE_sim_functions.R"))
library(tidyverse)
library(data.table)
library(e1071)
################################################################################
## Get the PSE estimates
################################################################################
df <- readRDS(file.path(datapath, "PSE_estimates_2021-11-11.rds"))
tstamp <- as.character(Sys.time())
################################################################################
## Assign all log-linear model variants to "Loglinear"
################################################################################
df$Model <- as.character(df$Model)
df$Model[!(df$Model == "BMA" | df$Model == "LCM")] <- "Loglinear"
## Set global indicator for na.rm
narm <- TRUE
################################################################################
## Flag failed variance estimation in loglinear models for exclusion from
## summaries
################################################################################
df <- df %>%
    mutate(ISEfail = ifelse(Model == "Loglinear" & SE < 1, 1, 0))
with(df, table(Model, ISEfail))
################################################################################
## List infinite estimates
################################################################################
df[is.infinite(df$Nest), ]
################################################################################
## Compute summaries by Events, pmean, Model, gen.modl and gen.parms excluding
## estimates from the trend data-generating models which do not have compatible
## values of pmean.
################################################################################
sumry1 <- df %>%
    filter(!(gen.modl == "trend" | ISEfail == 1)) %>%
    group_by(Ntrue, Model, pmean, gen.modl, gen.parms, Events) %>%
    mutate(I_insane = ifelse(Nest > 1E10, 1, 0),
           I_neg = ifelse(Nest < 0, 1, 0)) %>%
    dplyr::summarise(n_ = n(),
              MeanEst = mean(Nest, na.rm = narm),
              RMSE = sqrt(mean((Nest - Ntrue)^2, na.rm = narm)),
              MAE = mean(abs(Nest - Ntrue), na.rm = narm),
              bias =  MeanEst - first(Ntrue),
              SE = sd(Nest, na.rm = narm),
              frac_gt_2True = mean((Nest > (2*Ntrue)), na.rm = narm),
              frac_lt_.5True = mean((Nest < (0.5*Ntrue)), na.rm = narm),
              Q90ratio = quantile(Nest, prob = 0.90, na.rm = narm) /
                  first(Ntrue),
              Coverage = mean(Icoverage, na.rm = narm),
              CoverageHPD = mean(HPDIcov, na.rm = narm),
              MedMOE = median(0.5 * CIwidth, na.rm = narm),
              MedMOEhpd = median(0.5 * (HPDucl - HPDlcl), na.rm = narm),
              skewness = skewness(Nest, na.rm = narm),
              Max = max(Nest, na.rm = narm),
              Ninsane = sum(I_insane),
              Nneg = sum(I_neg)
              )
names(sumry1) <- c("Ntrue", "Model", "pmean", "gen.modl", "gen.parms", "Events",
                   "n", "MeanEst", "RMSE", "MAE", "Bias", "SE", "frac_gt_2True",
                   "frac_lt_.5True", "Q90ratio", "Coverage", "CoverageHPD",
                   "MedMOE", "MedMOEhpd", "skewness", "Max", "n_insane", "n_neg")
cat("Rows: ", dim(sumry1)[1], "\n")
sumry1$gen.modl <- as.character(sumry1$gen.modl)
sumry1$gen.parms <- as.character(sumry1$gen.parms)
sumry1$Model <- factor(sumry1$Model)
attr(sumry1, "Created by") <- "computeSummaries_2021-11-11.R"
attr(sumry1, "Time stamp") <- tstamp
saveRDS(sumry1, file = file.path(outpath,
                                 "Estimate_summaries_by_generator_2021-11-11.rds"))
fwrite(sumry1, file = file.path(outpath,
                                "Estimate_summaries_by_generator_2021-11-11.csv"))
################################################################################
## Compute summaries by Events, pmean, Model, gen.modl and gen.parms, excluding
## estimates from the MO and trend data-generating models
################################################################################
sumry2 <- df %>%
    filter(!(gen.modl == "M0" | gen.modl == "trend" | ISEfail == 1)) %>%
    group_by(Ntrue, Model, pmean, gen.modl, gen.parms, Events) %>%
    mutate(I_insane = ifelse(Nest > 1E10, 1, 0),
           I_neg = ifelse(Nest < 0, 1, 0)) %>%
    summarise(n_ = n(),
              MeanEst = mean(Nest, na.rm = narm),
              RMSE = sqrt(mean((Nest - Ntrue)^2, na.rm = narm)),
              MAE = mean(abs(Nest - Ntrue), na.rm = narm),
              bias = MeanEst - first(Ntrue),
              SE = sd(Nest, na.rm = narm),
              frac_gt_2True = mean((Nest > (2 * Ntrue)), na.rm = narm),
              frac_lt_.5True = mean((Nest < (0.5 * Ntrue)), na.rm = narm),
              Q90ratio = quantile(Nest, prob = 0.90, na.rm = narm) /
                  first(Ntrue),
              Coverage = mean(Icoverage, na.rm = narm),
              CoverageHPD = mean(HPDIcov, na.rm = narm),
              MedMOE = median(0.5 * CIwidth, na.rm = narm),
              MedMOEhpd = median(0.5 * (HPDucl - HPDlcl), na.rm = narm),
              skewness = skewness(Nest, na.rm = narm),
              Max = max(Nest, na.rm = narm),
              Ninsane = sum(I_insane),
              Nneg = sum(I_neg)
              )
names(sumry2) <- c("Ntrue", "Model", "pmean", "gen.modl", "gen.parms", "Events",
                   "n", "MeanEst", "RMSE", "MAE", "Bias", "SE", "frac_gt_2True",
                   "frac_lt_.5True", "Q90ratio", "Coverage", "CoverageHPD",
                   "MedMOE", "MedMOEhpd", "skewness", "Max", "n_insane", "n_neg")
cat("Rows: ", dim(sumry2)[1], "\n")
sumry2$gen.modl <- as.character(sumry2$gen.modl)
sumry2$gen.parms <- as.character(sumry2$gen.parms)
sumry2$Model <- factor(sumry2$Model)
attr(sumry2, "Created by") <- "computeSummaries_2021-11-11.R"
attr(sumry2, "Time stamp") <- tstamp
saveRDS(sumry2,
        file = file.path(outpath,
                         "Estimate_summaries_sansM0_by_generator_2021-11-11.rds"))
fwrite(sumry2,
       file = file.path(outpath,
                        "Estimate_summaries_sansM0_by_generator_2021-11-11.csv"))
################################################################################
## Compute summaries by Events, pmean and Model
################################################################################
sumry3 <- df %>%
    filter(!(gen.modl == "trend" | ISEfail == 1)) %>%
    group_by(Ntrue, Model, pmean, Events) %>%
    mutate(I_insane = ifelse(Nest > 1E10, 1, 0),
           I_neg = ifelse(Nest < 0, 1, 0)) %>%
    summarise(n_ = n(),
              MeanEst = mean(Nest, na.rm = narm),
              RMSE = sqrt(mean((Nest - Ntrue)^2, na.rm = narm)),
              MAE = mean(abs(Nest - Ntrue), na.rm = narm),
              bias = MeanEst - first(Ntrue),
              SE = sd(Nest, na.rm = narm),
              frac_gt_2True = mean((Nest > (2 * Ntrue)), na.rm = narm),
              frac_lt_.5True = mean((Nest < (0.5 * Ntrue)), na.rm = narm),
              Q90ratio = quantile(Nest, prob = 0.90, na.rm = narm) /
                  first(Ntrue),
              Coverage = mean(Icoverage, na.rm = narm),
              CoverageHPD = mean(HPDIcov, na.rm = narm),
              MedMOE = median(0.5 * CIwidth, na.rm = narm),
              MedMOEhpd = median(0.5 * (HPDucl - HPDlcl), na.rm = narm),
              skewness = skewness(Nest, na.rm = narm),
              Max = max(Nest, na.rm = narm),
              Ninsane = sum(I_insane),
              Nneg = sum(I_neg)
              )
names(sumry3) <- c("Ntrue", "Model", "pmean", "Events", "n", "MeanEst", "RMSE",
                   "MAE", "Bias", "SE", "frac_gt_2True", "frac_lt_.5True",
                   "Q90ratio", "Coverage", "CoverageHPD", "MedMOE", "MedMOEhpd",
                   "skewness", "Max", "n_insane", "n_neg")
cat("Rows: ", dim(sumry3)[1], "\n")
sumry3$Model <- factor(sumry3$Model)
attr(sumry3, "Created by") <- "computeSummaries_2021-11-11.R"
attr(sumry3, "Time stamp") <- tstamp
saveRDS(sumry3, file = file.path(outpath, "Estimate_summaries_by_pmean_2021-11-11.rds"))
fwrite(sumry3, file = file.path(outpath, "Estimate_summaries_by_pmean_2021-11-11.csv"))
################################################################################
## Compute summaries by Events, pmean and Model, excluding estimates from the
## MO and trend data-generating models
################################################################################
sumry4 <- df %>%
    filter(!(gen.modl == "M0" | gen.modl == "trend" | ISEfail == 1)) %>%
    group_by(Ntrue, Model, pmean, Events) %>%
    mutate(I_insane = ifelse(Nest > 1E10, 1, 0),
           I_neg = ifelse(Nest < 0, 1, 0)) %>%
    summarise(n_ = n(),
              MeanEst = mean(Nest, na.rm = narm),
              RMSE = sqrt(mean((Nest - Ntrue)^2, na.rm = narm)),
              MAE = mean(abs(Nest - Ntrue), na.rm = narm),
              bias = MeanEst - first(Ntrue),
              SE = sd(Nest, na.rm = narm),
              frac_gt_2True = mean((Nest > (2 * Ntrue)), na.rm = narm),
              frac_lt_.5True = mean((Nest < (0.5 * Ntrue)), na.rm = narm),
              Q90ratio = quantile(Nest, prob = 0.90, na.rm = narm)/first(Ntrue),
              Coverage = mean(Icoverage, na.rm = narm),
              CoverageHPD = mean(HPDIcov, na.rm = narm),
              MedMOE = median(0.5 * CIwidth, na.rm = narm),
              MedMOEhpd = median(0.5 * (HPDucl - HPDlcl), na.rm = narm),
              skewness = skewness(Nest, na.rm = narm),
              Max = max(Nest, na.rm = narm),
              Ninsane = sum(I_insane),
              Nneg = sum(I_neg)
              )
names(sumry4) <- c("Ntrue", "Model", "pmean", "Events", "n", "MeanEst", "RMSE",
                   "MAE", "Bias", "SE", "frac_gt_2True", "frac_lt_.5True",
                   "Q90ratio", "Coverage", "CoverageHPD", "MedMOE", "MedMOEhpd",
                   "skewness", "Max", "n_insane", "n_neg")
cat("Rows: ", dim(sumry4)[1], "\n")
sumry4$Model <- factor(sumry4$Model)
attr(sumry4, "Created by") <- "computeSummaries_2021-11-11.R"
attr(sumry4, "Time stamp") <- tstamp
saveRDS(sumry4,
        file = file.path(outpath,
                         "Estimate_summaries_sansM0_by_pmean_2021-11-11.rds"))
fwrite(sumry4,
       file = file.path(outpath, "Estimate_summaries_sansM0_by_pmean_2021-11-11.csv"))
################################################################################
## Compute summaries by Events and Model
################################################################################
sumry5 <- df %>%
    filter(!(gen.modl == "trend" | ISEfail == 1)) %>%
    group_by(Ntrue, Model, Events) %>%
    mutate(I_insane = ifelse(Nest > 1E10, 1, 0),
           I_neg = ifelse(Nest < 0, 1, 0)) %>%
    summarise(n_ = n(),
              MeanEst = mean(Nest, na.rm = narm),
              RMSE = sqrt(mean((Nest - Ntrue)^2, na.rm = narm)),
              MAE = mean(abs(Nest - Ntrue), na.rm = narm),
              bias = MeanEst - first(Ntrue),
              SE = sd(Nest, na.rm = narm),
              frac_gt_2True = mean((Nest > (2 * Ntrue)), na.rm = narm),
              frac_lt_.5True = mean((Nest < (0.5 * Ntrue)), na.rm = narm),
              Q90ratio = quantile(Nest, prob = 0.90, na.rm = narm)/first(Ntrue),
              Coverage = mean(Icoverage, na.rm = narm),
              CoverageHPD = mean(HPDIcov, na.rm = narm),
              MedMOE = median(0.5 * CIwidth, na.rm = narm),
              MedMOEhpd = median(0.5 * (HPDucl - HPDlcl), na.rm = narm),
              skewness = skewness(Nest, na.rm = narm),
              Max = max(Nest, na.rm = narm),
              Ninsane = sum(I_insane),
              Nneg = sum(I_neg)
              )
names(sumry5) <- c("Ntrue", "Model", "Events", "n", "MeanEst", "RMSE", "MAE",
                   "Bias", "SE", "frac_gt_2True", "frac_lt_.5True", "Q90ratio",
                   "Coverage", "CoverageHPD", "MedMOE", "MedMOEhpd", "skewness",
                   "Max", "n_insane", "n_neg")
cat("Rows: ", dim(sumry5)[1], "\n")
sumry5$Model <- factor(sumry5$Model)
attr(sumry5, "Created by") <- "computeSummaries_2021-11-11.R"
attr(sumry5, "Time stamp") <- tstamp
saveRDS(sumry5, file = file.path(outpath, "Estimate_summaries_2021-11-11.rds"))
fwrite(sumry5, file = file.path(outpath, "Estimate_summaries_2021-11-11.csv"))
################################################################################
## Compute summaries by Events and Model, excluding estimates from the
## MO and trend data-generating models
################################################################################
sumry6 <- df %>%
    filter(!(gen.modl == "M0" | gen.modl == "trend" | ISEfail == 1)) %>%
    group_by(Ntrue, Model, Events) %>%
    mutate(I_insane = ifelse(Nest > 1E10, 1, 0),
           I_neg = ifelse(Nest < 0, 1, 0)) %>%
    summarise(n_ = n(),
              MeanEst = mean(Nest, na.rm = narm),
              RMSE = sqrt(mean((Nest - Ntrue)^2, na.rm = narm)),
              MAE = mean(abs(Nest - Ntrue), na.rm = narm),
              bias = MeanEst - first(Ntrue),
              SE = sd(Nest, na.rm = narm),
              frac_gt_2True = mean((Nest > (2 * Ntrue)), na.rm = narm),
              frac_lt_.5True = mean((Nest < (0.5 * Ntrue)), na.rm = narm),
              Q90ratio = quantile(Nest, prob = 0.90, na.rm = narm)/first(Ntrue),
              Coverage = mean(Icoverage, na.rm = narm),
              CoverageHPD = mean(HPDIcov, na.rm = narm),
              MedMOE = median(0.5 * CIwidth, na.rm = narm),
              MedMOEhpd = median(0.5 * (HPDucl - HPDlcl), na.rm = narm),
              skewness = skewness(Nest, na.rm = narm),
              Max = max(Nest, na.rm = narm),
              Ninsane = sum(I_insane),
              Nneg = sum(I_neg)
              )
names(sumry6) <- c("Ntrue", "Model", "Events", "n", "MeanEst", "RMSE", "MAE",
                   "Bias", "SE", "frac_gt_2True", "frac_lt_.5True", "Q90ratio",
                   "Coverage", "CoverageHPD", "MedMOE", "MedMOEhpd", "skewness",
                   "Max", "n_insane", "n_neg")
cat("Rows: ", dim(sumry6)[1], "\n")
sumry6$Model <- factor(sumry6$Model)
attr(sumry6, "Created by") <- "computeSummaries_2021-11-11.R"
attr(sumry6, "Time stamp") <- tstamp
saveRDS(sumry6, file = file.path(outpath, "Estimate_summaries_sansM0_2021-11-11.rds"))
fwrite(sumry6, file = file.path(outpath, "Estimate_summaries_sansM0_2021-11-11.csv"))
################################################################################
## Log session information
################################################################################
cat("Time stamp: ", tstamp, "\n")
sessionInfo()
#################################  END of FILE  ################################
