This vignette shows DDESONN results across 1000 seeds (two 500-seed runs) and compares them with a Keras parity run compiled in an Excel workbook.
The four RDS artifacts included with the package are under:
inst/extdata/heart_failure_runs/
│
├─ run1/
│ ├─ SingleRun_Train_Acc_Val_Metrics_500_seeds_20251025.rds
│ └─ SingleRun_Test_Metrics_500_seeds_20251025.rds
│
└─ run2/
├─ SingleRun_Train_Acc_Val_Metrics_500_seeds_20251026.rds
└─ SingleRun_Test_Metrics_500_seeds_20251026.rds
Each folder represents one 500-seed single-run performed locally; together they form the 1000-seed composite.
This vignette addresses a focused research question:
Can a fully R-native, from-first-principles neural network implementation achieve competitive statistical stability against an established deep-learning framework under repeated randomized initialization?
The comparison to Keras serves as a reference benchmark, not as an implementation template. DDESONN was built independently from scratch and was not derived from or based on Keras source code. Aside from a brief high-level review to understand benchmarking conventions, all implementation and architectural decisions were made independently.
The objective of the 1000-seed experiment is not to highlight a single favorable run, but to evaluate distributional behavior across seeds. In this context, “competitive” means achieving a comparable mean accuracy and a comparably small spread (standard deviation and interquartile range) across seeds, rather than matching or beating the reference on any single run.
The comparison evaluates implementation correctness, training stability, and reproducibility under stress — not code replication or framework mirroring.
# Attach the tidy-data packages quietly so vignette output stays clean.
for (.pkg in c("dplyr", "tibble")) {
  suppressPackageStartupMessages(library(.pkg, character.only = TRUE))
}
# When DDESONN itself is unavailable (e.g. docs built outside an installed
# package), turn off evaluation of later chunks rather than erroring.
if (!requireNamespace("DDESONN", quietly = TRUE)) {
  message("DDESONN not installed in this build session; skipping evaluation.")
  knitr::opts_chunk$set(eval = FALSE)
}
# Thin wrapper around the package's internal table viewer; `...` is
# forwarded untouched so display options pass through.
.vtbl <- function(x, title = NULL, ...) {
  viewer <- utils::getFromNamespace("ddesonn_viewTables", "DDESONN")
  viewer(x, title = title, ...)
}
# Locate the bundled run artifacts. system.file() returns "" when the
# package is not installed, in which case we fall back to the source tree
# layout used while building vignettes locally.
heart_failure_root <- system.file("extdata", "heart_failure_runs", package = "DDESONN")
if (heart_failure_root == "") {
  heart_failure_root <- file.path("..", "inst", "extdata", "heart_failure_runs")
}
stopifnot(dir.exists(heart_failure_root))
# Build the path to one RDS artifact inside a run folder; removes the
# fourfold repetition of file.path(heart_failure_root, ...) calls.
run_artifact <- function(run, file) {
  file.path(heart_failure_root, run, file)
}

train_run1_path <- run_artifact("run1", "SingleRun_Train_Acc_Val_Metrics_500_seeds_20251025.rds")
test_run1_path  <- run_artifact("run1", "SingleRun_Test_Metrics_500_seeds_20251025.rds")
train_run2_path <- run_artifact("run2", "SingleRun_Train_Acc_Val_Metrics_500_seeds_20251026.rds")
test_run2_path  <- run_artifact("run2", "SingleRun_Test_Metrics_500_seeds_20251026.rds")

# Ensure required files exist
stopifnot(
  file.exists(train_run1_path),
  file.exists(test_run1_path),
  file.exists(train_run2_path),
  file.exists(test_run2_path)
)
# Read each per-run metric table from disk.
train_run1 <- readRDS(train_run1_path)
train_run2 <- readRDS(train_run2_path)
test_run1  <- readRDS(test_run1_path)
test_run2  <- readRDS(test_run2_path)

# Stack the two 500-seed runs into the 1000-seed composites.
train_all <- dplyr::bind_rows(train_run1, train_run2)
test_all  <- dplyr::bind_rows(test_run1, test_run2)
# One row per seed: keep the epoch with the best validation accuracy
# (first row on ties), then retain just the accuracy columns.
train_seed <- train_all %>%
  group_by(seed) %>%
  slice_max(best_val_acc, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  transmute(
    seed,
    train_acc = best_train_acc,
    val_acc = best_val_acc
  )
# One row per seed on the test side: best test accuracy, first row on ties.
test_seed <- test_all %>%
  group_by(seed) %>%
  slice_max(accuracy, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  transmute(
    seed,
    test_acc = accuracy
  )
# Keep only seeds present in both tables, ordered by seed.
merged <- train_seed %>%
  inner_join(test_seed, by = "seed") %>%
  arrange(seed)
#' Pandas-describe-style summary of one numeric vector.
#'
#' Returns a one-row data.frame with count, mean, std, min, the 25/50/75%
#' quantiles (type 7, matching the previous behavior), and max.
#'
#' @param x Numeric vector to summarize.
#' @param na.rm Drop `NA` values before summarizing? Defaults to `FALSE`,
#'   preserving the original behavior (NAs propagate into the statistics).
summarize_column <- function(x, na.rm = FALSE) {
  stopifnot(is.numeric(x))
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  # One vectorized quantile call instead of three scalar ones.
  q <- stats::quantile(x, probs = c(0.25, 0.50, 0.75), names = FALSE, type = 7)
  data.frame(
    count = length(x),
    mean = mean(x),
    std = stats::sd(x),
    min = min(x),
    `25%` = q[1],
    `50%` = q[2],
    `75%` = q[3],
    max = max(x),
    check.names = FALSE
  )
}
# Per-metric one-row summaries.
summary_train <- summarize_column(merged$train_acc)
summary_val <- summarize_column(merged$val_acc)
summary_test <- summarize_column(merged$test_acc)

# Assemble into a long table: one row per statistic, one column per metric.
# unlist() keeps the stat names on each column, matching the original layout.
summary_all <- data.frame(
  stat = c("count", "mean", "std", "min", "25%", "50%", "75%", "max"),
  train_acc = unlist(summary_train[1, ]),
  val_acc = unlist(summary_val[1, ]),
  test_acc = unlist(summary_test[1, ]),
  check.names = FALSE
)

# Round numeric columns to 4 decimals for display; leave labels untouched.
round4 <- function(x) if (is.numeric(x)) round(x, 4) else x
pretty_summary <- summary_all
pretty_summary[] <- lapply(pretty_summary, round4)
.vtbl(pretty_summary, title = "DDESONN — 1000-seed summary (train/val/test)")
#> <table>
#> <thead>
#> <tr>
#> <th style="text-align:left;"> stat </th>
#> <th style="text-align:right;"> train_acc </th>
#> <th style="text-align:right;"> val_acc </th>
#> <th style="text-align:right;"> test_acc </th>
#> </tr>
#> </thead>
#> <tbody>
#> <tr>
#> <td style="text-align:left;"> count </td>
#> <td style="text-align:right;"> 1000.0000 </td>
#> <td style="text-align:right;"> 1000.0000 </td>
#> <td style="text-align:right;"> 1000.0000 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> mean </td>
#> <td style="text-align:right;"> 0.9928 </td>
#> <td style="text-align:right;"> 0.9992 </td>
#> <td style="text-align:right;"> 0.9992 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> std </td>
#> <td style="text-align:right;"> 0.0014 </td>
#> <td style="text-align:right;"> 0.0013 </td>
#> <td style="text-align:right;"> 0.0013 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> min </td>
#> <td style="text-align:right;"> 0.9854 </td>
#> <td style="text-align:right;"> 0.9893 </td>
#> <td style="text-align:right;"> 0.9920 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> 25% </td>
#> <td style="text-align:right;"> 0.9920 </td>
#> <td style="text-align:right;"> 0.9987 </td>
#> <td style="text-align:right;"> 0.9987 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> 50% </td>
#> <td style="text-align:right;"> 0.9929 </td>
#> <td style="text-align:right;"> 1.0000 </td>
#> <td style="text-align:right;"> 1.0000 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> 75% </td>
#> <td style="text-align:right;"> 0.9937 </td>
#> <td style="text-align:right;"> 1.0000 </td>
#> <td style="text-align:right;"> 1.0000 </td>
#> </tr>
#> <tr>
#> <td style="text-align:left;"> max </td>
#> <td style="text-align:right;"> 0.9963 </td>
#> <td style="text-align:right;"> 1.0000 </td>
#> <td style="text-align:right;"> 1.0000 </td>
#> </tr>
#> </tbody>
#> </table>

Keras parity results are stored in an Excel workbook included with the package under:
inst/extdata/vsKeras/1000SEEDSRESULTSvsKeras/1000seedsKeras.xlsx
The file is accessed programmatically using
system.file() to ensure CRAN-safe, cross-platform
installation paths.
if (!requireNamespace("readxl", quietly = TRUE)) {
  message("Skipping keras-summary chunk: 'readxl' not installed.")
} else {
  # Resolve the workbook through system.file() for CRAN-safe paths.
  keras_path <- system.file(
    "extdata", "vsKeras", "1000SEEDSRESULTSvsKeras", "1000seedsKeras.xlsx",
    package = "DDESONN"
  )
  # Guard inverted (De Morgan) relative to the original; same behavior.
  if (!nzchar(keras_path) || !file.exists(keras_path)) {
    cat("Keras Excel not found in installed package.\n")
  } else {
    keras_stats <- readxl::read_excel(keras_path, sheet = 2)
    .vtbl(keras_stats, title = "Keras — 1000-seed summary (Sheet 2)")
  }
}
#> Keras Excel not found in installed package.

The parity scripts TestDDESONN_1000seeds.R and TestKeras_1000seeds.py, along with the workbook 1000seedsKeras.xlsx, are shipped under
inst/extdata/vsKeras/1000SEEDSRESULTSvsKeras/.

The results shown here were computed locally.
For large-scale experiments (hundreds to thousands of seeds), DDESONN can be executed in distributed environments to significantly reduce wall-clock time. Distributed tooling and development-stage orchestration scripts are maintained in the GitHub repository and intentionally excluded from the CRAN build so that this vignette remains focused on validated results and methodology.