diff --git a/DESCRIPTION b/DESCRIPTION index 5c76d87..409aae7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -94,3 +94,4 @@ Collate: 'struct_equiv.R' 'struct_test.R' 'survey_to_diffnet.R' + 'transmission.R' diff --git a/NAMESPACE b/NAMESPACE index 4a6bdac..1660f50 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -91,6 +91,7 @@ export(as.dgCMatrix) export(as_dgCMatrix) export(as_diffnet) export(as_spmat) +export(as_transmission_tree) export(bass_F) export(bass_dF) export(bass_f) @@ -186,6 +187,7 @@ export(threshold) export(toa_diff) export(toa_mat) export(transformGraphBy) +export(transmission_tree) export(vertex_covariate_compare) export(vertex_covariate_dist) export(vertex_mahalanobis_dist) diff --git a/NEWS.md b/NEWS.md index 05b028b..f416c1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,9 @@ * New dataset `epigames` and `epigamesDiffNet`: a simulated epidemic game network with 594 nodes and 15 time periods from the WKU Epi Games study. +* `exposure()` and `rdiffnet()` now support `mode = "stochastic"`, allowing + for probabilistic interpretation of edge weights in exposure calculations. + ## Internal changes * Fixed CRAN example error in `round_to_seq()`: `plot(w, x)` replaced with @@ -19,7 +22,6 @@ * Removed `configure` framework. R already provides paths and configuration for OpenMP. - # Changes in netdiffuseR version 1.24.0 (2025-12-09) * New function `degree_adoption_diagnostic()` analyzes the correlation between network diff --git a/R/adjmat.r b/R/adjmat.r index ca5bc83..cdd5847 100644 --- a/R/adjmat.r +++ b/R/adjmat.r @@ -537,6 +537,29 @@ toa_mat.default <- function(per, t0, t1) { ) } +# Build (adopt, cumadopt) from (toa, tod) intervals for a single behavior. +# Each node i is considered adopted on periods [toa[i], tod[i] - 1]; when +# tod[i] is NA the adoption is absorbing and the interval runs through t1. +cumadopt_from_intervals <- function(toa, tod, t0, t1, labels = NULL) { + n <- length(toa) + T <- t1 - t0 + 1L + adopt <- matrix(0L, nrow = n, ncol = T) + cumadopt <- matrix(0L, nrow = n, ncol = T) + for (i in seq_len(n)) { + s_val <- toa[i] + if (is.na(s_val)) next + s <- as.integer(s_val) - t0 + 1L + e_val <- tod[i] + e <- if (is.na(e_val)) T else (as.integer(e_val) - 1L - t0 + 1L) + if (s >= 1L && s <= T) adopt[i, s] <- 1L + if (s <= e && s >= 1L && e <= T && e >= 1L) cumadopt[i, s:e] <- 1L + } + rn <- if (length(labels)) labels else seq_len(n) + dimnames(adopt) <- list(rn, t0:t1) + dimnames(cumadopt) <- list(rn, t0:t1) + list(adopt = adopt, cumadopt = cumadopt) +} + # @rdname toa_mat # @export toa_mat.numeric <- function(times, labels=NULL, diff --git a/R/data.r b/R/data.r index 4ed3b83..9fe59d1 100644 --- a/R/data.r +++ b/R/data.r @@ -980,6 +980,21 @@ NULL # "epigames" #' A directed dynamic graph with 594 vertices and 15 time periods. The attributes #' in the graph are described in \code{\link{epigames}}. #' +#' By default, this \code{diffnet} object is **non-cumulative** (each slice represents +#' ephemeral daily contacts) and **valued** (edge weights represent contact duration in seconds). +#' +#' To reconstruct the classic cumulative/binarized network, you can run: +#' +#' \preformatted{ +#' epigames_cumul <- epigamesDiffNet +#' +#' # 1. Accumulate the history across time periods +#' epigames_cumul$graph <- Reduce("+", epigames_cumul$graph, accumulate = TRUE) +#' +#' # 2. Apply a logical cut-off to binarize the network +#' epigames_cumul$graph <- lapply(epigames_cumul$graph, function(m) { m@x[] <- 1; m }) +#' } +#' #' Non-adopters have \code{toa = NA}. #' #' @format A \code{\link{diffnet}} class object. diff --git a/R/degree_adoption_diagnostic.R b/R/degree_adoption_diagnostic.R index 1fa9ada..7d1600d 100644 --- a/R/degree_adoption_diagnostic.R +++ b/R/degree_adoption_diagnostic.R @@ -78,12 +78,14 @@ #' #' # Different degree aggregation strategies #' result_first <- degree_adoption_diagnostic(kfamilyDiffNet, degree_strategy = "first") -#' result_last <- degree_adoption_diagnostic(kfamilyDiffNet, degree_strategy = "last") +#' result_last <- degree_adoption_diagnostic(kfamilyDiffNet, degree_strategy = "last") #' #' # Multi-diffusion (toy) ---------------------------------------------------- #' set.seed(999) -#' n <- 40; t <- 5; q <- 2 -#' garr <- rgraph_ws(n, t, p=.3) +#' n <- 40 +#' t <- 5 +#' q <- 2 +#' garr <- rgraph_ws(n, t, p = .3) #' diffnet_multi <- rdiffnet(seed.graph = garr, t = t, seed.p.adopt = rep(list(0.1), q)) #' #' # pooled (one combined analysis) @@ -96,20 +98,19 @@ #' @family statistics #' @export degree_adoption_diagnostic <- function( - graph, - degree_strategy = c("mean", "first", "last"), - bootstrap = TRUE, - R = 1000, - conf.level = 0.95, - toa = NULL, - t0 = NULL, t1 = NULL, - name = NULL, - behavior = NULL, - combine = c("none", "pooled", "average", "earliest"), - min_adopters = 3, - valued = getOption("diffnet.valued", FALSE), - ... -) { + graph, + degree_strategy = c("mean", "first", "last"), + bootstrap = TRUE, + R = 1000, + conf.level = 0.95, + toa = NULL, + t0 = NULL, t1 = NULL, + name = NULL, + behavior = NULL, + combine = c("none", "pooled", "average", "earliest"), + min_adopters = 3, + valued = getOption("diffnet.valued", FALSE), + ...) { # Check that bootstrap is a logical scalar if (!is.logical(bootstrap) || length(bootstrap) != 1 || is.na(bootstrap)) { stop("'bootstrap' must be a logical scalar") @@ -154,8 +155,10 @@ degree_adoption_diagnostic <- function( } behavior_indices <- match(behavior, colnames(toa)) if (any(is.na(behavior_indices))) { - stop("Some behavior names not found in colnames(toa): ", - paste(behavior[is.na(behavior_indices)], collapse = ", ")) + stop( + "Some behavior names not found in colnames(toa): ", + paste(behavior[is.na(behavior_indices)], collapse = ", ") + ) } } else if (is.numeric(behavior)) { behavior_indices <- behavior @@ -186,8 +189,10 @@ degree_adoption_diagnostic <- function( combined_data <- prepare_combined_data(degrees, toa, combine, min_adopters, Q) if (nrow(combined_data) < min_adopters) { - stop("Insufficient adopters for correlation analysis. (n=", nrow(combined_data), - ", minimum = ", min_adopters, ").") + stop( + "Insufficient adopters for correlation analysis. (n=", nrow(combined_data), + ", minimum = ", min_adopters, ")." + ) } # Compute correlations @@ -200,8 +205,8 @@ degree_adoption_diagnostic <- function( NULL } - # Determine if undirected (graph is always a diffnet here) - undirected <- isTRUE(is_undirected(graph)) + # Determine if undirected by checking matrices + undirected <- check_undirected_graph(graph) # Return results structure(list( @@ -232,8 +237,12 @@ process_graph_input <- function(graph, toa, t0, t1, name, ...) { # If graph is a list, ensure all elements are dgCMatrix if (is.list(graph)) { graph <- lapply(graph, function(g) { - if (inherits(g, "dgCMatrix")) return(g) - if (is.matrix(g)) return(as(Matrix::Matrix(g, sparse = TRUE), "dgCMatrix")) + if (inherits(g, "dgCMatrix")) { + return(g) + } + if (is.matrix(g)) { + return(as(Matrix::Matrix(g, sparse = TRUE), "dgCMatrix")) + } stop("All elements of the graph list must be matrices or dgCMatrix.") }) } @@ -271,31 +280,16 @@ compute_degree_measures <- function(graph, degree_strategy, valued) { indegree <- rowMeans(dgr(graph, cmode = "indegree", valued = valued), na.rm = TRUE) outdegree <- rowMeans(dgr(graph, cmode = "outdegree", valued = valued), na.rm = TRUE) } else { - deg_matrix <- dgr(graph, valued = valued) - if (length(dim(deg_matrix)) == 3) { - # Dynamic case - if (degree_strategy == "first") { - indegree <- deg_matrix[, 1, "indegree"] - outdegree <- deg_matrix[, 1, "outdegree"] - } else if (degree_strategy == "last") { - last_time <- dim(deg_matrix)[2] - indegree <- deg_matrix[, last_time, "indegree"] - outdegree <- deg_matrix[, last_time, "outdegree"] - } - } else if (length(dim(deg_matrix)) == 2) { - # Static case: check for column names, else use position - cn <- colnames(deg_matrix) - if (!is.null(cn) && all(c("indegree", "outdegree") %in% cn)) { - indegree <- deg_matrix[, "indegree"] - outdegree <- deg_matrix[, "outdegree"] - } else if (ncol(deg_matrix) >= 2) { - indegree <- deg_matrix[, 1] - outdegree <- deg_matrix[, 2] - } else { - stop("Degree matrix does not have expected columns for static graph.") - } - } else { - stop("Unexpected degree matrix dimensions in compute_degree_measures.") + # Request in-degree and out-degree separately and explicitly + indeg_mat <- dgr(graph, cmode = "indegree", valued = valued) + outdeg_mat <- dgr(graph, cmode = "outdegree", valued = valued) + + if (degree_strategy == "first") { + indegree <- if (is.matrix(indeg_mat)) indeg_mat[, 1] else indeg_mat + outdegree <- if (is.matrix(outdeg_mat)) outdeg_mat[, 1] else outdeg_mat + } else { # last + indegree <- if (is.matrix(indeg_mat)) indeg_mat[, ncol(indeg_mat)] else indeg_mat + outdegree <- if (is.matrix(outdeg_mat)) outdeg_mat[, ncol(outdeg_mat)] else outdeg_mat } } @@ -328,8 +322,8 @@ analyze_multi_behaviors_separately <- function(degrees, toa, min_adopters, boots toa = toa_q[adopters_q] ) - correlations_matrix[1, q] <- cor_safe(data_q$indegree, data_q$toa ) - correlations_matrix[2, q] <- cor_safe(data_q$outdegree, data_q$toa ) + correlations_matrix[1, q] <- cor_safe(data_q$indegree, data_q$toa) + correlations_matrix[2, q] <- cor_safe(data_q$outdegree, data_q$toa) sample_sizes[q] <- nrow(data_q) if (bootstrap) { @@ -341,11 +335,7 @@ analyze_multi_behaviors_separately <- function(degrees, toa, min_adopters, boots } # Determine if undirected - undirected <- if (inherits(graph, "diffnet")) { - is_undirected(graph) - } else { - check_undirected_graph(graph) - } + undirected <- check_undirected_graph(graph) structure(list( correlations = correlations_matrix, @@ -391,7 +381,9 @@ prepare_combined_data <- function(degrees, toa, combine, min_adopters, Q) { } else if (combine == "earliest") { # Earliest TOA across behaviors per actor toa_min <- apply(toa, 1, function(row) { - if (all(is.na(row))) return(NA_real_) + if (all(is.na(row))) { + return(NA_real_) + } min(row, na.rm = TRUE) }) toa_min[is.infinite(toa_min)] <- NA @@ -414,12 +406,12 @@ compute_correlations <- function(data) { compute_bootstrap_results <- function(combined_data, R, conf.level) { # Compute baseline correlations - base_corr <- compute_correlations(combined_data) - indeg_corr <- base_corr[["indegree_toa"]] + base_corr <- compute_correlations(combined_data) + indeg_corr <- base_corr[["indegree_toa"]] outdeg_corr <- base_corr[["outdegree_toa"]] indeg_boot_list <- NULL - out_boot_list <- NULL + out_boot_list <- NULL # Out-degree if (!is.na(outdeg_corr)) { @@ -430,13 +422,16 @@ compute_bootstrap_results <- function(combined_data, R, conf.level) { } boot_obj_out <- boot::boot(combined_data, statistic = safe_bootstrap_out, R = R) bias_out <- mean(boot_obj_out$t, na.rm = TRUE) - outdeg_corr - se_out <- stats::sd(boot_obj_out$t, na.rm = TRUE) - - ci_out <- tryCatch({ - bci <- boot::boot.ci(boot_obj_out, conf = conf.level, type = "perc") - # Percentile CI vector (low, high) - if (!is.null(bci$percent)) bci$percent[4:5] else NULL - }, error = function(e) NULL) + se_out <- stats::sd(boot_obj_out$t, na.rm = TRUE) + + ci_out <- tryCatch( + { + bci <- boot::boot.ci(boot_obj_out, conf = conf.level, type = "perc") + # Percentile CI vector (low, high) + if (!is.null(bci$percent)) bci$percent[4:5] else NULL + }, + error = function(e) NULL + ) out_boot_list <- list( correlation = outdeg_corr, @@ -462,12 +457,15 @@ compute_bootstrap_results <- function(combined_data, R, conf.level) { } boot_obj_in <- boot::boot(combined_data, statistic = safe_bootstrap_in, R = R) bias_in <- mean(boot_obj_in$t, na.rm = TRUE) - indeg_corr - se_in <- stats::sd(boot_obj_in$t, na.rm = TRUE) - - ci_in <- tryCatch({ - bci <- boot::boot.ci(boot_obj_in, conf = conf.level, type = "perc") - if (!is.null(bci$percent)) bci$percent[4:5] else NULL - }, error = function(e) NULL) + se_in <- stats::sd(boot_obj_in$t, na.rm = TRUE) + + ci_in <- tryCatch( + { + bci <- boot::boot.ci(boot_obj_in, conf = conf.level, type = "perc") + if (!is.null(bci$percent)) bci$percent[4:5] else NULL + }, + error = function(e) NULL + ) indeg_boot_list <- list( correlation = indeg_corr, @@ -504,11 +502,15 @@ create_empty_result <- function(degree_strategy, original_call, combine, sample_ } check_undirected_graph <- function(graph) { + # If the input is a diffnet, we extract its raw list of matrices + if (inherits(graph, "diffnet")) { + graph <- graph$graph + } if (is.list(graph)) { return(all(sapply(graph, function(g) isSymmetric(as.matrix(g))))) } if (is.array(graph) && length(dim(graph)) == 3) { - return(all(sapply(seq_len(dim(graph)[3]), function(t) isSymmetric(as.matrix(graph[,,t]))))) + return(all(sapply(seq_len(dim(graph)[3]), function(t) isSymmetric(as.matrix(graph[, , t]))))) } if (is.matrix(graph)) { return(isSymmetric(as.matrix(graph))) @@ -568,7 +570,7 @@ print_single_behavior_results <- function(x, undirected) { # Print correlations cat("Correlations:\n") if (undirected) { - deg_r <- indeg_r # For undirected graphs, in-degree = out-degree = degree + deg_r <- indeg_r # For undirected graphs, in-degree = out-degree = degree cat(sprintf(" Degree - Time of Adoption: %.3f\n", deg_r)) } else { cat(sprintf(" In-degree - Time of Adoption: %.3f\n", indeg_r)) @@ -582,16 +584,24 @@ print_single_behavior_results <- function(x, undirected) { bootstrap_data <- x$bootstrap deg_ci <- if (undirected && !is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } indeg_ci <- if (!is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } outdeg_ci <- if (!is.null(bootstrap_data$outdegree$conf_int)) { bootstrap_data$outdegree$conf_int - } else NULL + } else { + NULL + } lvl <- if (!is.null(bootstrap_data$indegree$conf_level)) { bootstrap_data$indegree$conf_level * 100 - } else NA_real_ + } else { + NA_real_ + } if (undirected) { explain_degree_correlation("Degree", deg_r, deg_ci, lvl_arg = lvl) @@ -648,16 +658,24 @@ print_multi_behavior_results <- function(x, undirected) { bootstrap_data <- if (!is.null(x$bootstrap)) x$bootstrap[[j]] else NULL deg_ci <- if (undirected && !is.null(bootstrap_data) && !is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } indeg_ci <- if (!is.null(bootstrap_data) && !is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } outdeg_ci <- if (!is.null(bootstrap_data) && !is.null(bootstrap_data$outdegree$conf_int)) { bootstrap_data$outdegree$conf_int - } else NULL + } else { + NULL + } lvl <- if (!is.null(bootstrap_data) && !is.null(bootstrap_data$indegree$conf_level)) { bootstrap_data$indegree$conf_level * 100 - } else NA_real_ + } else { + NA_real_ + } cat(sprintf(" [%s]\n", bname)) if (undirected) { @@ -696,14 +714,20 @@ explain_degree_correlation <- function(label, r, ci, lvl_arg = NA_real_, thr = 0 format_interpretation_no_ci <- function(label, r, abs_big, degree_term, thr) { if (!abs_big) { - cat(sprintf(" %s: Weak relationship between %s and adoption timing:\n |r| \u2264 %.1f; no CI.\n", - label, degree_term, thr)) - } else if (r > 0) { - cat(sprintf(" %s: Central actors (high %s) tended to adopt early (supporters):\n |r| > %.1f; no CI.\n", - label, degree_term, thr)) + cat(sprintf( + " %s: Weak relationship between %s and adoption timing:\n |r| \u2264 %.1f; no CI.\n", + label, degree_term, thr + )) + } else if (r < 0) { + cat(sprintf( + " %s: Central actors (high %s) tended to adopt early (supporters):\n |r| > %.1f; no CI.\n", + label, degree_term, thr + )) } else { - cat(sprintf(" %s: Central actors (high %s) tended to adopt late (opposers):\n |r| > %.1f; no CI.\n", - label, degree_term, thr)) + cat(sprintf( + " %s: Central actors (high %s) tended to adopt late (opposers):\n |r| > %.1f; no CI.\n", + label, degree_term, thr + )) } } @@ -711,34 +735,51 @@ format_interpretation_with_ci <- function(label, r, ci, abs_big, degree_term, th lvl_local <- if (!is.na(lvl_arg)) lvl_arg else 95 ci_includes_zero <- (length(ci) >= 2) && is.finite(ci[1]) && is.finite(ci[2]) && (ci[1] <= 0 && ci[2] >= 0) + ci_low <- if (length(ci) >= 1) ci[1] else NA_real_ + ci_high <- if (length(ci) >= 2) ci[2] else NA_real_ + if (!abs_big) { - cat(sprintf(" %s: Weak relationship between %s and adoption timing; %s statistically supported:\n |r| \u2264 %.1f; CI (%.1f%%) %s 0.\n", - label, degree_term, - if (ci_includes_zero) "NOT" else "", - thr, lvl_local, - if (ci_includes_zero) "includes" else "excludes")) - } else if (r > 0) { - cat(sprintf(" %s: Central actors (high %s) tended to adopt early (supporters); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) %s 0.\n", - label, degree_term, - if (ci_includes_zero) "NOT" else "", - thr, lvl_local, - if (ci_includes_zero) "includes" else "excludes")) + cat(sprintf( + " %s: Weak relationship between %s and adoption timing; %s statistically supported:\n |r| \u2264 %.1f; CI (%.1f%%) = [%.3f, %.3f]\n", + label, degree_term, + if (ci_includes_zero) "NOT" else "", + thr, lvl_local, + ci_low, ci_high + )) + } else if (r < 0) { + cat(sprintf( + " %s: Central actors (high %s) tended to adopt early (supporters); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) = [%.3f, %.3f]\n", + label, degree_term, + if (ci_includes_zero) "NOT" else "", + thr, lvl_local, + ci_low, ci_high + )) } else { - cat(sprintf(" %s: Central actors (high %s) tended to adopt late (opposers); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) %s 0.\n", - label, degree_term, - if (ci_includes_zero) "NOT" else "", - thr, lvl_local, - if (ci_includes_zero) "includes" else "excludes")) + cat(sprintf( + " %s: Central actors (high %s) tended to adopt late (opposers); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) = [%.3f, %.3f]\n", + label, degree_term, + if (ci_includes_zero) "NOT" else "", + thr, lvl_local, + ci_low, ci_high + )) } } # Safe correlation: returns NA (no warnings) if zero-variance or too few pairs cor_safe <- function(x, y) { - x <- as.numeric(x); y <- as.numeric(y) + x <- as.numeric(x) + y <- as.numeric(y) ok <- is.finite(x) & is.finite(y) - if (!any(ok)) return(NA_real_) - x <- x[ok]; y <- y[ok] - if (length(x) < 2L) return(NA_real_) - if (sd(x) == 0 || sd(y) == 0) return(NA_real_) + if (!any(ok)) { + return(NA_real_) + } + x <- x[ok] + y <- y[ok] + if (length(x) < 2L) { + return(NA_real_) + } + if (sd(x) == 0 || sd(y) == 0) { + return(NA_real_) + } stats::cor(x, y) } diff --git a/R/diffnet-class.r b/R/diffnet-class.r index d92ec8d..867ab1a 100644 --- a/R/diffnet-class.r +++ b/R/diffnet-class.r @@ -356,6 +356,13 @@ check_as_diffnet_attrs <- function( #' @param as.df Logical scalar. When TRUE returns a data.frame. #' @param name Character scalar. Name of the diffusion network (descriptive). #' @param behavior Character vector. Name of the behavior(s) been analyzed (innovation). +#' @param tod Optional numeric vector of size \eqn{n}. Times of disadoption +#' (e.g. recovery, removal). When supplied, each non-\code{NA} entry must be +#' strictly greater than the corresponding \code{toa} and \code{NA} whenever +#' \code{toa} is \code{NA}. Node \eqn{i} is considered adopted over the closed +#' interval \eqn{[\mathrm{toa}_i, \mathrm{tod}_i - 1]}; \code{NA} in \code{tod} +#' means the adoption is absorbing. Currently only single-behavior (vector) +#' \code{tod} is supported. #' #' @seealso Default options are listed at \code{\link{netdiffuseR-options}} #' @details @@ -572,7 +579,8 @@ new_diffnet <- function( self = getOption("diffnet.self"), multiple = getOption("diffnet.multiple"), name = "Diffusion Network", - behavior = NULL + behavior = NULL, + tod = NULL ) { # Step 0.0: Check if its diffnet! -------------------------------------------- @@ -623,7 +631,7 @@ new_diffnet <- function( } # Step 2.1: Checking class of TOA and coercing if necessary - if (!inherits(toa, "integer")) { + if (!is.integer(toa)) { warning("Coercing -toa- into integer.") toa[] <- as.integer(toa) @@ -639,9 +647,50 @@ new_diffnet <- function( } } + # Step 2.3: Validating -tod- (time of disadoption), if provided ------------- + if (length(tod)) { + + if (num_of_behaviors > 1L) + stop("Multi-behavior -tod- is not supported yet. Provide a vector -tod- ", + "with a single behavior, or leave -tod- as NULL.") + + if (!is.null(dim(tod))) + stop("-tod- must be a vector of the same length as -toa-.") + + if (length(tod) != length(toa)) + stop("-tod- and -toa- have different lengths (", length(tod), " and ", + length(toa), " respectively).") + + if (!is.integer(tod)) { + warning("Coercing -tod- into integer.") + tod_names <- names(tod) + tod <- as.integer(tod) + names(tod) <- tod_names + } + + has_both <- !is.na(toa) & !is.na(tod) + if (any(tod[has_both] <= toa[has_both])) + stop("-tod- must be strictly greater than -toa- for every node with ", + "both values defined (an adopter must remain so for at least one ", + "period before disadopting).") + + if (any(!is.na(tod) & is.na(toa))) + stop("-tod- is defined for some nodes that never adopted (NA in -toa-). ", + "-tod- entries must be NA wherever -toa- is NA.") + + if (!length(names(tod))) names(tod) <- names(toa) + } + # Step 3.1: Creating Time of adoption matrix --------------------------------- mat <- toa_mat(toa, labels = meta$ids, t0=t0, t1=t1) + # Step 3.1b: Rebuilding cumadopt using [toa, tod-1] intervals if -tod- set + if (length(tod) && num_of_behaviors == 1L) { + intervals <- cumadopt_from_intervals(toa, tod, t0 = t0, t1 = t1, + labels = meta$ids) + mat$cumadopt <- intervals$cumadopt + } + # Step 3.2: Verifying dimensions and fixing meta$pers if (num_of_behaviors==1) { @@ -775,12 +824,14 @@ new_diffnet <- function( list( graph = graph, toa = toa, + tod = tod, adopt = adopt, cumadopt = cumadopt, # Attributes vertex.static.attrs = vertex.static.attrs, vertex.dyn.attrs = vertex.dyn.attrs, graph.attrs = graph.attrs, + transmission = NULL, meta = meta ), class="diffnet" diff --git a/R/rdiffnet.r b/R/rdiffnet.r index 122938f..353bc1f 100644 --- a/R/rdiffnet.r +++ b/R/rdiffnet.r @@ -22,11 +22,19 @@ #' it can also be an \eqn{n \times Q} matrix or a list of \eqn{Q} single behavior inputs. Sets the adoption #' threshold for each node. #' @param exposure.args List. Arguments to be passed to \code{\link{exposure}}. +#' @param exposure.mode Character scalar. Either "deterministic" (default) or "stochastic". #' @param name Character scalar. Passed to \code{\link{as_diffnet}}. #' @param behavior Character scalar or a list or character scalar (multiple behaviors only). Passed to \code{\link{as_diffnet}}. #' @param stop.no.diff Logical scalar. When \code{TRUE}, the function will return #' with error if there was no diffusion. Otherwise it throws a warning. #' @param disadopt Function of disadoption, with current exposition, cumulative adoption, and time as possible inputs. +#' @param adoption_model Character scalar. Either \code{"deterministic"} (default; +#' adopter iff \code{exposure >= threshold}) or \code{"stochastic"} (adopter with +#' probability \code{plogis(beta0 + beta_expo * exposure)}). +#' @param adoption_pars Named list. Required when +#' \code{adoption_model = "stochastic"}: must supply \code{beta0} (intercept) +#' and \code{beta_expo} (slope on exposure). Ignored when +#' \code{adoption_model = "deterministic"}. #' @return A random \code{\link{diffnet}} class object. #' @family simulation functions #' @details @@ -101,6 +109,10 @@ #' \code{normalized} \tab \code{TRUE} #' } #' +#' When \code{exposure.mode = "stochastic"}, the \code{valued} argument in +#' \code{exposure.args} is forced to \code{TRUE} (with a message) to ensure that +#' edge weights are treated as probabilities. +#' #' @examples #' # (Single behavior): -------------------------------------------------------- #' @@ -399,12 +411,22 @@ rdiffnet <- function( rewire.args = list(), threshold.dist = runif(n), exposure.args = list(), + exposure.mode = "deterministic", name = "A diffusion network", behavior = "Random contagion", stop.no.diff = TRUE, - disadopt = NULL + disadopt = NULL, + adoption_model = c("deterministic", "stochastic"), + adoption_pars = NULL ) { + adoption_model <- match.arg(adoption_model) + if (adoption_model == "stochastic") { + if (is.null(adoption_pars$beta0) || is.null(adoption_pars$beta_expo)) + stop("-adoption_pars- must supply both -beta0- and -beta_expo- ", + "when -adoption_model- = \"stochastic\".") + } + # Checking options for (arg in names(default_rewire.args)) if (!length(rewire.args[[arg]])) @@ -414,6 +436,14 @@ rdiffnet <- function( if (!length(exposure.args[[arg]])) exposure.args[[arg]] <- default_exposure.args[[arg]] + exposure.args$mode <- exposure.mode + + # If stochastic mode is selected, ensure valued is TRUE (enabling weights as probabilities) + if (exposure.mode == "stochastic" && !exposure.args$valued) { + message("exposure.mode='stochastic' requires valued=TRUE to use weights as probabilities. Setting exposure.args$valued=TRUE.") + exposure.args$valued <- TRUE + } + if (inherits(exposure.args[["attrs"]], "matrix")) { # Checking if the attrs matrix is has dims n x t if (any(dim(exposure.args[["attrs"]]) != dim(matrix(NA, nrow = n, ncol = t)))) { @@ -553,8 +583,15 @@ rdiffnet <- function( for (q in 1:num_of_behaviors) { - # 3.2 Identifying who adopts based on the threshold - whoadopts <- which( (expo[,,q] >= thr[,q]) & is.na(toa[,q])) + # 3.2 Identifying who adopts under the configured adoption model + if (adoption_model == "stochastic") { + e_q <- as.vector(expo[, , q]) + p <- stats::plogis(adoption_pars$beta0 + + adoption_pars$beta_expo * e_q) + whoadopts <- which((stats::runif(length(p)) < p) & is.na(toa[, q])) + } else { + whoadopts <- which((expo[, , q] >= thr[, q]) & is.na(toa[, q])) + } # 3.3 Updating the cumadopt cumadopt[whoadopts, i:t, q] <- 1L diff --git a/R/stats.R b/R/stats.R index dadcac6..833bea9 100644 --- a/R/stats.R +++ b/R/stats.R @@ -275,6 +275,21 @@ dgr.array <- function(graph, cmode, undirected, self, valued) { #' @param groupvar Passed to \code{\link{struct_equiv}}. #' @param lags Integer scalar. When different from 0, the resulting exposure #' matrix will be the lagged exposure as specified (see examples). +#' @param mode Character scalar. Either "deterministic" (default) or "stochastic". +#' @param link_fun Character scalar or function. Kernel applied to the +#' (valued) edge weights before exposure is computed. Supported names: +#' \code{"identity"} (default, no transformation), \code{"linear"} +#' (\eqn{\min(\beta w, 1)}), \code{"sigmoid"} +#' (\eqn{\mathrm{plogis}((w - h)/\mathrm{scale})}), and \code{"wells-riley"} +#' (\eqn{1 - \exp(-\beta w)}). Alternatively, a user-supplied +#' single-argument function \code{function(w)} with its parameters +#' baked into the closure; it must return a vector of the same length +#' as \code{w}. When \code{link_fun} is not \code{"identity"}, +#' \code{valued} is forced to \code{TRUE} (with a warning if the user +#' set it to \code{FALSE}). +#' @param link_pars Named list with the scalar parameters required by +#' the named kernels (\code{"linear"}, \code{"sigmoid"}, +#' \code{"wells-riley"}). Ignored when \code{link_fun} is a function. #' @details #' Exposure is calculated as follows: #' @@ -330,6 +345,25 @@ dgr.array <- function(graph, cmode, undirected, self, valued) { #' computed as a count instead of a proportion. A good example of this can be #' found at the examples section of the function \code{\link{rdiffnet}}. #' +#' \strong{Stochastic Exposure} +#' +#' When \code{mode = "stochastic"}, the exposure is calculated based on a probabilistic +#' interpretation of the edges. In this mode, the weights of the graph \eqn{S_t} are +#' treated as probabilities of transmission. For each edge \eqn{(i,j)}, a Bernoulli +#' trial is performed with probability \eqn{S_{t,ij}}. If the trial is successful, +#' the edge is "realized" as a full connection. If failed, the edge is treated +#' as non-existent. +#' +#' The denominator is calculated using the degree of the node, representing the total +#' number of potential contacts. +#' +#' \deqn{ +#' \tilde{E}_{ti} = \frac{\sum_{j \neq i} \mathbb{I}(U_{ij} < S_{t,ij}) a_{tj}}{\sum_{j \neq i} 1} +#' } +#' +#' Where \eqn{S_{t,ij}} is the weight of the edge from \eqn{j} to \eqn{i} at time \eqn{t} +#' (treated as probability), and \eqn{U_{ij} \sim \text{Uniform}(0,1)}. +#' #' @references #' Burt, R. S. (1987). "Social Contagion and Innovation: Cohesion versus Structural #' Equivalence". American Journal of Sociology, 92(6), 1287. @@ -494,8 +528,55 @@ dgr.array <- function(graph, cmode, undirected, self, valued) { #' @name exposure NULL +# Link / kernel function applied to edge weights before exposure is computed. +# `link_fun` is either one of the named kernels listed below or a +# user-supplied single-argument function `function(w)` that returns a vector +# of the same length as `w` (the non-zero entries of a dgCMatrix, `@x`). +# Parameters for user functions are expected to be baked into the closure. +.apply_link_kernel <- function(W, link_fun, link_pars) { + if (is.null(link_fun) || + (is.character(link_fun) && identical(link_fun, "identity"))) + return(W) + + if (is.function(link_fun)) { + new_x <- link_fun(W@x) + if (length(new_x) != length(W@x)) + stop("Custom -link_fun- must return a vector of the same length as ", + "its input (the non-zero edge weights).") + W@x <- as.numeric(new_x) + return(W) + } + + if (!is.character(link_fun) || length(link_fun) != 1L) + stop("-link_fun- must be NULL, a character scalar, or a function.") + + W@x <- switch( + link_fun, + "linear" = { + if (is.null(link_pars$beta)) + stop("link_fun = \"linear\" requires link_pars$beta.") + pmin(link_pars$beta * W@x, 1) + }, + "sigmoid" = { + if (is.null(link_pars$h) || is.null(link_pars$scale)) + stop("link_fun = \"sigmoid\" requires link_pars$h and link_pars$scale.") + stats::plogis((W@x - link_pars$h) / link_pars$scale) + }, + "wells-riley" = { + if (is.null(link_pars$beta)) + stop("link_fun = \"wells-riley\" requires link_pars$beta.") + 1 - exp(-link_pars$beta * W@x) + }, + stop("Unknown link_fun: ", link_fun, + ". Supported: identity, linear, sigmoid, wells-riley, or a function.") + ) + W +} + # Workhorse of exposure plotting -.exposure <- function(graph, cumadopt, attrs, outgoing, valued, normalized, self) { +.exposure <- function(graph, cumadopt, attrs, outgoing, valued, normalized, self, + mode = "deterministic", + link_fun = "identity", link_pars = list()) { # Getting the parameters n <- nrow(graph) @@ -513,23 +594,63 @@ NULL # Checking self if (!self) graph <- sp_diag(graph, rep(0, nnodes(graph))) - norm <- graph %*% attrs + 1e-20 + # Apply link / kernel function to edge weights (pre stochastic / normalization) + graph <- .apply_link_kernel(graph, link_fun, link_pars) + + # Calculate normalization and apply stochastic filter + if (mode == "stochastic") { + # Stochastic mode interprets edge weights as Bernoulli probabilities. + # Values outside [0, 1] saturate the sampler (>1 always fires, <0 + # never fires), which is almost never what the user intended when + # weights come from raw quantities (e.g. seconds of contact). Warn + # and let the caller decide whether to apply a `link_fun`. + if (length(graph@x) && + (any(graph@x < 0, na.rm = TRUE) || + any(graph@x > 1, na.rm = TRUE))) { + warning("Stochastic exposure expects edge weights in [0, 1] ", + "(Bernoulli probabilities). Found values outside this ", + "range; the sampler will saturate. Consider applying a ", + "-link_fun- such as \"wells-riley\" or \"linear\" that ", + "maps weights into [0, 1].") + } + + # Denominator: count non-zero-weight neighbours. Using `graph@x != 0` + # instead of `rep(1, ...)` keeps the denominator aligned with the + # kernel output -- structurally-stored zeros (e.g. from a link kernel + # that maps some weights to 0, or from user-supplied 0 weights) do + # not inflate the degree. + graph_binary <- graph + graph_binary@x <- as.numeric(graph@x != 0) + norm <- as.vector(graph_binary %*% attrs) + 1e-20 + + # Numerator: Stochastic Filter (Bernoulli -> Binary) + u <- stats::runif(length(graph@x)) + graph@x <- as.numeric(u < graph@x) + } else { + # Deterministic: Based on original weights + norm <- as.vector(graph %*% attrs) + 1e-20 + } - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { ans <- array(0, dim = c(dim(cumadopt)[1],dim(cumadopt)[3])) for (q in 1:dim(cumadopt)[3]) { + # Calculate numerator: Realized connections * Adoption status + numerator <- as.vector(graph %*% (attrs * cumadopt[,,q])) + if (normalized) { - ans[,q] <- as.vector(graph %*% (attrs * cumadopt[,,q]) / norm) + ans[,q] <- numerator / norm } else { - ans[,q] <- as.vector(graph %*% (attrs * cumadopt[,,q])) + ans[,q] <- numerator } } } else { - ans <- graph %*% (attrs * cumadopt) + numerator <- as.vector(graph %*% (attrs * cumadopt)) if (normalized) { - ans <- ans/ norm + ans <- numerator / norm + } else { + ans <- numerator } } @@ -570,9 +691,22 @@ exposure <- function( groupvar = NULL, self = getOption("diffnet.self"), lags = 0L, + mode = "deterministic", + link_fun = "identity", + link_pars = list(), ... ) { + # When a non-identity link kernel is requested, edge weights become the + # kernel input and must be preserved (binarizing would collapse them). + is_identity_link <- (is.null(link_fun)) || + (is.character(link_fun) && identical(link_fun, "identity")) + if (!is_identity_link && !valued) { + warning("-link_fun- different from \"identity\" requires valued edges; ", + "forcing -valued- to TRUE.") + valued <- TRUE + } + # Checking diffnet attributes if (length(attrs) == 1 && inherits(attrs, "character")) { if (!inherits(graph, "diffnet")) @@ -595,18 +729,55 @@ exposure <- function( if (!inherits(graph, "diffnet")) { stop("-cumadopt- should be provided when -graph- is not of class 'diffnet'") } else { - cumadopt <- toa_mat(graph)$cumadopt + cumadopt <- graph$cumadopt + + # Ensure rownames are present if graph is diffnet + if (is.list(cumadopt) && !is.data.frame(cumadopt)) { + for (i in seq_along(cumadopt)) { + if (is.null(rownames(cumadopt[[i]]))) { + rownames(cumadopt[[i]]) <- graph$meta$ids + } + } + } else if (is.null(rownames(cumadopt))) { + rownames(cumadopt) <- graph$meta$ids + } } + # Handling list of matrices (multi-behavior diffnet) + if (is.list(cumadopt) && !is.data.frame(cumadopt)) { + # Check if it is a list of matrices + if (all(sapply(cumadopt, function(x) length(dim(x)) == 2))) { + # Convert to array + n_c <- nrow(cumadopt[[1]]) + t_c <- ncol(cumadopt[[1]]) + q_c <- length(cumadopt) + cumadopt_arr <- array(0, dim = c(n_c, t_c, q_c)) + + # Preserve dimnames + dimnames(cumadopt_arr) <- list( + rownames(cumadopt[[1]]), + colnames(cumadopt[[1]]), + names(cumadopt) + ) + + for (i in 1:q_c) { + cumadopt_arr[,,i] <- as.matrix(cumadopt[[i]]) + } + cumadopt <- cumadopt_arr + } else { + warning("cumadopt is a list but elements do not appear to be matrices. Exposure calculation may fail.") + } + } + # Checking diffnet graph if (inherits(graph, "diffnet")) graph <- graph$graph # Checking attrs if (!length(attrs)) { - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { attrs <- array(1, dim = c(nrow(cumadopt), ncol(cumadopt), 1))} else {attrs <- matrix(1, ncol=ncol(cumadopt), nrow=nrow(cumadopt))} - } else if (!is.na(dim(cumadopt)[3])) { + } else if (length(dim(cumadopt)) == 3) { attrs <- array(attrs, dim = c(nrow(attrs), ncol(attrs), 1)) } @@ -653,7 +824,8 @@ exposure <- function( if ((is.array(graph) & !inherits(graph, "matrix")) | is.list(graph)) { exposure.list(as_spmat(graph), cumadopt, attrs, outgoing, valued, normalized, - self, lags) + self, lags, mode = mode, + link_fun = link_fun, link_pars = link_pars) } else stopifnot_graph(graph) } @@ -661,7 +833,8 @@ exposure <- function( # @export exposure.list <- function( graph, cumadopt, attrs, - outgoing, valued, normalized, self, lags) { + outgoing, valued, normalized, self, lags, mode = "deterministic", + link_fun = "identity", link_pars = list()) { # attrs can be either # degree, indegree, outdegree, or a user defined vector. @@ -670,7 +843,7 @@ exposure.list <- function( # dim(attrs) default n x T matrix of 1's if (!length(dim(attrs))) stop("-attrs- must be a matrix of size n by T.") - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { if (dim(cumadopt)[3]>1 && any(dim(attrs)[-3] != dim(cumadopt)[-3])) stop("Incorrect size for -attrs-. ", "Does not match n dim or t dim.") } else { @@ -681,7 +854,8 @@ exposure.list <- function( add_dimnames.mat(attrs) output <- exposure_for(graph, cumadopt, attrs, outgoing, valued, normalized, - self, lags) + self, lags, mode = mode, + link_fun = link_fun, link_pars = link_pars) dimnames(output) <- dimnames(cumadopt) output @@ -696,10 +870,13 @@ exposure_for <- function( valued, normalized, self, - lags + lags, + mode = "deterministic", + link_fun = "identity", + link_pars = list() ) { - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { out <- array(NA, dim = c(dim(cumadopt)[1], dim(cumadopt)[2], dim(cumadopt)[3])) if (lags >= 0L) { @@ -710,7 +887,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } else { for (i in (1 - lags):nslices(graph)) { @@ -720,7 +900,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } } else { @@ -734,7 +917,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } else { for (i in (1 - lags):nslices(graph)) { @@ -744,7 +930,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } } diff --git a/R/transmission.R b/R/transmission.R new file mode 100644 index 0000000..a450778 --- /dev/null +++ b/R/transmission.R @@ -0,0 +1,137 @@ +# Transmission tree handling for diffnet objects. +# +# The $transmission slot is a list with the following elements: +# - tree: data.frame with columns +# date integer, period when the transmission happened +# source integer, row index of the infector in x (NA for seeds) +# target integer, row index of the infectee in x +# source_exposure_date integer, period when `source` was infected (NA for seeds) +# virus_id integer, optional virus identifier +# virus character, optional virus label +# - pars: list, free-form parameters/metadata associated with the tree. +# Each row represents one infection event (an edge in the transmission tree); +# the set of (source -> target) pairs forms the directed forest from which +# offspring distributions (Lloyd-Smith et al., 2005) and likelihood-based +# reproduction-number estimates (White & Pagano, 2008) are derived. + +.transmission_cols <- c( + "date", "source", "target", "source_exposure_date", "virus_id", "virus" +) + +.empty_transmission_tree <- function() { + data.frame( + date = integer(0), + source = integer(0), + target = integer(0), + source_exposure_date = integer(0), + virus_id = integer(0), + virus = character(0), + stringsAsFactors = FALSE + ) +} + +#' Attach a transmission tree to a \code{diffnet} object +#' +#' Populates the \code{$transmission} slot of a \code{diffnet} with a +#' transmission tree (who-infected-whom). The resulting directed forest is the +#' canonical input to offspring-distribution analyses +#' (Lloyd-Smith \emph{et al.}, 2005) and to likelihood-based estimators of the +#' reproduction number and serial interval (White & Pagano, 2008). +#' +#' @param x A \code{diffnet} object. +#' @param tree A \code{data.frame} with at least the columns \code{date}, +#' \code{source}, \code{target}, and \code{source_exposure_date}. Columns +#' \code{virus_id} and \code{virus} are optional. \code{source} and +#' \code{source_exposure_date} may be \code{NA} for seed infections (roots +#' of the tree). +#' @param pars Optional named list stored verbatim in \code{x$transmission$pars}. +#' Useful for recording kernel parameters, seeds, etc. +#' +#' @details +#' Each row of \code{tree} represents one infection event (an edge +#' \eqn{\text{source} \to \text{target}} in the transmission tree) time-stamped +#' by \code{date}. \code{source} and \code{target} must be integer row indices +#' into \code{x} (\code{1..nnodes(x)}); \code{target} is required for every +#' row. Existing \code{$transmission} content is overwritten. +#' +#' @return The \code{diffnet} object \code{x} with \code{$transmission} set to +#' a list with components \code{tree} (a clean, ordered \code{data.frame}) +#' and \code{pars}. +#' +#' @references +#' Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +#' Superspreading and the effect of individual variation on disease emergence. +#' \emph{Nature} 438:355-359. \doi{10.1038/nature04153} +#' +#' White, L. F., & Pagano, M. (2008). A likelihood-based method for real-time +#' estimation of the serial interval and reproductive number of an epidemic. +#' \emph{Statistics in Medicine} 27:2999-3016. \doi{10.1002/sim.3136} +#' +#' @export +#' @seealso \code{\link{new_diffnet}} +as_transmission_tree <- function(x, tree, pars = list()) { + + if (!inherits(x, "diffnet")) + stop("-x- must be a diffnet object.") + + if (!is.data.frame(tree)) + stop("-tree- must be a data.frame.") + + required <- c("date", "source", "target", "source_exposure_date") + missing_cols <- setdiff(required, names(tree)) + if (length(missing_cols)) + stop("-tree- is missing required column(s): ", + paste(missing_cols, collapse = ", "), ".") + + if (anyNA(tree$target)) + stop("-tree$target- cannot contain NA values.") + + n <- nnodes(x) + tgt <- suppressWarnings(as.integer(tree$target)) + if (anyNA(tgt) || any(tgt < 1L) || any(tgt > n)) + stop("-tree$target- must be integer indices in 1..", n, ".") + + src <- suppressWarnings(as.integer(tree$source)) + src_ok <- src[!is.na(src)] + if (length(src_ok) && (any(src_ok < 1L) || any(src_ok > n))) + stop("-tree$source- must be NA or an integer index in 1..", n, ".") + + out <- data.frame( + date = as.integer(tree$date), + source = src, + target = tgt, + source_exposure_date = as.integer(tree$source_exposure_date), + stringsAsFactors = FALSE + ) + + out$virus_id <- if (!is.null(tree$virus_id)) + as.integer(tree$virus_id) else rep(1L, nrow(out)) + + out$virus <- if (!is.null(tree$virus)) + as.character(tree$virus) else rep(NA_character_, nrow(out)) + + out <- out[, .transmission_cols, drop = FALSE] + out <- out[order(out$date, out$target), , drop = FALSE] + rownames(out) <- NULL + + x$transmission <- list(tree = out, pars = pars) + x +} + +#' Retrieve the transmission tree of a \code{diffnet} object +#' +#' Returns the data.frame stored in \code{x$transmission$tree}. If none has +#' been attached, an empty data.frame with the standard columns is returned. +#' +#' @param x A \code{diffnet} object. +#' @return A \code{data.frame} with columns \code{date}, \code{source}, +#' \code{target}, \code{source_exposure_date}, \code{virus_id}, \code{virus}. +#' @export +#' @seealso \code{\link{as_transmission_tree}} +transmission_tree <- function(x) { + if (!inherits(x, "diffnet")) + stop("-x- must be a diffnet object.") + + tr <- x$transmission$tree + if (is.null(tr)) .empty_transmission_tree() else tr +} diff --git a/data-raw/epigames.R b/data-raw/epigames.R index 48c5402..28cfe97 100644 --- a/data-raw/epigames.R +++ b/data-raw/epigames.R @@ -7,7 +7,23 @@ rm(list = ls()) # both using consistent node IDs (1-594). load("data-raw/epigames_hourly.rda") -epigames <- epigames_hourly +# Load the hourly dynamic behavioral attributes +dyn_attrs_path <- "playground/epigames-stuff/epigames-analysis-copy/dynamic_attrs_hourly.csv" -# Save compressed raw data +dyn_attrs_hourly <- read.csv(dyn_attrs_path, stringsAsFactors = FALSE) + +# Sanity checks +stopifnot(ncol(dyn_attrs_hourly) == 5) # id, hour, mask, med, quarantine +stopifnot(nrow(dyn_attrs_hourly) == 594 * 339) # 201,366 rows +stopifnot(all(dyn_attrs_hourly$id %in% 1:594)) +stopifnot(all(dyn_attrs_hourly$hour %in% 0:338)) + +# Bundle into the epigames list (3 elements) +epigames <- list( + attributes = epigames_hourly$attributes, # static, 594 x 6 + edgelist = epigames_hourly$edgelist, # hourly, ~39k rows + dyn_attrs = dyn_attrs_hourly # dynamical attributes (long format) +) + +# Save compressed .rda usethis::use_data(epigames, overwrite = TRUE, compress = "xz") diff --git a/data-raw/epigamesDiffNet.R b/data-raw/epigamesDiffNet.R index a7d313f..0637ad2 100644 --- a/data-raw/epigamesDiffNet.R +++ b/data-raw/epigamesDiffNet.R @@ -1,50 +1,78 @@ # data-raw/epigamesDiffNet.R -# Generating the dynamic diffnet object using netdiffuseR + collapse_timeframes() +# Generating the daily diffnet object from epigames using collapse_timeframes() +# Run after data-raw/epigames.R has built data/epigames.rda. rm(list = ls()) library(netdiffuseR) -# Load the base raw dataset created in data-raw/epigames.R (hourly resolution) load("data/epigames.rda") -attrs <- epigames$attributes -edges <- epigames$edgelist +attrs <- epigames$attributes # 594 x 6: id, toa, qyes_total, qno_total, mask_prop, med_prop +edges <- epigames$edgelist # hourly edgelist: sender, receiver, time (0-338), weight +dyn_long <- epigames$dyn_attrs # long format: id, hour (0-338), mask, med, quarantine -# Collapse hourly edgelist (hours 0-338) into daily windows (days 1-15) -source("R/collapse_timeframes.R") +# Collapse hourly edgelist into 15 daily windows via collapse_timeframes() +WINDOW_SIZE <- 24 +N_DAYS <- 15 + +dyn_long$day <- (dyn_long$hour %/% WINDOW_SIZE) + 1 +dyn_long$day <- pmin(dyn_long$day, N_DAYS) # day mapping daily_edgelist <- collapse_timeframes( - edgelist = edges, - ego = "sender", - alter = "receiver", - timevar = "time", - weightvar = "weight", - window_size = 24, - binarize = TRUE, - cumulative = TRUE, - symmetric = TRUE + edgelist = edges, + ego = "sender", + alter = "receiver", + timevar = "time", + weightvar = "weight", + window_size = WINDOW_SIZE, + binarize = FALSE, + cumulative = FALSE, + symmetric = TRUE ) -# Build daily adjacency matrices +# Build adjacency matrices adjmat <- edgelist_to_adjmat( daily_edgelist[, c("sender", "receiver")], - w = daily_edgelist$weight, - t0 = daily_edgelist$time, + w = daily_edgelist$weight, + t0 = daily_edgelist$time, + t1 = daily_edgelist$time, keep.isolates = TRUE, multiple = TRUE ) -max_t <- max(daily_edgelist$time, na.rm = TRUE) +# Build vertex.dyn.attrs: one data.frame per day (15 total) +# Each data.frame: 594 rows, columns: mask, med, quarantine + +vertex_dyn <- lapply(1:N_DAYS, function(d) { + sub <- dyn_long[dyn_long$day == d, ] + + # Aggregate per node: mean within each 24-hour window + agg <- aggregate( + cbind(mask, med, quarantine) ~ id, + data = sub, + FUN = mean + ) + + # Sort by id to match the node ordering in the diffnet object + agg <- agg[order(agg$id), ] + rownames(agg) <- NULL + + # Return only the behavior columns + agg[, c("mask", "med", "quarantine")] +}) -# Prepare TOA vector: real adoption times from attrs, NA for non-adopters +# Prepare TOA vector toa_vec <- stats::setNames(attrs$toa, as.character(attrs$id)) +# Assemble diffnet object epigamesDiffNet <- as_diffnet( adjmat, - toa = toa_vec, + toa = toa_vec, vertex.static.attrs = attrs, + vertex.dyn.attrs = vertex_dyn, t0 = 1, - t1 = max_t + t1 = N_DAYS ) +# Save usethis::use_data(epigamesDiffNet, overwrite = TRUE, compress = "xz") diff --git a/data/epigames.rda b/data/epigames.rda index 0c0dda5..a9efca8 100644 Binary files a/data/epigames.rda and b/data/epigames.rda differ diff --git a/data/epigamesDiffNet.rda b/data/epigamesDiffNet.rda index fc1a2de..8e8e0d3 100644 Binary files a/data/epigamesDiffNet.rda and b/data/epigamesDiffNet.rda differ diff --git a/man/as_transmission_tree.Rd b/man/as_transmission_tree.Rd new file mode 100644 index 0000000..a73a38c --- /dev/null +++ b/man/as_transmission_tree.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transmission.R +\name{as_transmission_tree} +\alias{as_transmission_tree} +\title{Attach a transmission tree to a \code{diffnet} object} +\usage{ +as_transmission_tree(x, tree, pars = list()) +} +\arguments{ +\item{x}{A \code{diffnet} object.} + +\item{tree}{A \code{data.frame} with at least the columns \code{date}, +\code{source}, \code{target}, and \code{source_exposure_date}. Columns +\code{virus_id} and \code{virus} are optional. \code{source} and +\code{source_exposure_date} may be \code{NA} for seed infections (roots +of the tree).} + +\item{pars}{Optional named list stored verbatim in \code{x$transmission$pars}. +Useful for recording kernel parameters, seeds, etc.} +} +\value{ +The \code{diffnet} object \code{x} with \code{$transmission} set to + a list with components \code{tree} (a clean, ordered \code{data.frame}) + and \code{pars}. +} +\description{ +Populates the \code{$transmission} slot of a \code{diffnet} with a +transmission tree (who-infected-whom). The resulting directed forest is the +canonical input to offspring-distribution analyses +(Lloyd-Smith \emph{et al.}, 2005) and to likelihood-based estimators of the +reproduction number and serial interval (White & Pagano, 2008). +} +\details{ +Each row of \code{tree} represents one infection event (an edge +\eqn{\text{source} \to \text{target}} in the transmission tree) time-stamped +by \code{date}. \code{source} and \code{target} must be integer row indices +into \code{x} (\code{1..nnodes(x)}); \code{target} is required for every +row. Existing \code{$transmission} content is overwritten. +} +\references{ +Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +Superspreading and the effect of individual variation on disease emergence. +\emph{Nature} 438:355-359. \doi{10.1038/nature04153} + +White, L. F., & Pagano, M. (2008). A likelihood-based method for real-time +estimation of the serial interval and reproductive number of an epidemic. +\emph{Statistics in Medicine} 27:2999-3016. \doi{10.1002/sim.3136} +} +\seealso{ +\code{\link{new_diffnet}} +} diff --git a/man/diffnet-class.Rd b/man/diffnet-class.Rd index f2532b4..b825d54 100644 --- a/man/diffnet-class.Rd +++ b/man/diffnet-class.Rd @@ -52,7 +52,8 @@ new_diffnet( self = getOption("diffnet.self"), multiple = getOption("diffnet.multiple"), name = "Diffusion Network", - behavior = NULL + behavior = NULL, + tod = NULL ) \method{as.data.frame}{diffnet}( @@ -148,6 +149,14 @@ order of the rows in the attribute data.} \item{behavior}{Character vector. Name of the behavior(s) been analyzed (innovation).} +\item{tod}{Optional numeric vector of size \eqn{n}. Times of disadoption +(e.g. recovery, removal). When supplied, each non-\code{NA} entry must be +strictly greater than the corresponding \code{toa} and \code{NA} whenever +\code{toa} is \code{NA}. Node \eqn{i} is considered adopted over the closed +interval \eqn{[\mathrm{toa}_i, \mathrm{tod}_i - 1]}; \code{NA} in \code{tod} +means the adoption is absorbing. Currently only single-behavior (vector) +\code{tod} is supported.} + \item{x}{A \code{diffnet} object.} \item{row.names}{Ignored.} diff --git a/man/epigamesDiffNet.Rd b/man/epigamesDiffNet.Rd index f633835..6d36932 100644 --- a/man/epigamesDiffNet.Rd +++ b/man/epigamesDiffNet.Rd @@ -14,6 +14,21 @@ A directed dynamic graph with 594 vertices and 15 time periods. The attributes in the graph are described in \code{\link{epigames}}. } \details{ +By default, this \code{diffnet} object is **non-cumulative** (each slice represents +ephemeral daily contacts) and **valued** (edge weights represent contact duration in seconds). + +To reconstruct the classic cumulative/binarized network, you can run: + +\preformatted{ +epigames_cumul <- epigamesDiffNet + +# 1. Accumulate the history across time periods +epigames_cumul$graph <- Reduce("+", epigames_cumul$graph, accumulate = TRUE) + +# 2. Apply a logical cut-off to binarize the network +epigames_cumul$graph <- lapply(epigames_cumul$graph, function(m) { m@x[] <- 1; m }) +} + Non-adopters have \code{toa = NA}. } \seealso{ diff --git a/man/exposure.Rd b/man/exposure.Rd index 82f3531..1b9599f 100644 --- a/man/exposure.Rd +++ b/man/exposure.Rd @@ -15,6 +15,9 @@ exposure( groupvar = NULL, self = getOption("diffnet.self"), lags = 0L, + mode = "deterministic", + link_fun = "identity", + link_pars = list(), ... ) } @@ -47,6 +50,24 @@ and one (see details).} \item{lags}{Integer scalar. When different from 0, the resulting exposure matrix will be the lagged exposure as specified (see examples).} +\item{mode}{Character scalar. Either "deterministic" (default) or "stochastic".} + +\item{link_fun}{Character scalar or function. Kernel applied to the +(valued) edge weights before exposure is computed. Supported names: +\code{"identity"} (default, no transformation), \code{"linear"} +(\eqn{\min(\beta w, 1)}), \code{"sigmoid"} +(\eqn{\mathrm{plogis}((w - h)/\mathrm{scale})}), and \code{"wells-riley"} +(\eqn{1 - \exp(-\beta w)}). Alternatively, a user-supplied +single-argument function \code{function(w)} with its parameters +baked into the closure; it must return a vector of the same length +as \code{w}. When \code{link_fun} is not \code{"identity"}, +\code{valued} is forced to \code{TRUE} (with a warning if the user +set it to \code{FALSE}).} + +\item{link_pars}{Named list with the scalar parameters required by +the named kernels (\code{"linear"}, \code{"sigmoid"}, +\code{"wells-riley"}). Ignored when \code{link_fun} is a function.} + \item{...}{Further arguments passed to \code{\link{struct_equiv}} (only used when \code{alt.graph="se"}).} } @@ -115,6 +136,25 @@ If \code{normalize=FALSE} then denominator, \eqn{S_t \times x_t}{S(t) \%*\% x(t) is not included. This can be useful when, for example, exposure needs to be computed as a count instead of a proportion. A good example of this can be found at the examples section of the function \code{\link{rdiffnet}}. + +\strong{Stochastic Exposure} + +When \code{mode = "stochastic"}, the exposure is calculated based on a probabilistic +interpretation of the edges. In this mode, the weights of the graph \eqn{S_t} are +treated as probabilities of transmission. For each edge \eqn{(i,j)}, a Bernoulli +trial is performed with probability \eqn{S_{t,ij}}. If the trial is successful, +the edge is "realized" as a full connection. If failed, the edge is treated +as non-existent. + +The denominator is calculated using the degree of the node, representing the total +number of potential contacts. + +\deqn{ +\tilde{E}_{ti} = \frac{\sum_{j \neq i} \mathbb{I}(U_{ij} < S_{t,ij}) a_{tj}}{\sum_{j \neq i} 1} +} + +Where \eqn{S_{t,ij}} is the weight of the edge from \eqn{j} to \eqn{i} at time \eqn{t} +(treated as probability), and \eqn{U_{ij} \sim \text{Uniform}(0,1)}. } \examples{ # Calculating lagged exposure ----------------------------------------------- diff --git a/man/rdiffnet.Rd b/man/rdiffnet.Rd index 0d63f7b..ce15355 100644 --- a/man/rdiffnet.Rd +++ b/man/rdiffnet.Rd @@ -18,10 +18,13 @@ rdiffnet( rewire.args = list(), threshold.dist = runif(n), exposure.args = list(), + exposure.mode = "deterministic", name = "A diffusion network", behavior = "Random contagion", stop.no.diff = TRUE, - disadopt = NULL + disadopt = NULL, + adoption_model = c("deterministic", "stochastic"), + adoption_pars = NULL ) } \arguments{ @@ -61,6 +64,8 @@ threshold for each node.} \item{exposure.args}{List. Arguments to be passed to \code{\link{exposure}}.} +\item{exposure.mode}{Character scalar. Either "deterministic" (default) or "stochastic".} + \item{name}{Character scalar. Passed to \code{\link{as_diffnet}}.} \item{behavior}{Character scalar or a list or character scalar (multiple behaviors only). Passed to \code{\link{as_diffnet}}.} @@ -69,6 +74,15 @@ threshold for each node.} with error if there was no diffusion. Otherwise it throws a warning.} \item{disadopt}{Function of disadoption, with current exposition, cumulative adoption, and time as possible inputs.} + +\item{adoption_model}{Character scalar. Either \code{"deterministic"} (default; +adopter iff \code{exposure >= threshold}) or \code{"stochastic"} (adopter with +probability \code{plogis(beta0 + beta_expo * exposure)}).} + +\item{adoption_pars}{Named list. Required when +\code{adoption_model = "stochastic"}: must supply \code{beta0} (intercept) +and \code{beta_expo} (slope on exposure). Ignored when +\code{adoption_model = "deterministic"}.} } \value{ A random \code{\link{diffnet}} class object. @@ -156,6 +170,10 @@ is applied using that graph instead. \code{normalized} \tab \code{TRUE} } +When \code{exposure.mode = "stochastic"}, the \code{valued} argument in +\code{exposure.args} is forced to \code{TRUE} (with a message) to ensure that +edge weights are treated as probabilities. + The function \code{rdiffnet_multiple} is a wrapper of \code{rdiffnet} wich allows simulating multiple diffusion networks with the same parameters and apply the same function to all of them. This function is designed to allow the user diff --git a/man/transmission_tree.Rd b/man/transmission_tree.Rd new file mode 100644 index 0000000..10c065f --- /dev/null +++ b/man/transmission_tree.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transmission.R +\name{transmission_tree} +\alias{transmission_tree} +\title{Retrieve the transmission tree of a \code{diffnet} object} +\usage{ +transmission_tree(x) +} +\arguments{ +\item{x}{A \code{diffnet} object.} +} +\value{ +A \code{data.frame} with columns \code{date}, \code{source}, + \code{target}, \code{source_exposure_date}, \code{virus_id}, \code{virus}. +} +\description{ +Returns the data.frame stored in \code{x$transmission$tree}. If none has +been attached, an empty data.frame with the standard columns is returned. +} +\seealso{ +\code{\link{as_transmission_tree}} +} diff --git a/tests/testthat/test-exposure-link-fun.R b/tests/testthat/test-exposure-link-fun.R new file mode 100644 index 0000000..0531f10 --- /dev/null +++ b/tests/testthat/test-exposure-link-fun.R @@ -0,0 +1,175 @@ +context("Exposure link / kernel function") +library(netdiffuseR) + +# ---- Helpers -------------------------------------------------------------- +# Build a small valued dynamic graph + cumadopt that we can reuse. +mk_fixture <- function(seed = 71) { + set.seed(seed) + n <- 8L + g <- rgraph_er(n, t = 1, p = 0.5) + g@x <- runif(length(g@x), min = 0.1, max = 2.0) + cumadopt <- matrix(0, nrow = n, ncol = 1) + cumadopt[1:3, 1] <- 1 + list(g = list(g), cumadopt = cumadopt, n = n) +} + +# ---- Identity (default) --------------------------------------------------- +test_that("link_fun = 'identity' reproduces the default exposure", { + fx <- mk_fixture() + e_default <- exposure(fx$g, fx$cumadopt, valued = TRUE) + e_ident <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "identity") + expect_equal(e_default, e_ident) + + # NULL is treated as identity as well + e_null <- exposure(fx$g, fx$cumadopt, valued = TRUE, link_fun = NULL) + expect_equal(e_default, e_null) +}) + +# ---- Linear --------------------------------------------------------------- +test_that("link_fun = 'linear' applies min(beta * w, 1) to edge weights", { + fx <- mk_fixture() + + g_lin <- fx$g + g_lin[[1]]@x <- pmin(0.4 * g_lin[[1]]@x, 1) + e_manual <- exposure(g_lin, fx$cumadopt, valued = TRUE) + + e_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "linear", link_pars = list(beta = 0.4)) + expect_equal(e_kernel, e_manual) + + # Linear with beta large enough to saturate at 1 + g_sat <- fx$g + g_sat[[1]]@x <- pmin(1000 * g_sat[[1]]@x, 1) + e_sat_manual <- exposure(g_sat, fx$cumadopt, valued = TRUE) + e_sat_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "linear", + link_pars = list(beta = 1000)) + expect_equal(e_sat_kernel, e_sat_manual) +}) + +# ---- Sigmoid -------------------------------------------------------------- +test_that("link_fun = 'sigmoid' applies plogis((w - h)/scale)", { + fx <- mk_fixture() + + g_sig <- fx$g + g_sig[[1]]@x <- stats::plogis((g_sig[[1]]@x - 1.0) / 0.5) + e_manual <- exposure(g_sig, fx$cumadopt, valued = TRUE) + + e_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "sigmoid", + link_pars = list(h = 1.0, scale = 0.5)) + expect_equal(e_kernel, e_manual) +}) + +# ---- Wells-Riley ---------------------------------------------------------- +test_that("link_fun = 'wells-riley' applies 1 - exp(-beta * w)", { + fx <- mk_fixture() + + g_wr <- fx$g + g_wr[[1]]@x <- 1 - exp(-0.7 * g_wr[[1]]@x) + e_manual <- exposure(g_wr, fx$cumadopt, valued = TRUE) + + e_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "wells-riley", + link_pars = list(beta = 0.7)) + expect_equal(e_kernel, e_manual) +}) + +# ---- User-supplied function ---------------------------------------------- +test_that("link_fun accepts a single-argument user function with baked pars", { + fx <- mk_fixture() + # A fully-specified closure -- no link_pars needed. + random_func <- function(x) 1 - exp(-0.3 * x) + + g_ref <- fx$g + g_ref[[1]]@x <- 1 - exp(-0.3 * g_ref[[1]]@x) + e_manual <- exposure(g_ref, fx$cumadopt, valued = TRUE) + + e_user <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = random_func) + expect_equal(e_user, e_manual) +}) + +test_that("user-supplied link_fun must return same-length output", { + fx <- mk_fixture() + bad_kernel <- function(w) w[-1L] + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, link_fun = bad_kernel), + "same length" + ) +}) + +# ---- Parameter validation ------------------------------------------------- +test_that("Missing required link_pars raises informative errors", { + fx <- mk_fixture() + + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "linear", link_pars = list()), + "beta" + ) + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "wells-riley", link_pars = list()), + "beta" + ) + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "sigmoid", link_pars = list(h = 1)), + "scale" + ) + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "sigmoid", link_pars = list(scale = 1)), + "h" + ) +}) + +test_that("Unknown link_fun name errors informatively", { + fx <- mk_fixture() + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "gompertz", link_pars = list()), + "Unknown link_fun" + ) +}) + +# ---- Interaction with `valued` ------------------------------------------- +test_that("Non-identity link_fun warns and forces valued = TRUE", { + fx <- mk_fixture() + expect_warning( + e_forced <- exposure(fx$g, fx$cumadopt, valued = FALSE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)), + "valued" + ) + e_valued <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)) + expect_equal(e_forced, e_valued) +}) + +# ---- diffnet dispatch threads link_fun through ---------------------------- +test_that("diffnet dispatch accepts link_fun/link_pars", { + set.seed(9) + dn <- suppressWarnings(rdiffnet(n = 20, t = 4, seed.graph = "small-world", + seed.p.adopt = 0.1, + stop.no.diff = FALSE)) + # Force a non-trivial valued graph + for (i in seq_along(dn$graph)) { + dn$graph[[i]]@x <- runif(length(dn$graph[[i]]@x), 0.1, 2.0) + } + + e_ident <- exposure(dn, valued = TRUE) + e_wr <- exposure(dn, valued = TRUE, + link_fun = "wells-riley", + link_pars = list(beta = 1.2)) + + # Output must have the same shape as identity case + expect_equal(dim(e_ident), dim(e_wr)) + + # Wells-Riley output is bounded in [0, 1] + expect_true(all(e_wr >= 0)) + expect_true(all(e_wr <= 1 + 1e-10)) +}) diff --git a/tests/testthat/test-rdiffnet-stochastic.R b/tests/testthat/test-rdiffnet-stochastic.R new file mode 100644 index 0000000..a28073c --- /dev/null +++ b/tests/testthat/test-rdiffnet-stochastic.R @@ -0,0 +1,97 @@ +context("rdiffnet stochastic adoption model") +library(netdiffuseR) + +test_that("deterministic (default) path is unchanged", { + # Compare a deterministic run under fixed seed against itself on a second + # call with adoption_model explicitly set to "deterministic". + set.seed(2026) + dn_default <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE) + + set.seed(2026) + dn_det <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE, + adoption_model = "deterministic") + + expect_identical(dn_default$toa, dn_det$toa) +}) + +test_that("stochastic mode runs and returns a diffnet", { + set.seed(2026) + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_model = "stochastic", + adoption_pars = list(beta0 = -2, beta_expo = 6)) + + expect_s3_class(dn, "diffnet") + expect_equal(dim(dn$cumadopt), c(40, 6)) + expect_equal(length(dn$toa), 40) +}) + +test_that("stochastic mode requires both beta0 and beta_expo", { + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_model = "stochastic", adoption_pars = list(beta0 = 0), + stop.no.diff = FALSE), + "beta0.*beta_expo|beta_expo" + ) + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_model = "stochastic", adoption_pars = list(beta_expo = 1), + stop.no.diff = FALSE), + "beta0" + ) + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_model = "stochastic", + stop.no.diff = FALSE), + "beta0" + ) +}) + +test_that("saturating beta0 drives near-universal adoption", { + # Very large intercept -> plogis(beta0 + ...) ~ 1 for all exposures. + set.seed(99) + dn <- rdiffnet(n = 60, t = 8, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_model = "stochastic", + adoption_pars = list(beta0 = 50, beta_expo = 0)) + # Everyone has adopted by some t <= T + expect_true(all(!is.na(dn$toa))) +}) + +test_that("very negative beta0 + beta_expo = 0 suppresses diffusion", { + set.seed(99) + expect_warning( + dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_model = "stochastic", + adoption_pars = list(beta0 = -50, beta_expo = 0)), + "No diffusion" + ) + # All adopters come from the seed round (toa == 1) + expect_true(all(dn$toa[!is.na(dn$toa)] == 1L)) +}) + +test_that("stochastic works with multiple behaviors", { + set.seed(2026) + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.05, 0.05), + stop.no.diff = FALSE, + adoption_model = "stochastic", + adoption_pars = list(beta0 = -1, beta_expo = 4)) + + expect_s3_class(dn, "diffnet") + # Multi-behavior diffnet stores cumadopt as a list of length num_behaviors + expect_type(dn$cumadopt, "list") + expect_length(dn$cumadopt, 2L) + expect_equal(dim(dn$toa), c(40, 2)) +}) + +test_that("unknown adoption_model raises match.arg error", { + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_model = "probit", stop.no.diff = FALSE), + "should be one of" + ) +}) diff --git a/tests/testthat/test-stochastic-exposure.R b/tests/testthat/test-stochastic-exposure.R new file mode 100644 index 0000000..6bc9b44 --- /dev/null +++ b/tests/testthat/test-stochastic-exposure.R @@ -0,0 +1,157 @@ +context("Stochastic Exposure") +library(netdiffuseR) + +test_that("Stochastic vs Deterministic Exposure", { + # Create a small random graph + set.seed(123) + n <- 10 + g <- rgraph_er(n, t=1, p=0.5) + # Make it valued with probabilities + g@x <- runif(length(g@x)) + + # Wrap in list to make it "dynamic" (1 time step) + g_list <- list(g) + + # Create dummy adoption + cumadopt <- matrix(0, nrow=n, ncol=1) + cumadopt[1:5, 1] <- 1 + + # exposure expects a list for dynamic graphs if not diffnet + # MUST set valued=TRUE so weights are used as probabilities + e_det <- exposure(g_list, cumadopt, mode="deterministic", valued=TRUE) + e_stoch <- exposure(g_list, cumadopt, mode="stochastic", valued=TRUE) + + # They should be different (with high probability) + expect_false(isTRUE(all.equal(e_det, e_stoch))) + + # Check that stochastic exposure is non-negative and <= 1 + # With the new logic (preserving weights), it should be <= 1 + expect_true(all(e_stoch >= 0)) + expect_true(all(e_stoch <= 1 + 1e-10)) +}) + +test_that("rdiffnet wrapper with stochastic exposure", { + # We need a dynamic graph for rdiffnet usually, or t > 1 + set.seed(1231) + diffnet <- rdiffnet(n=20, t=5, seed.graph="small-world", + exposure.mode="stochastic", + seed.p.adopt = 0.2, + stop.no.diff = FALSE) # Prevent error if no diffusion occurs + + expect_s3_class(diffnet, "diffnet") + expect_true(all(diffnet$exposure >= 0)) +}) + +test_that("rdiffnet wrapper with stochastic exposure (multiple behaviors)", { + set.seed(1231) + # 2 behaviors + diffnet <- rdiffnet(n=20, t=5, seed.graph="small-world", + exposure.mode="stochastic", + seed.p.adopt = list(0.2, 0.2), + stop.no.diff = FALSE) + + expect_s3_class(diffnet, "diffnet") + + # Calculate exposure to check dimensions + # Note: We must use the same mode to get consistent dimensions/behavior + expo <- exposure(diffnet, mode="stochastic") + + # Check dimensions of exposure: n x t x 2 + expect_equal(dim(expo), c(20, 5, 2)) + expect_true(all(expo >= 0)) + expect_true(all(expo <= 1 + 1e-10)) +}) + +# ---- Continuous (non-Bernoulli) edge weights ----------------------------- +# Fixture: graph with floating-point weights resembling seconds of contact. +mk_seconds_graph <- function(seed = 17, n = 10L) { + set.seed(seed) + g <- rgraph_er(n, t = 1, p = 0.6) + g@x <- runif(length(g@x), min = 30, max = 3600) # 30s .. 1h + cumadopt <- matrix(0, nrow = n, ncol = 1) + cumadopt[1:4, 1] <- 1 + list(g = list(g), cumadopt = cumadopt, n = n) +} + +test_that("Each link_fun maps seconds-scale weights into valid probabilities", { + fx <- mk_seconds_graph() + + # Identity: raw seconds > 1 warns and saturates the sampler; the + # normalised exposure still ends up in [0, 1]. + expect_warning( + e_id <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic"), + "weights in \\[0, 1\\]" + ) + expect_true(all(e_id >= 0) && all(e_id <= 1 + 1e-10)) + + set.seed(101) + e_wr <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "wells-riley", + link_pars = list(beta = 1 / 1800)) + expect_true(all(e_wr >= 0) && all(e_wr <= 1 + 1e-10)) + + set.seed(101) + e_lin <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "linear", + link_pars = list(beta = 1 / 5000)) + expect_true(all(e_lin >= 0) && all(e_lin <= 1 + 1e-10)) + + set.seed(101) + e_sig <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "sigmoid", + link_pars = list(h = 600, scale = 300)) + expect_true(all(e_sig >= 0) && all(e_sig <= 1 + 1e-10)) +}) + +test_that("Out-of-range warning fires only when raw weights leave [0, 1]", { + fx <- mk_seconds_graph() + + # Kernel maps into [0, 1]: no warning. + expect_warning( + exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "wells-riley", + link_pars = list(beta = 1 / 1800)), + regexp = NA + ) + + # Caller pre-normalises: no warning. + set.seed(99); n <- fx$n + g_ok <- rgraph_er(n, t = 1, p = 0.6); g_ok@x <- runif(length(g_ok@x)) + expect_warning( + exposure(list(g_ok), fx$cumadopt, valued = TRUE, mode = "stochastic"), + regexp = NA + ) +}) + +test_that("Stochastic denominator excludes zero-weight stored neighbours", { + # Node 1 has 4 stored edges to adopters; 3 weigh 0, 1 weighs 1. + # Old behaviour: exposure[1] = 1/4; corrected: 1/1 = 1. + n <- 5L + A <- matrix(0, n, n); A[1, 2:5] <- 1 + G <- methods::as(A, "CsparseMatrix") + stopifnot(length(G@x) == 4) + G@x <- c(0, 0, 0, 1) # three stored zeros, one stored one + + cumadopt <- matrix(0, n, 1); cumadopt[2:5, 1] <- 1 + + set.seed(123) + e <- exposure(list(G), cumadopt, valued = TRUE, mode = "stochastic", + self = TRUE) + + expect_equal(e[1, 1], 1) +}) + +test_that("Zero-weight self-loops do not blow up the exposure", { + set.seed(7); n <- 6L + g <- rgraph_er(n, t = 1, p = 0.6) + g[1, 1] <- 1; g[1, 1] <- 0 # force a stored 0 on the diagonal + cumadopt <- matrix(0, n, 1); cumadopt[1, 1] <- 1 + + e <- exposure(list(g), cumadopt, valued = TRUE, mode = "stochastic", + self = FALSE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)) + expect_false(anyNA(e)) + expect_true(all(is.finite(e))) + expect_true(all(e >= 0) && all(e <= 1 + 1e-10)) +}) diff --git a/tests/testthat/test-tod-slot.R b/tests/testthat/test-tod-slot.R new file mode 100644 index 0000000..b275e27 --- /dev/null +++ b/tests/testthat/test-tod-slot.R @@ -0,0 +1,82 @@ +context("Time of disadoption (tod) slot") + +mk_graph <- function(n = 5L, T = 5L, seed = 1L) { + set.seed(seed) + lapply(seq_len(T), function(x) rgraph_ba(t = n - 1L)) +} + +test_that("Default new_diffnet still has tod = NULL and transmission = NULL", { + gr <- mk_graph() + toa <- c(1L, 2L, NA, 3L, 5L) + x <- new_diffnet(gr, toa, t0 = 1L, t1 = 5L) + + expect_null(x$tod) + expect_null(x$transmission) + expect_true(inherits(x, "diffnet")) +}) + +test_that("tod rebuilds cumadopt using [toa, tod - 1] intervals", { + gr <- mk_graph() + toa <- c(1L, 2L, NA, 3L, 5L) + tod <- c(3L, 4L, NA, NA, NA) + + x <- new_diffnet(gr, toa, tod = tod, t0 = 1L, t1 = 5L) + + expect_identical(x$tod, setNames(tod, as.character(seq_len(5L)))) + + # Node 1: adopts t=1, disadopts t=3 -> cumadopt c(1,1,0,0,0) + expect_equal(as.integer(x$cumadopt[1, ]), c(1L, 1L, 0L, 0L, 0L)) + # Node 2: adopts t=2, disadopts t=4 -> cumadopt c(0,1,1,0,0) + expect_equal(as.integer(x$cumadopt[2, ]), c(0L, 1L, 1L, 0L, 0L)) + # Node 3: never adopts -> all zero + expect_equal(as.integer(x$cumadopt[3, ]), rep(0L, 5L)) + # Node 4: adopts t=3, absorbing -> c(0,0,1,1,1) + expect_equal(as.integer(x$cumadopt[4, ]), c(0L, 0L, 1L, 1L, 1L)) + # Node 5: adopts t=5, absorbing -> c(0,0,0,0,1) + expect_equal(as.integer(x$cumadopt[5, ]), c(0L, 0L, 0L, 0L, 1L)) + + # adopt matrix is preserved (single 1 at the adoption period). + expect_equal(as.integer(rowSums(x$adopt)), c(1L, 1L, 0L, 1L, 1L)) +}) + +test_that("tod validation errors are informative", { + gr <- mk_graph() + toa <- c(1L, 2L, NA, 3L, 5L) + + # tod <= toa somewhere + expect_error( + new_diffnet(gr, toa, tod = c(0L, 2L, NA, NA, NA), t0 = 1L, t1 = 5L), + "strictly greater" + ) + + # length mismatch + expect_error( + new_diffnet(gr, toa, tod = c(3L, 4L, NA, NA, NA, NA), t0 = 1L, t1 = 5L), + "different lengths" + ) + + # tod defined where toa is NA + expect_error( + new_diffnet(gr, toa, tod = c(3L, 4L, 5L, NA, NA), t0 = 1L, t1 = 5L), + "NA wherever" + ) + + # tod must be a vector (matrix not yet supported) + expect_error( + new_diffnet(gr, toa, + tod = matrix(c(3L, 4L, NA, NA, NA), ncol = 1L), + t0 = 1L, t1 = 5L), + "vector of the same length" + ) +}) + +test_that("tod coerces non-integer input with a warning", { + gr <- mk_graph() + toa <- c(1L, 2L, NA, 3L, 5L) + + expect_warning( + x <- new_diffnet(gr, toa, tod = c(3, 4, NA, NA, NA), t0 = 1L, t1 = 5L), + "Coercing -tod-" + ) + expect_true(is.integer(x$tod)) +}) diff --git a/tests/testthat/test-transmission.R b/tests/testthat/test-transmission.R new file mode 100644 index 0000000..305002c --- /dev/null +++ b/tests/testthat/test-transmission.R @@ -0,0 +1,93 @@ +context("Transmission tree slot") + +mk_diffnet <- function() { + set.seed(42) + gr <- lapply(1:5, function(x) rgraph_ba(t = 4L)) + toa <- c(1L, 2L, NA, 3L, 5L) + new_diffnet(gr, toa, t0 = 1L, t1 = 5L) +} + +test_that("transmission_tree() returns an empty data.frame when unset", { + x <- mk_diffnet() + tr <- transmission_tree(x) + expect_s3_class(tr, "data.frame") + expect_equal(nrow(tr), 0L) + expect_setequal( + names(tr), + c("date", "source", "target", "source_exposure_date", "virus_id", "virus") + ) +}) + +test_that("as_transmission_tree() validates required columns and ranges", { + x <- mk_diffnet() + + expect_error( + as_transmission_tree(x, data.frame(date = 1L, source = NA, target = 1L)), + "missing required column" + ) + + expect_error( + as_transmission_tree(x, data.frame( + date = 1L, source = NA_integer_, target = NA_integer_, + source_exposure_date = NA_integer_ + )), + "cannot contain NA" + ) + + expect_error( + as_transmission_tree(x, data.frame( + date = 1L, source = NA_integer_, target = 999L, + source_exposure_date = NA_integer_ + )), + "integer indices" + ) + + expect_error( + as_transmission_tree(x, data.frame( + date = 1L, source = 42L, target = 2L, source_exposure_date = 1L + )), + "NA or an integer index" + ) +}) + +test_that("as_transmission_tree() stores a clean tree and optional pars", { + x <- mk_diffnet() + tree <- data.frame( + date = c(3L, 1L, 2L), + source = c(2L, NA, 1L), + target = c(4L, 1L, 2L), + source_exposure_date = c(2L, NA, 1L), + virus_id = c(1L, 1L, 1L), + virus = c("flu", "flu", "flu"), + stringsAsFactors = FALSE + ) + y <- as_transmission_tree(x, tree, pars = list(kernel = "wells-riley")) + tr <- transmission_tree(y) + + # Ordered by (date, target) and clean rownames + expect_equal(tr$date, c(1L, 2L, 3L)) + expect_equal(tr$target, c(1L, 2L, 4L)) + expect_equal(rownames(tr), as.character(seq_len(nrow(tr)))) + + expect_equal(y$transmission$pars$kernel, "wells-riley") +}) + +test_that("Missing optional columns are defaulted", { + x <- mk_diffnet() + tree <- data.frame( + date = c(1L, 2L), + source = c(NA_integer_, 1L), + target = c(1L, 2L), + source_exposure_date = c(NA_integer_, 1L) + ) + y <- as_transmission_tree(x, tree) + tr <- transmission_tree(y) + + expect_true(all(tr$virus_id == 1L)) + expect_true(all(is.na(tr$virus))) +}) + +test_that("transmission_tree() requires a diffnet", { + expect_error(transmission_tree(42), "must be a diffnet") + expect_error(as_transmission_tree(42, data.frame()), "must be a diffnet") +})