# Load required libraries
library(bnlearn)
library(dplyr)
library(ggplot2)
library(lmtest)    # Granger causality
library(reshape2)  # Data reshaping
library(knitr)     # Reporting
library(zoo)       
library(igraph)
library(ggraph)
library(tidygraph)
library(ggrepel)
library(scales)

# --------------------------
# 1. DATA PREPROCESSING
# --------------------------

# Load dataset (tab-separated, decimal comma, with a header row)
data <- read.table("D:/Descargas/Causal_inference/data_analysis.txt", dec = ",", header = TRUE, sep = "\t")

# Select key variables (adjust based on domain relevance)
selected_vars <- c("Flood_F1", "EOF1",
                   "TSI", "NH_Volcanic", "Tjja_Alps", "Pamj_Alps", "NH_Tja",
                   "SNAO", "EA", "EATL.WRUS", "AL", "OM", "AH")

# Keep the selected variables, drop incomplete rows and standardize each
# column to zero mean / unit variance.
# scale() returns a one-column matrix; as.numeric() turns it back into a plain
# numeric vector so the result has no matrix columns.
standardize_period <- function(period_data) {
  period_data %>%
    dplyr::select(all_of(selected_vars)) %>%
    na.omit() %>%
    mutate(across(everything(), ~ as.numeric(scale(.x))))
}

# Split on YEAR: rows before 1850 (pre-industrial) and from 1850 on (industrial)
pre_ind <- standardize_period(data[data$YEAR < 1850, ])
ind <- standardize_period(data[data$YEAR >= 1850, ])

# --------------------------
# 2. STRUCTURE LEARNING WITH PC ALGORITHM
# --------------------------

# Learn structure using the PC algorithm with bootstrapping, repeated over
# Monte Carlo perturbations of the flood series.
set.seed(123)
# All variables except the flood series (Flood_F1 is re-added after perturbation)
climate_data <- pre_ind[, setdiff(names(pre_ind), "Flood_F1")]
original_times <- as.numeric(pre_ind$Flood_F1)

n_iterations <- 100 # Monte Carlo iterations for perturbation of the data
stability_edges <- list()  # edge label -> number of iterations containing it

for (i in seq_len(n_iterations)) {
  # Add uniform noise in [-20, 20] to the flood series.
  # NOTE(review): the original comment describes this as shifting *timings* by
  # +/-20 years, but the noise is added to the (standardized) Flood_F1 values,
  # not to a time axis -- confirm this is the intended perturbation.
  perturbed_times <- original_times + runif(length(original_times), -20, 20)

  # Smooth the perturbed series over the row index and attach it to the
  # climatic variables
  perturbed_data <- climate_data %>%
    mutate(Flood_F1 = ksmooth(seq_along(perturbed_times), perturbed_times, kernel = "normal", bandwidth = 11)$y)

  # Bootstrap the PC algorithm on the *perturbed* data (previously the
  # unperturbed pre_ind was used, so every iteration saw the same data)
  pc_boot <- boot.strength(
    perturbed_data,
    algorithm = "pc.stable",
    algorithm.args = list(test = "cor", alpha = 0.1),  # Adjust alpha for multiple testing
    R = 100  # Number of bootstrap replicates
  )

  avg_pc <- averaged.network(pc_boot, threshold = attr(pc_boot, "threshold"))

  # Extract the arcs of the averaged network
  edges <- arcs(avg_pc)

  # Update the stability counts; seq_len() keeps the loop empty when the
  # averaged network has no arcs
  for (edge in seq_len(nrow(edges))) {
    edge_name <- paste(edges[edge, "from"], "->", edges[edge, "to"])
    previous_count <- stability_edges[[edge_name]]
    stability_edges[[edge_name]] <- (if (is.null(previous_count)) 0 else previous_count) + 1
  }
}

# Report, for each edge seen at least once, the percentage of iterations in
# which it appeared
for (edge in names(stability_edges)) {
  stability <- stability_edges[[edge]] / n_iterations * 100
  cat(paste0(edge, ": ", round(stability, 1), "%\n"))
}

# Arc-strength diagnostics for the unconstrained bootstrap (last iteration)

# Threshold stored on the strength object
attr(pc_boot, "threshold")

# Strength plot with two reference cut-offs at 0.75 and 0.85
plot(pc_boot)
abline(v = c(0.75, 0.85), col = c("tomato", "steelblue"), lty = 2, lwd = 2)

# Averaged network, keeping arcs above the threshold stored on pc_boot
avg_pc <- averaged.network(
  pc_boot,
  threshold = attr(pc_boot, "threshold")
)

# Draw the averaged network, then the same network annotated with strengths
graphviz.plot(avg_pc, main = "Pre-Industrial: Causal DAG", fontsize = 20)

strength.plot(avg_pc, pc_boot, shape = "ellipse")

# --------------------------
# 3. STRUCTURE LEARNING WITH DOMAIN CONSTRAINTS
# --------------------------

# Hybrid approach: add known climate interactions as a whitelist and forbid
# physically implausible directions through a tier-based blacklist.
# Tiers are ordered from causes to effects.
tiers <- list(
  # Tier 1: External forcings (no parent nodes)
  c("TSI", "NH_Volcanic"),
  # Tier 2: Hemispheric mode
  c("EOF1", "NH_Tja"),
  # Tier 3: Atmospheric modes
  c("SNAO", "EA", "EATL.WRUS", "AL", "OM", "AH"),
  # Tier 4: Regional temperature response
  c("Tjja_Alps"),
  # Tier 5: Regional precipitation response
  c("Pamj_Alps"),
  # Tier 6: Flood variability
  c("Flood_F1")
)

# Blacklist derived from the tier ordering (returned as a from/to matrix;
# converted to a data frame so it can be row-bound with the custom entries)
blacklist_tiers <- as.data.frame(tiers2blacklist(tiers), stringsAsFactors = FALSE)

# Additional explicit forbidden relationships
blacklist_custom <- rbind(
  # Prevent TSI <-> NH_Volcanic influence (both are in the same tier)
  data.frame(from = "TSI", to = "NH_Volcanic", stringsAsFactors = FALSE),
  data.frame(from = "NH_Volcanic", to = "TSI", stringsAsFactors = FALSE),

  # Prevent climate variables influencing forcings (redundant but explicit).
  # stringsAsFactors = FALSE keeps the node names as character, not factor.
  expand.grid(from = c("EOF1", "SNAO", "EA", "EATL.WRUS", "AL", "OM", "AH"),
              to = c("TSI", "NH_Volcanic"),
              stringsAsFactors = FALSE)
)

# Combine blacklists
full_blacklist <- unique(rbind(blacklist_tiers, blacklist_custom))

# Whitelist only the directed arc TSI -> EOF1.
# set2blacklist(c("TSI", "EOF1")) listed BOTH directions; bnlearn lets the
# whitelist override the blacklist, so EOF1 -> TSI was silently re-allowed
# despite the tier constraints.
full_whitelist <- data.frame(from = "TSI", to = "EOF1", stringsAsFactors = FALSE)

# Learn the constrained network (whitelist + blacklist), repeated over Monte
# Carlo perturbations of the flood series.

# Initialise variables
n_iterations <- 100
stability_edges <- list()  # edge label -> number of iterations containing it

for (i in seq_len(n_iterations)) {
  # Add uniform noise in [-20, 20] to the flood series.
  # (The original assigned this to `perturbed_time` but then smoothed the stale
  # `perturbed_times` left over from the previous section.)
  perturbed_times <- original_times + runif(length(original_times), -20, 20)

  # Smooth the perturbed series over the row index and attach it to the
  # climatic variables
  perturbed_data <- climate_data %>%
    mutate(Flood_F1 = ksmooth(seq_along(perturbed_times), perturbed_times, kernel = "normal", bandwidth = 11)$y)

  pc_constrained <- boot.strength(
    perturbed_data,
    algorithm = "pc.stable",
    algorithm.args = list(test = "cor", alpha = 0.1, whitelist = full_whitelist, blacklist = full_blacklist),  # Adjust alpha for multiple testing
    R = 100  # Number of bootstrap replicates
  )

  # Use the threshold of THIS bootstrap (previously read from pc_boot)
  avg_ct <- averaged.network(pc_constrained, threshold = attr(pc_constrained, "threshold"))

  edges <- arcs(avg_ct)

  # seq_len() keeps the loop empty when the averaged network has no arcs
  for (edge in seq_len(nrow(edges))) {
    edge_name <- paste(edges[edge, "from"], "->", edges[edge, "to"])
    previous_count <- stability_edges[[edge_name]]
    stability_edges[[edge_name]] <- (if (is.null(previous_count)) 0 else previous_count) + 1
  }
}

# Percentage of iterations in which each edge appeared
for (edge in names(stability_edges)) {
  stability <- stability_edges[[edge]] / n_iterations * 100
  cat(paste0(edge, ": ", round(stability, 1), "%\n"))
}

# Threshold stored on the constrained strength object
attr(pc_constrained, "threshold")

# Strength plot with two reference cut-offs at 0.75 and 0.85
plot(pc_constrained)
abline(v = c(0.75, 0.85), col = c("tomato", "steelblue"), lty = 2, lwd = 2)

# Averaged constrained network from the last bootstrap
avg_ct <- averaged.network(
  pc_constrained,
  threshold = attr(pc_constrained, "threshold")
)

# Side-by-side comparison: unconstrained vs tier-constrained network
par(mfrow = c(1, 2))
graphviz.plot(avg_pc, main = "Pre-Industrial DAG: without tiers", fontsize = 20)
graphviz.plot(avg_ct, main = "Pre-Industrial: Causal DAG with tiers", fontsize = 20)

# Same pair of networks, annotated with their bootstrap strengths
strength.plot(avg_pc, pc_boot, shape = "ellipse")
strength.plot(avg_ct, pc_constrained, shape = "ellipse")

# --------------------------
# 4. NONLINEAR & VALIDATION
# --------------------------

set.seed(123)
climate_data_ind <- ind[, 2:13] ## Industrial period (all columns except Flood_F1)
original_times_ind <- ind$Flood_F1

# Variable initialisation
n_iterations <- 100
stability_edges <- list()

# Validate only if the industrial era has enough rows; this does not change
# between iterations, so it is evaluated once.
validate_industrial <- nrow(ind) > 10

if (validate_industrial) {
  # Fit the parameters of the constrained pre-industrial network on the
  # PRE-INDUSTRIAL data, so that scoring the industrial data below is truly
  # out-of-sample. (Previously the network was re-fitted on the same
  # industrial data it was scored on, i.e. an in-sample likelihood.)
  fit <- bn.fit(avg_ct, pre_ind)
}

# Log-likelihood of each perturbed industrial data set under the fitted model
pred_logliks <- rep(NA_real_, n_iterations)

for (i in seq_len(n_iterations)) {
  # Add uniform noise in [-20, 20] to the industrial flood series
  perturbed_times_ind <- original_times_ind + runif(length(original_times_ind), -20, 20)

  # Smooth the perturbed series over the row index and attach it to the
  # climatic variables
  perturbed_data_ind <- climate_data_ind %>%
    mutate(Flood_F1 = ksmooth(seq_along(perturbed_times_ind), perturbed_times_ind, kernel = "normal", bandwidth = 11)$y)

  if (validate_industrial) {
    pred_loglik <- logLik(fit, perturbed_data_ind)
    pred_logliks[i] <- pred_loglik
    cat("Out-of-sample log-likelihood:", pred_loglik, "\n")
  }
}

# --------------------------
# 5. GRANGER CAUSALITY (TIME-SERIES)
# --------------------------

# Run a Granger test of `cause` against each variable in `targets`.
#   cause     : name of the candidate driver column
#   targets   : character vector of response column names
#   lag_order : lag order passed to grangertest() (its default is 1)
#   series    : data frame holding the columns (defaults to pre_ind)
# Returns a named list with one p-value per target (second row of `Pr(>F)`).
# NOTE(review): pre_ind had incomplete rows removed with na.omit(); if that
# dropped years inside the record, consecutive rows are not consecutive years.
granger_p_values <- function(cause, targets, lag_order = 1, series = pre_ind) {
  p_values <- lapply(targets, function(target) {
    result <- grangertest(
      as.formula(paste(target, "~", cause)),
      order = lag_order,  # Adjust based on autocorrelation
      data = series
    )
    result$`Pr(>F)`[2]
  })
  setNames(p_values, targets)
}

# Format a named list of p-values as a report table.
granger_table <- function(results) {
  kable(data.frame(
    Target = names(results),
    p_value = unlist(results)
  ), caption = "Granger Causality Results")
}

# Test if solar forcing (TSI) Granger-causes regional responses
granger_results <- granger_p_values(
  "TSI", c("NH_Tja", "Tjja_Alps", "Pamj_Alps", "Flood_F1"), lag_order = 1
)
granger_table(granger_results)

# Test if atmospheric variability (EOF1) Granger-causes regional responses
granger_results <- granger_p_values(
  "EOF1", c("NH_Tja", "Tjja_Alps", "Pamj_Alps", "Flood_F1"), lag_order = 1
)
granger_table(granger_results)

# Test if Northern Hemisphere temperature (NH_Tja) Granger-causes regional
# responses (lag order 2)
granger_results <- granger_p_values(
  "NH_Tja", c("Tjja_Alps", "Pamj_Alps", "Flood_F1"), lag_order = 2
)
granger_table(granger_results)

# Test if Alpine temperature (Tjja_Alps) Granger-causes precipitation and floods
granger_results <- granger_p_values(
  "Tjja_Alps", c("Pamj_Alps", "Flood_F1"), lag_order = 1
)
granger_table(granger_results)

# Test if Alpine precipitation (Pamj_Alps) Granger-causes Alpine temperature
# and floods
granger_results <- granger_p_values(
  "Pamj_Alps", c("Tjja_Alps", "Flood_F1"), lag_order = 1
)
granger_table(granger_results)

# --------------------------
# 6. SENSITIVITY ANALYSIS
# --------------------------

set.seed(123)
climate_data_ind <- ind[, 2:13]  # all columns except Flood_F1

# Industrial flood series. (A dead assignment from the non-existent column
# `ind$F1_composite`, which evaluated to NULL and was immediately overwritten,
# has been removed.)
original_times_ind <- ind$Flood_F1

n_iterations <- 100
stability_edges <- list()  # edge label -> number of iterations containing it

for (i in seq_len(n_iterations)) {

  # Add uniform noise in [-20, 20] to the industrial flood series
  perturbed_times_ind <- original_times_ind + runif(length(original_times_ind), -20, 20)

  # Smooth the perturbed series over the row index and attach it to the
  # climatic variables
  perturbed_data_ind <- climate_data_ind %>%
    mutate(Flood_F1 = ksmooth(seq_along(perturbed_times_ind), perturbed_times_ind, kernel = "normal", bandwidth = 11)$y)

  # Constrained PC bootstrap on the perturbed industrial data
  pc_ind <- boot.strength(
    perturbed_data_ind,
    algorithm = "pc.stable",
    algorithm.args = list(test = "cor", alpha = 0.1, whitelist = full_whitelist, blacklist = full_blacklist),
    R = 100
  )
  avg_in <- averaged.network(pc_ind, threshold = attr(pc_ind, "threshold"))

  edges <- arcs(avg_in)

  # seq_len() keeps the loop empty when the averaged network has no arcs
  for (edge in seq_len(nrow(edges))) {
    edge_name <- paste(edges[edge, "from"], "->", edges[edge, "to"])
    previous_count <- stability_edges[[edge_name]]
    stability_edges[[edge_name]] <- (if (is.null(previous_count)) 0 else previous_count) + 1
  }
}

# Percentage of iterations in which each edge appeared
for (edge in names(stability_edges)) {
  stability <- stability_edges[[edge]] / n_iterations * 100
  cat(paste0(edge, ": ", round(stability, 1), "%\n"))
}

# Single-panel strength plot for the industrial bootstrap (last iteration)
par(mfrow = c(1, 1))
attr(pc_ind, "threshold")
plot(pc_ind)
abline(v = 0.75, col = "tomato", lty = 2, lwd = 2)
abline(v = 0.85, col = "steelblue", lty = 2, lwd = 2)

# Averaged industrial network at the threshold stored on pc_ind
avg_in <- averaged.network(pc_ind, threshold = attr(pc_ind, "threshold"))
avg_in

# Side-by-side: industrial vs pre-industrial constrained networks
par(mfrow = c(1, 2))

graphviz.plot(avg_in, main = "Industrial: Causal DAG with tiers", fontsize = 20)
graphviz.plot(avg_ct, main = "Pre-industrial: Causal DAG with tiers", fontsize = 20)

# Each network is drawn with the strengths of its OWN bootstrap
# (avg_in was previously paired with pc_boot, the unconstrained
# pre-industrial strengths).
par(mfrow = c(1, 2))
strength.plot(avg_in, pc_ind, shape = "ellipse")
strength.plot(avg_ct, pc_constrained, shape = "ellipse")

# Compare the arcs of the pre-industrial (avg_ct, flagged `Original`) and
# industrial (avg_in, flagged `Perturbed`) averaged networks.

# Turn a network's arcs into a data frame with columns Edge.from / Edge.to and
# a logical flag column set to TRUE; works for networks without arcs too.
arcs_with_flag <- function(network, flag_name) {
  arc_matrix <- arcs(network)
  arc_table <- data.frame(
    Edge.from = arc_matrix[, "from"],
    Edge.to = arc_matrix[, "to"],
    stringsAsFactors = FALSE
  )
  arc_table[[flag_name]] <- rep(TRUE, nrow(arc_table))
  arc_table
}

# Join on BOTH endpoints: joining on Edge.to alone matched unrelated arcs that
# merely shared a destination node.
compare_df <- arcs_with_flag(avg_ct, "Original") %>%
  full_join(
    arcs_with_flag(avg_in, "Perturbed"),
    by = c("Edge.from", "Edge.to")
  ) %>%
  tidyr::replace_na(list(Original = FALSE, Perturbed = FALSE))

kable(compare_df, caption = "Edge Stability Under Dating Uncertainty")

# --------------------------
# 7. MECHANISTIC INTERPRETATION
# --------------------------

# Check if key physical pathways exist in the averaged networks.
# Labels must use the exact "from -> to" format (single spaces around "->").
expected_edges <- c(
  "TSI -> Flood_F1",
  "TSI -> EOF1",
  "TSI -> Tjja_Alps",
  "TSI -> NH_Tja",
  "TSI -> Pamj_Alps",
  "TSI -> SNAO",
  "NH_Volcanic -> Tjja_Alps",
  "NH_Volcanic -> NH_Tja",
  "EOF1 -> Flood_F1",
  "EOF1 -> Tjja_Alps",
  "EOF1 -> Pamj_Alps",  # was "EOF1 ->Pamj_Alps", which could never match
  "Tjja_Alps -> Flood_F1",
  "NH_Tja -> Flood_F1",
  "Pamj_Alps -> Flood_F1"
)

# Print, for every expected pathway, whether it is an arc of `network`.
report_expected_edges <- function(network) {
  arc_matrix <- arcs(network)
  # Build the arc labels once instead of once per expected edge
  present_edges <- paste(arc_matrix[, "from"], arc_matrix[, "to"], sep = " -> ")
  cat("Key physical pathways present in DAG:\n")
  for (edge in expected_edges) {
    cat(edge, ":", edge %in% present_edges, "\n")
  }
}

# Pre-industrial constrained network, then industrial network
report_expected_edges(avg_ct)
report_expected_edges(avg_in)

# --------------------------
# 8. REPORTING
# --------------------------

# Reporting: tables and ggraph plots of the constrained networks, drawn with
# the actual bootstrap strengths.
#
# Fixes relative to the previous version:
# - edge strengths and node sizes were overwritten with runif() values before
#   plotting, so the "Bootstrap Edge Strengths" figure showed random numbers;
# - simplify() dropped the strength/direction edge attributes;
# - the industrial section looked strengths up in pc_constrained, not pc_ind;
# - averaged.network() was called on a filtered strength table;
# - the "force acyclic" code was a no-op (as_directed() does not remove
#   cycles); cycles are now reported with a warning instead.

# Arcs whose bootstrap strength exceeds `min_strength`, strongest first.
strong_arcs <- function(strength_tbl, min_strength) {
  strength_tbl %>%
    as.data.frame() %>%
    filter(strength > min_strength) %>%
    arrange(desc(strength))
}

# Collapse an arc table to one directed edge per node pair (the direction with
# the higher `direction` probability) and force the whitelisted TSI -> EOF1.
directed_edge_table <- function(arc_tbl) {
  arc_tbl %>%
    filter(from != to) %>%
    mutate(pair = paste(pmin(from, to), pmax(from, to), sep = "_")) %>%
    group_by(pair) %>%
    slice_max(direction, n = 1, with_ties = FALSE) %>%  # keep only the stronger direction
    ungroup() %>%
    dplyr::select(from, to, strength, direction) %>%
    # Drop either direction of the TSI/EOF1 pair so the forced edge below is
    # never duplicated
    filter(!(from %in% c("TSI", "EOF1") & to %in% c("TSI", "EOF1"))) %>%
    bind_rows(data.frame(from = "TSI", to = "EOF1", strength = 1, direction = 1,
                         stringsAsFactors = FALSE))  # adjust the strength as appropriate
}

# igraph graph carrying `strength` and `direction` as edge attributes.
strength_graph <- function(edge_tbl) {
  graph <- graph_from_data_frame(edge_tbl, directed = TRUE)
  if (!igraph::is_dag(graph)) {
    warning("The thresholded edge set contains directed cycles; the graph is not a DAG.",
            call. = FALSE)
  }
  graph
}

# tidygraph version of `graph` with node `size` = total degree.
strength_tbl_graph <- function(graph) {
  node_degree <- unname(igraph::degree(graph, mode = "all"))
  as_tbl_graph(graph) %>%
    activate(nodes) %>%
    mutate(size = node_degree)
}

# ggraph plot of a tidygraph object whose edges carry a `strength` column.
plot_strength_dag <- function(graph) {
  edge_tbl <- graph %>% activate(edges) %>% as_tibble()
  if (!"strength" %in% colnames(edge_tbl)) {
    stop("La columna 'strength' no existe en los enlaces. Verifica el nombre correcto.")
  }

  ggraph(graph, layout = "fr") +  # alternatives: "sugiyama", "kk", "lgl", "tree"
    geom_edge_link(aes(edge_alpha = strength, edge_width = strength, edge_color = strength),
                   arrow = arrow(type = "closed", angle = 20, length = unit(3, "mm")),
                   end_cap = circle(2, 'mm')) +
    geom_node_point(aes(size = size), color = "black", fill = "white", shape = 21, stroke = 0.6) +
    geom_node_text(aes(label = name), repel = TRUE, size = 3.5, family = "sans") +
    scale_edge_color_gradient2(
      low = "blue", mid = "yellow", high = "red", midpoint = median(edge_tbl$strength),
      name = "Edge Strength"
    ) +
    scale_edge_width(range = c(0.3, 2.5), guide = "none") +
    scale_edge_alpha(range = c(0.4, 1), guide = "none") +
    scale_size_continuous(range = c(2, 8), guide = "none") +
    theme_graph(base_size = 14, base_family = "sans") +
    theme(
      legend.position = "right",
      legend.title = element_text(size = 10),
      legend.text = element_text(size = 9),
      plot.title = element_text(hjust = 0, face = "bold"),
      plot.subtitle = element_text(hjust = 0, size = 11),
      panel.border = element_rect(colour = "gray40", fill = NA, linewidth = 0.5)
    ) +
    labs(
      title = "Causal Structure of Hydroclimatic Interactions",
      subtitle = "Directed Acyclic Graph with Bootstrap Edge Strengths"
    )
}

# ---- Pre-industrial period: edges with strength > 0.41 ----
pre_ind_min_strength <- 0.41

high_conf_edges <- strong_arcs(pc_constrained, pre_ind_min_strength)
kable(high_conf_edges, caption = "High-Confidence Edges")

# Averaged network at the same cut-off, built from the full strength object
avg_ct_tidy <- averaged.network(pc_constrained, threshold = pre_ind_min_strength)

edge_conf <- directed_edge_table(high_conf_edges)
dag_graph <- strength_graph(edge_conf)
tidy_dag <- strength_tbl_graph(dag_graph)
tidy_dag %>% activate(edges) %>% as_tibble()

# Drop SNAO, EA and EATL.WRUS; removing nodes also removes their edges
tidy_dag_filtered <- tidy_dag %>%
  activate(nodes) %>%
  filter(!(name %in% c("SNAO", "EA", "EATL.WRUS")))

edges_data <- tidy_dag_filtered %>% activate(edges) %>% as_tibble()

set.seed(123)  # reproducible "fr" layout
plot_strength_dag(tidy_dag_filtered)

# ---- Industrial period: edges with strength > 0.48 ----
ind_min_strength <- 0.48

high_conf_edges <- strong_arcs(pc_ind, ind_min_strength)
kable(high_conf_edges, caption = "High-Confidence Edges")

# Same cut-off as the filter above (was 0.49 vs 0.48)
avg_ct_tidy <- averaged.network(pc_ind, threshold = ind_min_strength)

edge_conf <- directed_edge_table(high_conf_edges)
dag_graph <- strength_graph(edge_conf)
tidy_dag <- strength_tbl_graph(dag_graph)
tidy_dag %>% activate(edges) %>% as_tibble()

edges_data <- tidy_dag %>% activate(edges) %>% as_tibble()

set.seed(123)  # reproducible "fr" layout
plot_strength_dag(tidy_dag)

# Bootstrap strength for all arcs of the constrained pre-industrial bootstrap
# (pc_constrained) with strength >= 0.1, strongest first
high_conf_edges <- arrange(
  filter(pc_constrained, strength >= 0.1),
  desc(strength)
)

# Mean strength per (from, to) pair, returned ungrouped
edge_conf <- summarise(
  group_by(high_conf_edges, from, to),
  Strength = mean(strength),
  .groups = "drop"
)