# Quantifying effects of peril dependency
# R-code 
# Copyright 2023 - CC4.0 BY John Hillier

# This script supports the paper
# 'Open R-code to communicate the impact of co-occurring natural hazards'
# Submitted to Geoscience Communication by Hillier et al. (2023)

# Libraries
library(ggplot2)
library(data.table)
library(extRemes)
library(SimJoint)  # Package containing function to reorder data
library(psych)
library(copula)
library(VineCopula)
library(dplyr)

##############
# User guide #
##############

# Please note that this is working code
# It likely could be more elegantly written, but it has been robustly checked to work as it is.
# It has not been robustly debugged, however, for variations (e.g. change in input format)

# 1. - Section 0: Set the user input variables.  
#      The defaults given in [] are as for the previous work cited in the paper (i.e. the Bank Underground blog - Hillier et al (2023)) and explained there.
#      by default all 5 copula options are run
#            C1 - t-copula with 5 degrees of freedom (DoF can be changed below)
#            C2 - t-copula with 10 degrees of freedom (DoF can be changed below)
#            C3 - Gaussian copula
#            C4 - Rank swapping algorithm historically widely used in the insurance sector
#            C5 - Gumbel copula
#      A location for output files is needed ['Run1' in same directory as script]
#      Please add the locations of your input files, and check the format of them.
#
#      An option to scale the input data to match the relative sizes of losses for a firm
#      is coded in. Use the sample (made up) file provided if you don't want to edit the script
#
# 2. - Section 1 Data Preparation and Review
#      It is ASSUMED that you have cleaned and formatted data prior to using this script 
#      Format is given below, and data are in a text file with tab separated columns (i.e. \t)
#      Check that data are numeric (e.g. integer/float)
#      Data cleaning is done in Section 1.2: Fixes some NA values.
#      
#      Any years with no events are accounted for by adding single events with 
#      very small random loss and severity 
#
#      Section 1.3 - 1.6 produces a summary of the input data
#           Input_Events_metrics.txt
#           Input_Events_Hist.png
#           OEP_gross.png - 'Occurrence Exceedance Probability' curve
#           AEP_gross.png - 'Aggregate Exceedance Probability' curve
#      Section 1.7 - 1.8 produces
#           OEP_FLWS_default.png - Comparison of gross and net of reinsurance including estimate of simulation uncertainty
#           AEP_FLWS_default.png
#
# 3. - Sections 2 & 3 induce the stated correlation in a variety of ways.
#      The main outputs are text files giving the impact of the correlation on losses
#           AEP_gross_impact.txt
#           AEP_net_impact.txt
#
# 4. - Section 4 as Sections 2 & 3, with scaling to a company's portfolio

### A few detailed notes
# Section 4.1 - 50-500 yrs RPs for 'expert judgement' hard coded as [3:7]
# To repeat for a company, only need to run 4.6 onwards


###########################
# 0 - User set quantities #
###########################

### Reinsurance

# Defaults [] illustrative / typical for UK - See Bank blog Hillier et al (2023)
# For what a limit, reinstatement and retention are, consider looking at
# https://www.wiley.com/en-gb/Natural+Catastrophe+Risk+Management+and+Modelling:+A+Practitioner%27s+Guide-p-9781118906071
# See Fig 2.6 on p57 of this textbook, and section 2.4.2.2
# The set-up is non-proportional, no coinsurance, 1 layer, with a number of reinstatements, per-event excess of loss.
# Strictly speaking, the names are slightly misleading (legacy of initial coding, and difficult to change)
#                    'retention_mult' = sets the top of the retention, i.e. the 'attachment'
#                    'limit_RP' = sets the max loss covered by reinsurance, i.e. the 'exhaustion'
# All three values are consumed by F_ReIns_Apply (F4) below.
reinstatements <- 1    # User choice, >=0 [1]. F4 lets the largest (reinstatements + 1) events per year use the cover.
limit_RP <- 100           # Return period (RP) of gross event loss; F4 takes the loss at the nearest RP on the OEP curve as the limit [100]
retention_mult <- 1.5       # Multiplier of AAL (i.e. expected loss); F4 uses AAL * retention_mult as the retention [1.5]

### Dependency
r_val <- 0.71          # Rank correlation value (used as rho in the copulas)
                       # [0.71] - 'high' case, [0.44] - 'low' case
g_param <- 2.13        # Note that the Gumbel parameter has to be set separately from r_val
                       # [2.13] - 'high' case, [1.43] - 'low' case, 1.00 is equivalent to r = 0.0
Sim_Itt <- 20    # Number of resampled (with replacement) repeats of the input data to simulate
                       # 100 iterations is most stable, but it takes a while (5 is a quick trial value)
DoF <- 5               # Degrees of freedom for first t-copula [5]
DoF2 <- 10             # Degrees of freedom for second t-copula [10]

# Option to fix the ratio of the sizes of the losses based on
# Annual average loss (AAL)
# Make this negative to turn it off (i.e. not applied); Section 1.2 rescales Hazard 2 only when H1H2ratio >= 0
H1H2ratio <- -0.2      # [-0.2, i.e. off] For every unit AAL of H1, fix the number of units of H2
                       # e.g. 2 would give twice as much H2 as H1

Haz1_name <- "Flood"         # These are used in labelling for plots and summary text only
Haz2_name <- "Wind"

Input_yrs <- (228*50)       # Number of simulated years in the input data. Not used in the bulk of the code,
                       # but needed (Section 1.2 AAL) as there might not be an event every year, so it cannot be read from the data

### Location of outputs from the analysis

locn <- "Run1"    # Output folder, relative to the working directory (created below if it does not exist)

### Input files

# Event losses
# Format: Tab delimited text
# $1 "Event" (i.e. ID of event), $2 "Year", $3 "Day", $4 "Insurable_Loss" (e.g. ground up), $5 "Severity" (e.g. SSI)
# Use these column names (exact spelling and case - the code refers to them by name), in this order - to be safe.
# Separate files for Hazard 1 and Hazard 2.
# Code doesn't care what the hazards are.
Data_in_H1_f <- "./Events_TS1_F_r1.txt"  # Hazard 1
Data_in_H2_f <- "./Events_TS1_S_r1.txt"  # Hazard 2

# Selected return periods
RPs_select_f <- "./RPs_selected.txt"   # Use file provided. Debugged/tested for these RPs.

# 'Company' curves for independently calculated hazards
# This allows for non-UK wide losses, which might have different ratio of flood to wind, for example.
# RPs, Hazard 1 gross loss, Hazard 2 gross loss,
Company_H1H2_f <- "./CompanyX_format_losses_ALL_numbers_changed.txt"  #Company Example

### Output files

# Make sure the output folder exists; without it every file.create() below
# fails with a warning and the later appends to the text outputs error.
if (!dir.exists(locn)) {
  dir.create(locn, recursive = TRUE, showWarnings = FALSE)
}

# Build the path "./<locn>/<file_name>", create the file empty (truncating any
# file left by a previous run, which matters for the text outputs that are
# appended to), and return the path. For plots the extension sets the file type.
F_new_outfile <- function(file_name) {
  out_path <- sprintf("./%s/%s", locn, file_name)
  file.create(out_path)
  out_path
}

# 1 - Statistics about the input events
outfile1 <- F_new_outfile("Input_Events_metrics.txt")
# 2 - Histograms of input events
outfile2 <- F_new_outfile("Input_Events_Hist.png")
# 3 - OEP (gross loss) - individual hazards
outfile3 <- F_new_outfile("OEP_gross.png")
# 4 - AEP (gross loss) - individual hazards
outfile4 <- F_new_outfile("AEP_gross.png")
# 5 - Gross/Net loss - OEP hazards combined - default correlation
outfile5 <- F_new_outfile("OEP_FLWS_default.png")
# 6 - Gross/Net loss - AEP hazards combined - default correlation
outfile6 <- F_new_outfile("AEP_FLWS_default.png")
# 7, 8 - AEP gross / net - influence of correlation 1 (C1)
outfile7 <- F_new_outfile("AEP_gross_C1.png")
outfile8 <- F_new_outfile("AEP_net_C1.png")
# 9, 10 - AEP gross / net - influence of correlation 2 (C2)
outfile9 <- F_new_outfile("AEP_gross_C2.png")
outfile10 <- F_new_outfile("AEP_net_C2.png")
# 11, 12 - AEP gross / net - influence of correlation 3 (C3)
outfile11 <- F_new_outfile("AEP_gross_C3.png")
outfile12 <- F_new_outfile("AEP_net_C3.png")
# 13, 14 - AEP gross / net - influence of correlation 4 (C4)
outfile13 <- F_new_outfile("AEP_gross_C4.png")
outfile14 <- F_new_outfile("AEP_net_C4.png")
# 15, 16 - Output files of percentage difference
outfile15 <- F_new_outfile("AEP_gross_impact.txt")
outfile16 <- F_new_outfile("AEP_net_impact.txt")
# 17, 18 - AEP gross / net - influence of correlation 5 (C5)
outfile17 <- F_new_outfile("AEP_gross_C5.png")
outfile18 <- F_new_outfile("AEP_net_C5.png")
# 19 - Equivalent fits (hazard and loss)
outfile19 <- F_new_outfile("Equiv_Fits.txt")

# For the selected company (i.e. _c) notation
# 20, 21 - AEP gross / net - influence of correlation 6 (C6)
outfile20 <- F_new_outfile("AEP_gross_C6.png")
outfile21 <- F_new_outfile("AEP_net_C6.png")
# 22, 23 - Output files of percentage difference
outfile22 <- F_new_outfile("AEP_gross_impact_Company.txt")
outfile23 <- F_new_outfile("AEP_net_impact_Company.txt")

##############
# FUNCTIONS  #
##############

# F1 - Combine the event lists of two hazards
# Args:
#   df1_in, df2_in - data frames of events (e.g. Data_H1_df, Data_H2_df), each
#                    containing at least the columns Year and Insurable_Loss
# Returns:
#   A data frame with only Year and Insurable_Loss, holding every event of
#   both inputs, sorted by Year and then by descending loss within the year.
F_Haz_comb <- function(df1_in, df2_in) {
  # Stack the two event lists. (Writing the second list in by row index,
  # starting at the last row of the first, overwrote one event of hazard 1.)
  df_out <- rbind(
    df1_in[, c("Year", "Insurable_Loss")],
    df2_in[, c("Year", "Insurable_Loss")]
  )
  # Sorted in descending order within the year
  df_out <- df_out[order(df_out$Year, -df_out$Insurable_Loss), ]
  return(df_out)
}

# F2 - Gross AEP (aggregate, i.e. yearly total) curve
# Data_df must contain the columns Year and Insurable_Loss.
# Returns one row per year present in the data, largest yearly total first,
# with 'rate' (rank divided by the largest Year value) and 'RP' (1 / rate).
F_AEP <- function(Data_df) {
  # Total loss in each year
  yearly_tot <- aggregate(Insurable_Loss ~ Year, data = Data_df, FUN = sum)
  # Largest yearly total first
  yearly_tot <- yearly_tot[order(-yearly_tot$Insurable_Loss), ]
  # Yearly exceedance rate: position in the sorted list over the number of
  # years, where the number of years is taken to be the largest Year value
  position <- seq.int(nrow(yearly_tot))
  yearly_tot$rate <- position / max(yearly_tot$Year)
  # Return period is the reciprocal of the rate
  yearly_tot$RP <- 1 / yearly_tot$rate
  yearly_tot
}

# F2b - Net AEP (aggregate, i.e. yearly total) curve
# Data_df must contain the columns Year and Retained.
# Returns one row per year present in the data, largest retained total first,
# with 'rate' (rank divided by the largest Year value) and 'RP' (1 / rate).
F_AEP_net <- function(Data_df) {
  # Total retained loss in each year
  yearly_net <- aggregate(Retained ~ Year, data = Data_df, FUN = sum)
  # Largest yearly total first
  yearly_net <- yearly_net[order(-yearly_net$Retained), ]
  # Yearly exceedance rate: position in the sorted list over the number of
  # years, where the number of years is taken to be the largest Year value
  position <- seq.int(nrow(yearly_net))
  yearly_net$rate <- position / max(yearly_net$Year)
  # Return period is the reciprocal of the rate
  yearly_net$RP <- 1 / yearly_net$rate
  yearly_net
}

# F3 - Gross OEP (per-event) curve
# Data_df must contain the columns Year and Insurable_Loss.
# Returns one row per event, largest loss first, with 'rate' (rank divided by
# the largest Year value) and 'RP' (1 / rate).
F_OEP <- function(Data_df) {
  # Keep only what the curve needs
  events <- Data_df[, c("Year", "Insurable_Loss")]
  # Largest event first
  events <- events[order(-events$Insurable_Loss), ]
  # Yearly exceedance rate: position in the sorted list over the number of
  # years, where the number of years is taken to be the largest Year value
  position <- seq.int(nrow(events))
  events$rate <- position / max(events$Year)
  # Return period is the reciprocal of the rate
  events$RP <- 1 / events$rate
  events
}


# F3b - Net OEP (per-event) curve
# Data_df must contain the columns Year and Retained.
# Returns one row per event, largest retained loss first, with 'rate' (rank
# divided by the largest Year value) and 'RP' (1 / rate).
F_OEP_net <- function(Data_df) {
  # Keep only what the curve needs
  events <- Data_df[, c("Year", "Retained")]
  # Largest retained loss first
  events <- events[order(-events$Retained), ]
  # Yearly exceedance rate: position in the sorted list over the number of
  # years, where the number of years is taken to be the largest Year value
  position <- seq.int(nrow(events))
  events$rate <- position / max(events$Year)
  # Return period is the reciprocal of the rate
  events$RP <- 1 / events$rate
  events
}

# F4 - Apply reinsurance (per-event excess of loss, single layer)
# arg1 = Reins_df: events with columns Year and Insurable_Loss
# arg2 = retention_mult (single value): retention = AAL * retention_mult
# arg3 = limit_RP (single value): return period used to look up the limit
# arg4 = OEP_df: OEP for the combined list of the two hazards (needs columns RP and Insurable_Loss)
# Also reads the global 'reinstatements' (it is not an argument).
# Returns Reins_df with added columns rank, Re_used, Re_overtopped and Retained.
# NOTE(review): setDT() converts its argument to a data.table in place (see the
# data.table documentation), so the data frame passed in by the caller likely
# becomes a data.table as well - confirm before relying on it staying a data.frame.
F_ReIns_Apply <- function(Reins_df, retention_mult, limit_RP,OEP_df) {
  # Find retention: AAL here is total loss divided by the largest Year value
  AAL <- sum(Reins_df$Insurable_Loss)/max(Reins_df$Year) 
  retention <- AAL*retention_mult
  retention[]  # no effect inside a function (left over from interactive checking)
  # Find limit - the loss of the OEP row whose return period is nearest to limit_RP
  Find_lim_temp <- OEP_df
  Find_lim_temp$RPdiff <- sqrt((Find_lim_temp$RP - limit_RP)^2)  # i.e. absolute difference
  Find_lim_temp <- Find_lim_temp[order(Find_lim_temp$RPdiff),]
  limit <- Find_lim_temp[1,"Insurable_Loss"]
  limit[]  # no effect inside a function
  # Apply limit to events - need to get rank within a year
  # https://stackoverflow.com/questions/57355570/create-counter-in-dataframe-that-gets-reset-based-on-changes-in-value-or-new-id
  # Various solutions were offered there; the one used is the data.table one
  # The solution supplied resets for 2 columns
  #Reins_df$response <- 1
  #setDT(Reins_df)[, counter := seq_len(.N), by = .(Year, rleid(response))]
  # Simplified to one: a counter that restarts at 1 for each run of consecutive
  # rows sharing a Year. It only equals the size rank within the year if the rows
  # arrive sorted by Year then descending loss (as F_Haz_comb returns them).
  setDT(Reins_df)[, rank := seq_len(.N), by = .(rleid(Year))]
  
  # And, apply the simple reinsurance by event.
  # Long-winded, but can condense the code later.
  retention[] #~3 billion (author's note of the value seen when developing)
  limit[]     #~9 billion (author's note of the value seen when developing)
  # Cover is used by an event that is among the first (reinstatements + 1) of its year and exceeds the retention
  Reins_df$Re_used <- ifelse(Reins_df$rank <= (reinstatements +1) & Reins_df$Insurable_Loss > retention,1,0)
  # Cover is overtopped when such an event also exceeds the limit
  Reins_df$Re_overtopped <- ifelse(Reins_df$rank <= (reinstatements +1) & Reins_df$Insurable_Loss > limit,1,0)
  # Retained loss: the full loss if cover is not used, otherwise the retention ...
  Reins_df$Retained <- ifelse(Reins_df$Re_used == 0, Reins_df$Insurable_Loss, retention)
  # ... and if overtopped, the loss less the width of the layer (limit - retention)
  Reins_df$Retained <- ifelse(Reins_df$Re_overtopped == 0, Reins_df$Retained, Reins_df$Insurable_Loss - (limit-retention))
  #temp_2 <- subset(Reins_df,Reins_df$Re_used == 1) #.... used to check that working correctly.
  
  return(Reins_df)  
}

# F5 - Yearly totals for one hazard, ready to be joined by simulated ranks
# Data_df - events with columns Year, Insurable_Loss and Severity
# H_num   - hazard label (e.g. "H1") appended to the output column names
# Uses the global Yrs_df (a column 'Year' listing the years to keep).
# Returns one row per year of Yrs_df with columns Yr_<H_num>, Loss_<H_num>,
# Sev_<H_num> and rank_<H_num>.
F_Join_Prep <- function(Data_df, H_num) {
  # Sum loss and severity within each year
  yearly <- aggregate(cbind(Insurable_Loss, Severity) ~ Year, data = Data_df, FUN = sum)
  # Left join on to the list of years, so years with no events appear, as zeros
  yearly <- merge(x = Yrs_df, y = yearly, by = "Year", all.x = TRUE)
  yearly[is.na(yearly)] <- 0
  # Very small random addition to severity so that no two years share a rank
  tie_breaker <- runif(nrow(yearly), 0, 0.0001)
  yearly$Severity <- yearly$Severity + tie_breaker
  # Rank - high severity gives high rank
  yearly$rank <- rank(yearly$Severity)
  # Rename the columns, one at a time, to carry the hazard label
  name_formats <- c(Year = "Yr_%s", Severity = "Sev_%s",
                    Insurable_Loss = "Loss_%s", rank = "rank_%s")
  for (old_name in names(name_formats)) {
    colnames(yearly)[match(old_name, names(yearly))] <- sprintf(name_formats[[old_name]], H_num)
  }

  yearly
}

# F6 - Join by pre-simulated ranks
# x2          - data frame whose first two columns are simulated values (by
#               whatever means, e.g. a copula) for hazard 1 and hazard 2
# Agg_H1H2_df - observed yearly data with rank_H1 / rank_H2 and the columns
#               Yr_*, Loss_*, Sev_* for both hazards
# Returns x2 (columns renamed Sim1, Sim2, plus rank_H1, rank_H2, row) with the
# observed Yr/Loss/Sev of each hazard attached to the row whose simulated rank
# equals the observed rank.
F_Join <- function(x2, Agg_H1H2_df) {
  # Ranks of the simulated data, and the row order to restore after merging
  colnames(x2)[1:2] <- c("Sim1", "Sim2")
  x2$rank_H1 <- rank(x2$Sim1)
  x2$rank_H2 <- rank(x2$Sim2)
  x2$row <- seq.int(nrow(x2))

  # Left outer join of the observed data on to the simulated ranks for one
  # hazard, returned in the original row order of x2 (merge() re-sorts by key)
  join_on <- function(rank_col) {
    joined <- merge(x = x2, y = Agg_H1H2_df, by = rank_col, all.x = TRUE)
    joined[order(joined$row), ]
  }
  by_H1 <- join_on("rank_H1")
  by_H2 <- join_on("rank_H2")

  # Copy the relevant observed columns of each hazard across
  cols_H1 <- c("Yr_H1", "Loss_H1", "Sev_H1")
  cols_H2 <- c("Yr_H2", "Loss_H2", "Sev_H2")
  x3 <- x2
  x3[, cols_H1] <- by_H1[, cols_H1]
  x3[, cols_H2] <- by_H2[, cols_H2]

  x3
}

# F7 - EP mean and standard error across simulations
# EP_in_df - an EP curve whose first four columns are descriptive (Year, loss,
#            rate, RP) and whose columns 5 onwards each hold one simulation
# Returns EP_in_df with four columns added:
#   Loss   - row mean of the simulation columns
#   Stderr - row standard deviation / sqrt(number of simulation columns)
#   MinE, MaxE - Loss -/+ 2 * Stderr
# The number of simulations is counted from the columns rather than read from
# the global loop counter 'Itt', which the calling while-loops leave one
# larger than the number of simulations actually run.
F_EP_mean <- function(EP_in_df) {
  first_sim_col <- 5
  if (ncol(EP_in_df) < first_sim_col) {
    stop("F_EP_mean: no simulation columns found (expected from column 5 onwards)",
         call. = FALSE)
  }
  # as.data.frame() so that numeric column selection behaves the same whether
  # a data.frame or a data.table is passed; drop = FALSE keeps a single
  # simulation column as a column
  sims <- as.matrix(
    as.data.frame(EP_in_df)[, first_sim_col:ncol(EP_in_df), drop = FALSE]
  )
  n_sims <- ncol(sims)
  EP_in_df[, "Loss"] <- rowMeans(sims)
  EP_in_df[, "Stderr"] <- apply(sims, 1, sd) / sqrt(n_sims)
  EP_in_df[, "MinE"] <- EP_in_df$Loss - (2 * EP_in_df$Stderr)
  EP_in_df[, "MaxE"] <- EP_in_df$Loss + (2 * EP_in_df$Stderr)
  return(EP_in_df)
}

# F8 - Read selected RPs from an EP curve
# EP_approx_df  - EP curve with columns RP, Loss and Stderr (e.g. from F7)
# RPs_select_df - first column lists the rows of the summary; rows 1 and 2 are
#                 placeholders for the mean and standard deviation, rows 3
#                 onwards are the return periods to read off the curve
# Returns a data frame with columns RP, Loss and Stderr:
#   row 1 Loss = mean of EP_approx_df$Loss, row 2 Loss = its standard deviation,
#   rows 3+    = Loss and Stderr linearly interpolated at the requested RPs
#                (NA outside the range of the curve; Stderr is NA in rows 1-2).
F_EP_summary <- function(EP_approx_df, RPs_select_df) {
  n_rows <- nrow(RPs_select_df)
  if (n_rows < 3) {
    stop("F_EP_summary: RPs_select_df needs at least 3 rows (mean, stddev, then return periods)",
         call. = FALSE)
  }

  # Set up df for data
  EP_summary_df <- as.data.frame(RPs_select_df[, 1])
  colnames(EP_summary_df)[1] <- "RP"
  EP_summary_df$Loss <- NA_real_
  EP_summary_df$Stderr <- NA_real_

  # Mean and stddev
  EP_summary_df$Loss[1] <- mean(EP_approx_df$Loss)
  EP_summary_df$Loss[2] <- sd(EP_approx_df$Loss)

  # The numbered RPs, taken straight from the first column (no reliance on the
  # column being called V1, or on partial matching of a generated column name)
  rp_rows <- 3:n_rows
  rp_wanted <- RPs_select_df[rp_rows, 1]
  EP_summary_df$Loss[rp_rows] <-
    approx(x = EP_approx_df$RP, y = EP_approx_df$Loss, xout = rp_wanted)$y
  EP_summary_df$Stderr[rp_rows] <-
    approx(x = EP_approx_df$RP, y = EP_approx_df$Stderr, xout = rp_wanted)$y

  return(EP_summary_df)
}

# F9 - Equivalent dependency parameters for hazard severity and for loss
# Data_H1_df    - events of hazard 1
# Reorder_H2_df - events of hazard 2, with its years re-ordered
# outfile19     - text file the results are appended to
# cop_num       - number of the copula family to fit, passed to BiCopSelect()
#                 as 'familyset' (numbers are in the man page of that function)
# Both hazards are aggregated by year with F_Join_Prep (F5), then three lines
# are appended to outfile19:
#   Spearman: rank correlation of severity, of loss, and of the severity ranks
#   Haz_fit:  par, par2 and tau of the copula fitted to the yearly severities
#   Loss_fit: par, par2 and tau of the copula fitted to the yearly losses
#             (this line used to be written with the label "Sev_fit", although
#             it is the loss-based fit)
# Nothing useful is returned; the function is called for its file output.
F_Equiv_fit <- function(Data_H1_df, Reorder_H2_df, outfile19, cop_num)
{
  # Yearly aggregates: hazard 1 first, then hazard 2 (re-ordered years)
  Agg_H1match_df <- F_Join_Prep(Data_H1_df, "H1")
  Agg_H2match_df <- F_Join_Prep(Reorder_H2_df, "H2")

  # Join these two side by side (row i of each is the same position in the list of years)
  Agg_H1H2match_df <- Agg_H1match_df
  Agg_H1H2match_df[, 5:8] <- Agg_H2match_df[, c("Yr_H2", "Loss_H2", "Sev_H2", "rank_H2")]

  rs_Haz <- cor(x = Agg_H1H2match_df$Sev_H1, y = Agg_H1H2match_df$Sev_H2, method = "spearman")
  rs_Loss <- cor(x = Agg_H1H2match_df$Loss_H1, y = Agg_H1H2match_df$Loss_H2, method = "spearman")
  rs_Rank <- cor(x = Agg_H1H2match_df$rank_H1, y = Agg_H1H2match_df$rank_H2, method = "spearman")

  # Output to file
  write(sprintf("%s: %.3f  %.3f  %.3f", "Spearman", rs_Haz, rs_Loss, rs_Rank),
        file = outfile19, append = TRUE)

  # Fit the requested copula family to a pair of yearly series (hazard 1,
  # hazard 2) and append its parameters to the output file under 'label'
  fit_and_write <- function(label, h1_vals, h2_vals) {
    pseudo_obs <- pobs(as.matrix(cbind(h1_vals, h2_vals)))
    selectedCopula <- BiCopSelect(pseudo_obs[, 1], pseudo_obs[, 2], familyset = cop_num)
    write(sprintf("%s: %.3f  %.3f  %.3f", label,
                  selectedCopula$par, selectedCopula$par2, selectedCopula$tau),
          file = outfile19, append = TRUE)
  }

  # Severity
  fit_and_write("Haz_fit", Agg_H1H2match_df$Sev_H1, Agg_H1H2match_df$Sev_H2)
  # Loss
  fit_and_write("Loss_fit", Agg_H1H2match_df$Loss_H1, Agg_H1H2match_df$Loss_H2)

  invisible(NULL)
}


#######################################################
# 1 - 'Default' data - as provided - Prep. and Review #
#######################################################

### 1.1. Read in Event data & Key RPs

# Event files have a header row; the list of selected RPs does not (so its
# columns get the default names V1, V2, ...). All are tab separated.
Data_H1_df <- read.table(Data_in_H1_f, header = TRUE, sep = "\t",
                         na.strings = NaN, stringsAsFactors = FALSE)
Data_H2_df <- read.table(Data_in_H2_f, header = TRUE, sep = "\t",
                         na.strings = NaN, stringsAsFactors = FALSE)

RPs_select_df <- read.table(RPs_select_f, header = FALSE, sep = "\t",
                            na.strings = NaN, stringsAsFactors = FALSE)

### 1.2 Clean Event data, and rescale if desired

# *************************************************************** #
# It is ASSUMED that you have cleaned the data prior to using this
# script, but a few fixes are applied here
# *************************************************************** #

# See format near Input data at top of script

# If any loss is NA, the insurable loss is read in as a character string.
# Force it back to numeric.
Data_H1_df$Insurable_Loss <- as.numeric(Data_H1_df$Insurable_Loss)
Data_H2_df$Insurable_Loss <- as.numeric(Data_H2_df$Insurable_Loss)

# Replace NA with a very small random loss. One random number is drawn per
# row (H1 first, then H2) and used only where the loss is missing.
# H1
Data_H1_df$Insurable_Loss <- local({
  loss <- Data_H1_df$Insurable_Loss
  filler <- runif(length(loss), 0, 0.01)
  is_missing <- is.na(loss)
  loss[is_missing] <- filler[is_missing]
  loss
})
# H2
Data_H2_df$Insurable_Loss <- local({
  loss <- Data_H2_df$Insurable_Loss
  filler <- runif(length(loss), 0, 0.01)
  is_missing <- is.na(loss)
  loss[is_missing] <- filler[is_missing]
  loss
})

# Force a ratio between the hazards if H1H2ratio is not negative.
# The forcing is a multiplier applied to the Hazard 2 losses, chosen so that
# AAL(H2) / AAL(H1) becomes H1H2ratio.
# If H1H2ratio is negative, no correction is applied.
if (H1H2ratio >= 0) {
  fix_ratio <- H1H2ratio /
    ((sum(Data_H2_df$Insurable_Loss) / Input_yrs) /
       (sum(Data_H1_df$Insurable_Loss) / Input_yrs))
  Data_H2_df$Insurable_Loss <- Data_H2_df$Insurable_Loss * fix_ratio
}

# And, cross-check to see if it has been changed or not: the AALs and their
# ratio (H2 over H1) are recalculated and shown.
yrsH1 <- Input_yrs
yrsH2 <- Input_yrs
AAL_H1 <- sum(Data_H1_df$Insurable_Loss) / yrsH1
AAL_H2 <- sum(Data_H2_df$Insurable_Loss) / yrsH2
Mult_current <- AAL_H2 / AAL_H1
AAL_H1
AAL_H2
Mult_current


### 1.3 Summary of Events
# An initial evaluation of the data, appended to outfile1 for each hazard:
# name, range of years, number of years with events, number of unique event
# IDs, the six summary() statistics of the losses, and the mean and standard
# deviation of log_e(loss).

for (haz_info in list(
  list(heading = "Hazard 1: %s", name = Haz1_name, events = Data_H1_df),
  list(heading = "\nHazard 2: %s", name = Haz2_name, events = Data_H2_df)
)) {
  Summarise_df <- haz_info$events
  Key_stats <- summary(Summarise_df$Insurable_Loss[!is.na(Summarise_df$Insurable_Loss)])
  # Output to file
  write(sprintf(haz_info$heading, haz_info$name), file = outfile1, append = TRUE)
  write(sprintf("Simulation Yrs %d to %d", min(Summarise_df$Year), max(Summarise_df$Year)),
        file = outfile1, append = TRUE)
  write(sprintf("%d Years with >=1 event", length(unique(Summarise_df$Year))),
        file = outfile1, append = TRUE)
  write(sprintf("%d Unique event IDs (should be 1 per row = %d)",
                length(unique(Summarise_df$Event)), length(Summarise_df$Event)),
        file = outfile1, append = TRUE)
  write("Min.     1st Qu.      Median        Mean     3rd Qu.        Max. ",
        file = outfile1, append = TRUE)
  # write() puts 5 numbers per line by default, which pushed 'Max.' on to a
  # line of its own; keep all the statistics on one line under the header
  write(Key_stats, file = outfile1, ncolumns = length(Key_stats), append = TRUE)
  # Losses of zero or less cannot be logged, so they are set to 10 for the
  # log-normal description only (Summarise_df is a working copy)
  Summarise_df$Insurable_Loss <- ifelse(Summarise_df$Insurable_Loss <= 0, 10,
                                        Summarise_df$Insurable_Loss)
  write(sprintf("LogNormal: Mean %f, stddev %f of log_e(Loss)",
                mean(log(Summarise_df$Insurable_Loss)),
                sd(log(Summarise_df$Insurable_Loss))),
        file = outfile1, append = TRUE)
}


### 1.4 pdfs of Event losses
# Density of log(loss) for each hazard, drawn on one figure to examine and
# sanity-check the data. Hazard 1 is the blue line, Hazard 2 the red one.

plot1 <- ggplot(data = Data_H1_df, mapping = aes(log(Insurable_Loss))) +
  geom_density(colour = "blue", size = 2) +
  geom_density(data = Data_H2_df,
               mapping = aes(log(Insurable_Loss), y = ..density..),
               colour = 2, size = 2) +
  labs(title = sprintf("%s(blue) and %s(red)", Haz1_name, Haz2_name),
       x = "log(Loss)", y = "prob. density")
plot1

# The file name sets the device; width and height are left as NA (not set here)
ggsave(outfile2, plot = plot1, width = NA, height = NA, units = "cm")

### 1.5 OEP curves - Gross

# One gross OEP curve per hazard (F3)
OEP_H1_df <- F_OEP(Data_H1_df)
OEP_H2_df <- F_OEP(Data_H2_df)
OEP_df <- OEP_H2_df   # working variable, left holding the Hazard 2 curve

# Plot return periods below 500 years only
OEP_H1_df_plot <- subset(OEP_H1_df, RP < 500)
OEP_H2_df_plot <- subset(OEP_H2_df, RP < 500)

# Losses are divided by 1e9 to plot in billions
plot2 <- ggplot(data = OEP_H1_df_plot,
                mapping = aes(x = RP, y = Insurable_Loss / 1e9)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = OEP_H2_df_plot,
            mapping = aes(x = RP, y = Insurable_Loss / 1e9),
            colour = 2, size = 2) +
  labs(title = sprintf("OEP - %s(blue) and %s(red)", Haz1_name, Haz2_name),
       x = "Return Period (yrs)", y = "Loss (GBP billion)")
plot2

# The file name sets the device
ggsave(outfile3, plot = plot2)

# 1.6 AEP curves - Gross

# One gross AEP curve per hazard (F2)
AEP_H1_df <- F_AEP(Data_H1_df)
AEP_H2_df <- F_AEP(Data_H2_df)
AEP_df <- AEP_H2_df   # working variable, left holding the Hazard 2 curve

# Plot return periods below 500 years only
AEP_H1_df_plot <- subset(AEP_H1_df, RP < 500)
AEP_H2_df_plot <- subset(AEP_H2_df, RP < 500)

# Losses are divided by 1e9 to plot in billions
plot3 <- ggplot(data = AEP_H1_df_plot,
                mapping = aes(x = RP, y = Insurable_Loss / 1e9)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = AEP_H2_df_plot,
            mapping = aes(x = RP, y = Insurable_Loss / 1e9),
            colour = 2, size = 2) +
  labs(title = sprintf("AEP - %s(blue) and %s(red)", Haz1_name, Haz2_name),
       x = "Return Period (yrs)", y = "Loss (GBP billion)")
plot3

# The file name sets the device
ggsave(outfile4, plot = plot3)


# 1.7 Apply illustrative reinsurance at Event Level - input case (i.e. whatever
# dependency is in the input data). Will need to be re-applied for different
# levels of correlation.

# Group the two hazards
Reins_df <- F_Haz_comb(Data_H1_df,Data_H2_df)
# Create AEP curve - in this case gross input
AEP_df <- F_AEP(Reins_df)
# Create OEP curve - in this case gross input (used by F4 to look up the limit)
OEP_df <- F_OEP(Reins_df)
# Apply reinsurance
Reins_Default_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEP_df)

# OEP and AEP, have a look at the combined gross and net of reinsurance

# OEP first
# Gross
OEP_df <- F_OEP(Reins_Default_df)
OEP_Default_gross_df <- OEP_df
# Net - per event, so only illustrative given the simple reinsurance structure
OEP_df <- F_OEP_net(Reins_Default_df)
OEP_Default_net_df <- OEP_df

# AEP Second
# Gross
AEP_df <- F_AEP(Reins_Default_df)
AEP_Default_gross_df <- AEP_df

# Net
AEP_df <- F_AEP_net(Reins_Default_df)
AEP_Default_net_df <- AEP_df


# Plot the first 500 years RP - OEP
OEP_Default_gross_df_plot <- subset(OEP_Default_gross_df, OEP_Default_gross_df$RP < 500)
OEP_Default_net_df_plot <- subset(OEP_Default_net_df, OEP_Default_net_df$RP < 500)

# Axis label uses "GBP", as on the other figures of this script (the currency
# symbol here had been lost, leaving "?")
plot4 <- ggplot(data=OEP_Default_gross_df_plot, aes(x = RP, y = (Insurable_Loss/1000000000))) + 
  geom_line(col="blue", size=2) + 
  geom_line(data=OEP_Default_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Retained/1000000000))) +
  labs(title=sprintf("OEP - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot4

# Plot the first 500 years RP - AEP
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

plot5 <- ggplot(data=AEP_Default_gross_df_plot, aes(x = RP, y = (Insurable_Loss/1000000000))) + 
  geom_line(col="blue", size=2) + 
  geom_line(data=AEP_Default_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Retained/1000000000))) +
  labs(title=sprintf("AEP - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot5


# 1.8 - With resampling of the original data to reduce simulation uncertainty

#https://stackoverflow.com/questions/8273313/sample-random-rows-in-dataframe

# Resampling per-event (see Cor_Tool_1.8.R) does something to the distribution of larger events per year despite the overall number of events >£Xbn being nearly the same, and the AAL being very similar in the simulations.
# Pragmatically, defaulted to resampling per year, and have to live with OEP net being illustrative.

# Start from the default curves, and add one column per simulation (Sim1,
# Sim2, ...). Each simulation column is the curve's own loss column resampled
# with replacement and sorted in descending order, so row i holds the i-th
# largest resampled loss.

OEPg_df <- OEP_Default_gross_df
OEPn_df <- OEP_Default_net_df
AEPg_df <- AEP_Default_gross_df
AEPn_df <- AEP_Default_net_df

for (Itt in seq_len(Sim_Itt)) {
  col_name <- sprintf("Sim%s", Itt)
  # Resample, then sort in descending order. sort() works on the plain vector,
  # so it does not matter whether the curve is a data.frame or a data.table.
  OEPg_df[, col_name] <- sort(sample(OEPg_df$Insurable_Loss, nrow(OEPg_df), replace = TRUE),
                              decreasing = TRUE, na.last = TRUE)
  OEPn_df[, col_name] <- sort(sample(OEPn_df$Retained, nrow(OEPn_df), replace = TRUE),
                              decreasing = TRUE, na.last = TRUE)
  AEPg_df[, col_name] <- sort(sample(AEPg_df$Insurable_Loss, nrow(AEPg_df), replace = TRUE),
                              decreasing = TRUE, na.last = TRUE)
  AEPn_df[, col_name] <- sort(sample(AEPn_df$Retained, nrow(AEPn_df), replace = TRUE),
                              decreasing = TRUE, na.last = TRUE)
}

# F7 reads the global 'Itt' as the number of simulations when calculating the
# standard error, so make sure it is exactly that (a counter incremented at the
# end of a while-loop finishes one too high).
Itt <- Sim_Itt

#Means and stddev of rows - self-defined function - F7
OEPg_df <- F_EP_mean(OEPg_df)
OEPn_df <- F_EP_mean(OEPn_df)
AEPg_df <- F_EP_mean(AEPg_df)
AEPn_df <- F_EP_mean(AEPn_df)

# Rename dfs for storage
OEP_Default_gross_df <- OEPg_df
OEP_Default_net_df <- OEPn_df
AEP_Default_gross_df <- AEPg_df
AEP_Default_net_df <- AEPn_df


# Plot the first 500 years RP - OEP
# Independent ("Default") case: gross (blue) against net of reinsurance (red).
# Dashed lines are the curves from the original data (Insurable_Loss / Retained).
# Solid lines and ribbons use the Loss, MinE and MaxE columns, presumably the
# resampling mean and error band added by F_EP_mean - confirm in that function.
OEP_Default_gross_df_plot <- subset(OEP_Default_gross_df, OEP_Default_gross_df$RP < 500)
OEP_Default_net_df_plot <- subset(OEP_Default_net_df, OEP_Default_net_df$RP < 500)

plot6 <- ggplot(data=OEP_Default_gross_df_plot, aes(x = RP, y = (Insurable_Loss/1000000000))) + 
  # Original - OEP gross
  geom_line(col="blue", size=2, linetype="dashed") + 
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=OEP_Default_gross_df_plot,
            col="blue", size=2,
            aes(x = RP, y = (Loss/1000000000))) +

  # Original - OEP net
  geom_line(data=OEP_Default_net_df_plot,
            col=2, size=2, linetype="dashed",
            aes(x = RP, y = (Retained/1000000000))) +
  # Stderr
  # y is re-mapped here so this layer does not inherit the Insurable_Loss
  # mapping from ggplot() when it is drawn from the net data frame.
  geom_ribbon(data=OEP_Default_net_df_plot,
              aes(y = (Retained/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=OEP_Default_net_df_plot, col="red", size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  labs(title=sprintf("OEP - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot6

ggsave(
  outfile5,    # easiest to use file name to set device
  plot = plot6   # default of last_plot()
)


# Plot the first 500 years RP - AEP
# Independent ("Default") case: gross (blue) against net of reinsurance (red).
# NOTE(review): line types are the reverse of the OEP figure (plot6): here the
# original curve is solid and the mean is dashed - confirm this is intended.
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

plot7 <- ggplot(data=AEP_Default_gross_df_plot, aes(x = RP, y = (Insurable_Loss/1000000000))) + 
  # Original - AEP gross
  geom_line(col="blue", size=2) + 
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # Original - AEP net
  geom_line(data=AEP_Default_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Retained/1000000000))) +
  # Stderr
  # y is re-mapped here so this layer does not inherit the Insurable_Loss
  # mapping from ggplot() when it is drawn from the net data frame.
  geom_ribbon(data=AEP_Default_net_df_plot,
              aes(y = (Retained/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_net_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  labs(title=sprintf("AEP - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot7

ggsave(
  outfile6,    # easiest to use file name to set device
  plot = plot7   # default of last_plot()
)



##########################################################
# 2 - Inducing a stated correlation in a variety of ways #
##########################################################

# This is done initially at a level of aggregation, based on summed hazard severity
# so need to prepare a data frame for each hazard that is aggregated, no gaps and ranked by hazard severity, retaining all relevant columns, with hazard specific names i.e. H1_ etc ....

# First task is to ensure that all rows have a value (even if zero)
# Create a list of all relevant years (or other time-windows), and join to it to the data
# Inputs both need to have
# "Year", "Severity" and "Insurable_Loss"

# Get Years to patch holes with
# A complete list of years 1..max(Year of hazard 1)
Yrs <- max(Data_H1_df$Year)
Yrs_df <- as.data.frame(seq(1,Yrs))
colnames(Yrs_df)[1] <- "Year"
N_data <- nrow(Yrs_df) # Find number of observations

# Aggregate, rename columns, fill any holes, rank by severity
# F_Join_Prep is defined elsewhere in this script; H_num and Data_df are also
# left set as globals, as before.
# Hazard 1
# Inputs
H_num <- "H1"
Data_df <- Data_H1_df
# Function
Agg_H_df <- F_Join_Prep(Data_df, H_num)
# And, save 
Agg_H1_df <- Agg_H_df

# Hazard 2
# Inputs
H_num <- "H2"
Data_df <- Data_H2_df
# Function
Agg_H_df <- F_Join_Prep(Data_df, H_num)
# And, save 
Agg_H2_df <- Agg_H_df

# Quick check to see how correlated the inputs are
# In case the 'independent' isn't that
cor_in <- cor(x = Agg_H1_df$Loss_H1, y =  Agg_H2_df$Loss_H2)
write(sprintf("\nCorrelation - rp: %s",cor_in), file=outfile1, append=TRUE)

# Merge these two together 
# Columns for each - linking function works on the basis of two columns
# for severity, each with a rank
# The hazard 2 columns are appended to the right of the hazard 1 columns,
# row by row (so both tables are assumed to have the same row order and length).

Agg_H1H2_df <- Agg_H1_df
c_start <- ncol(Agg_H1_df)+1
c_end <- ncol(Agg_H1_df)+ncol(Agg_H2_df)
Agg_H1H2_df[,c_start:c_end] <- Agg_H2_df

# Set up an AEP/OEP file to record the results in

l_df1 <- length(Data_H1_df[,1])
l_df2 <- length(Data_H2_df[,1])

## Set up the AEP_df for multiple columns
# One row per year, with rate = row number / number of years and RP = 1 / rate
y_max <- max(Data_H1_df[,"Year"])
AEP_df <- as.data.frame(seq_len(y_max))
colnames(AEP_df)[1] <- "Year"
# Add yearly exceedence probability (i.e. rate)
AEP_df$rate <- seq.int(nrow(AEP_df))/max(AEP_df$Year)
# Add return period
AEP_df$RP <- 1/AEP_df$rate
AEPg_df <- AEP_df
AEPn_df <- AEP_df

## Set up the OEP_df for multiple columns 
Reins_df <- F_Haz_comb(Data_H1_df,Data_H2_df)
# Create OEP curve - in this case gross input
OEP_df <- F_OEP(Reins_df)
# Keep only the index columns; simulation columns are added in the loops below
OEPg_df <- OEP_df[,c("Year","rate", "RP")]
OEPn_df <- OEP_df[,c("Year","rate", "RP")]

############################################
#######  Simulate correlated ranks      ####
############################################

# This is done (i) in a variety of ways
# but (ii) always correlated by year (so intra-seasonal structure is preserved)

######################################
# 2.1 t-Copula,5 degrees of freedom  #
######################################

# "C1" is for Correlation 1

# Parameters
# Estimate could be taken from a fit, or from the academic literature
#rho <- 0.4
rho <- r_val
Itt <- 1

## Uses
# Data_H1_df - Event by event
# Data_H2_df - Event by event
# Agg_H1H2_df - Annual aggregates
# AEP/OEP files of the same length to add to - prepared just above
# e.g. OEPg_df

# Add a first line to an output file
write(sprintf("%s:","Correlation 1"), file=outfile19, append=TRUE)
write(sprintf("%s","Spearman: Severity, Loss, Rank"), file=outfile19, append=TRUE)
write(sprintf("%s","Fits: rho, Dof, tau"), file=outfile19, append=TRUE)
# Passed to F_Equiv_fit. The value 2 is used for both t-copula sections and 1
# for the Gaussian and rank-swapping sections; presumably it selects the copula
# family that is fitted - confirm in F_Equiv_fit.
cop_num <- 2

# One pass per simulation; each pass adds a column "Sim<Itt>" to the four
# exceedance-probability tables (OEP/AEP, gross/net).
while (Itt <= Sim_Itt) {
  col_name <- sprintf("Sim%s",Itt)

  # A) - Simulation using the copula
  # Two dimensions, rho as correlation coefficient, df is degrees of freedom
  u <- rCopula(N_data,tCopula(dim=2,rho,df=DoF)) 
  # To match previous scripts, rename to x2
  x2 <- u
  x2 <- as.data.frame(x2)
  # x2 is a data frame that has two columns of simulated, correlated random
  # values between 0 and 1

  # Join the simulated years by severity of the hazards
  # x2 contains the ranks to join by (i.e. with the correlation in)
  # Agg_H1H2_df is the hazard/loss data, with ranked severity
  x3 <- F_Join(x2,Agg_H1H2_df)

  # B) - Apply the link to the event data in order to apply reinsurance at an event level
  # Retain the years in Data_H1_df, reordering Data_H2_df
  # Create link file i.e. yearH1, yearH2 to be joined .... get this from x3
  link_yrs <- x3[,c("Yr_H1","Yr_H2")]
  # Rename Yr_H2 to "Year" so that it can be used as the merge key
  colnames(link_yrs)[match("Yr_H2",names(link_yrs))] <- c(sprintf("Year"))
  # Join this with Data for Hazard 2
  Reorder_H2_df <- merge(x = Data_H2_df, y = link_yrs, by = "Year", all.x = TRUE)
  # Change the "Year" to be that from H1 defined by the link
  Reorder_H2_df$Year <- Reorder_H2_df$Yr_H1
  Reorder_H2_df$Yr_H1 <- NULL
  # And, now treat these re-numbered years as above ...
  # Group the two hazards
  Reins_df <- F_Haz_comb(Data_H1_df,Reorder_H2_df)
  # Create AEP curve - in this case gross input
  # NOTE(review): AEPx_df is not used again in this section - confirm whether it is still needed
  AEPx_df <- F_AEP(Reins_df)
  # Create OEP curve - in this case gross input
  OEPx_df <- F_OEP(Reins_df)
  # Apply reinsurance
  Reins_C_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEPx_df)

  #Fit copula to input and output, and dump to file
  F_Equiv_fit(Data_H1_df, Reorder_H2_df, outfile19,cop_num)
  
  
  #OEP and AEP, have a look at the combined gross and net of reinsurance
  # Only the first nrow(temp) rows are filled; any remaining rows stay NA
  # and are set to zero after the loop.
  # OEP first, gross then net
  temp <- F_OEP(Reins_C_df)
  OEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss
  temp <- F_OEP_net(Reins_C_df)
  OEPn_df[1:nrow(temp),col_name] <- temp$Retained
  # AEP Second, gross then net
  temp <- F_AEP(Reins_C_df)
  AEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss 
  temp <- F_AEP_net(Reins_C_df)
  AEPn_df[1:nrow(temp),col_name] <- temp$Retained


  Itt <- Itt + 1
}   # End of the while loop

# Replace all na with zeros
# d[is.na(d)] <- 0
OEPg_df[is.na(OEPg_df)] <- 0
OEPn_df[is.na(OEPn_df)] <- 0
AEPg_df[is.na(AEPg_df)] <- 0
AEPn_df[is.na(AEPn_df)] <- 0

#Means and stddev of rows - self-defined function - F7
OEPg_df <- F_EP_mean(OEPg_df)
OEPn_df <- F_EP_mean(OEPn_df)
AEPg_df <- F_EP_mean(AEPg_df)
AEPn_df <- F_EP_mean(AEPn_df)

# And, file the specific ones

OEP_C1_gross_df <- OEPg_df
OEP_C1_net_df <- OEPn_df
AEP_C1_gross_df <- AEPg_df
AEP_C1_net_df <- AEPn_df


# Make the plotting generic by transfering to the 'C' files
# Will also work for summary stats if this is needed.
OEP_C_gross_df <- OEP_C1_gross_df
OEP_C_net_df <- OEP_C1_net_df
AEP_C_gross_df <- AEP_C1_gross_df
AEP_C_net_df <- AEP_C1_net_df

# Plot OEP for the first 500 years RP gross vs net (means) - cross-check
# These two figures are displayed but not saved to file.
# NOTE(review): the name plot7 is also used for the independent AEP figure
# earlier in the script; that object is overwritten here.
OEP_C_gross_df_plot <- subset(OEP_C_gross_df, OEP_C_gross_df$RP < 500)
OEP_C_net_df_plot <- subset(OEP_C_net_df, OEP_C_net_df$RP < 500)

plot7 <- ggplot(data=OEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # Gross
  geom_line(col="blue", size=2) + 
  # Net
  geom_line(data=OEP_C_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  # Axis label uses "GBP" as in the figures for the independent case
  labs(title=sprintf("OEP for C - means - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot7

# Plot AEP for the first 500 years RP - AEP gross vs net - cross-check
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)

plot8 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # Gross
  geom_line(col="blue", size=2) + 
  # Net
  geom_line(data=AEP_C_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  labs(title=sprintf("AEP for C - means - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot8




# Plot the first 500 years RP of the gross AEP - effect of correlation - KEY FIGURE
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
# Unhash this line and change to 5,000 years to show all RPs
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)

plot9 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP gross - correlated
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_C_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP gross - uncorrelated
  # Stderr
  geom_ribbon(data=AEP_Default_gross_df_plot,
              aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_gross_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # Title format matches the one used for the same figure in sections 2.2 and 2.3
  labs(title=sprintf("AEP gross - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (GBP billion)")

plot9

ggsave(
  outfile7,    # easiest to use file name to set device
  plot = plot9   # default of last_plot()
)

# Plot the first 500 years RP of the net AEP - effect of correlation - KEY FIGURE
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)
# Unhash this line and change to 5,000 years to show all RPs
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

plot10 <- ggplot(data=AEP_C_net_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP net - correlated
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_C_net_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP net - uncorrelated
  # Stderr
  geom_ribbon(data=AEP_Default_net_df_plot,
              aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_net_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # Title format matches the one used for the same figure in sections 2.2 and 2.3
  labs(title=sprintf("AEP net - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (GBP billion)")

plot10

ggsave(
  outfile8,    # easiest to use file name to set device
  plot = plot10   # default of last_plot()
)





######################################
# 2.2 t-Copula, 10 degrees of freedom  #
######################################

# "C2" is for Correlation 2

# Parameters
# Estimate could be taken from a fit, or from the academic literature
#rho <- 0.4
rho <- r_val
Itt <- 1

# Explicitly reset the data frames
# (drops the simulation columns added by the previous section)
AEPg_df <- AEP_df
AEPn_df <- AEP_df
OEPg_df <- OEP_df[,c("Year","rate", "RP")]
OEPn_df <- OEP_df[,c("Year","rate", "RP")]

## Uses
# Data_H1_df - Event by event
# Data_H2_df - Event by event
# Agg_H1H2_df - Annual aggregates
# AEP/OEP files of the same length to add to - prepared just above
# e.g. OEPg_df

# Add a first line to an output file
write(sprintf("%s:","Correlation 2"), file=outfile19, append=TRUE)
write(sprintf("%s","Spearman: Severity, Loss, Rank"), file=outfile19, append=TRUE)
write(sprintf("%s","Fits: rho, Dof, tau"), file=outfile19, append=TRUE)
# Passed to F_Equiv_fit. The value 2 is used for both t-copula sections and 1
# for the Gaussian and rank-swapping sections; presumably it selects the copula
# family that is fitted - confirm in F_Equiv_fit.
cop_num <- 2

# One pass per simulation; each pass adds a column "Sim<Itt>" to the four
# exceedance-probability tables (OEP/AEP, gross/net).
while (Itt <= Sim_Itt) {
  col_name <- sprintf("Sim%s",Itt)
  
  # A) - Simulation using the copula
  # Two dimensions, rho as correlation coefficient, df is degrees of freedom
  # (DoF2 here, as opposed to DoF in section 2.1)
  u <- rCopula(N_data,tCopula(dim=2,rho,df=DoF2)) 
  # To match previous scripts, rename to x2
  x2 <- u
  x2 <- as.data.frame(x2)
  # x2 is a data frame that has two columns of simulated, correlated random
  # values between 0 and 1
  
  # Join the simulated years by severity of the hazards
  # x2 contains the ranks to join by (i.e. with the correlation in)
  # Agg_H1H2_df is the hazard/loss data, with ranked severity
  x3 <- F_Join(x2,Agg_H1H2_df)
  
  
  # B) - Apply the link to the event data in order to apply reinsurance at an event level
  # Retain the years in Data_H1_df, reordering Data_H2_df
  # Create link file i.e. yearH1, yearH2 to be joined .... get this from x3
  link_yrs <- x3[,c("Yr_H1","Yr_H2")]
  # Rename Yr_H2 to "Year" so that it can be used as the merge key
  colnames(link_yrs)[match("Yr_H2",names(link_yrs))] <- c(sprintf("Year"))
  # Join this with Data for Hazard 2
  Reorder_H2_df <- merge(x = Data_H2_df, y = link_yrs, by = "Year", all.x = TRUE)
  # Change the "Year" to be that from H1 defined by the link
  Reorder_H2_df$Year <- Reorder_H2_df$Yr_H1
  Reorder_H2_df$Yr_H1 <- NULL
  # And, now treat these re-numbered years as above ...
  # Group the two hazards
  Reins_df <- F_Haz_comb(Data_H1_df,Reorder_H2_df)
  # Create AEP curve - in this case gross input
  # NOTE(review): AEPx_df is not used again in this section - confirm whether it is still needed
  AEPx_df <- F_AEP(Reins_df)
  # Create OEP curve - in this case gross input
  OEPx_df <- F_OEP(Reins_df)
  # Apply reinsurance
  Reins_C_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEPx_df)
  
  #Fit copula to input and output, and dump to file
  F_Equiv_fit(Data_H1_df, Reorder_H2_df, outfile19,cop_num)
  
  #OEP and AEP, have a look at the combined gross and net of reinsurance
  # Only the first nrow(temp) rows are filled; any remaining rows stay NA
  # and are set to zero after the loop.
  # OEP first, gross then net
  temp <- F_OEP(Reins_C_df)
  OEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss
  temp <- F_OEP_net(Reins_C_df)
  OEPn_df[1:nrow(temp),col_name] <- temp$Retained
  # AEP Second, gross then net
  temp <- F_AEP(Reins_C_df)
  AEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss 
  temp <- F_AEP_net(Reins_C_df)
  AEPn_df[1:nrow(temp),col_name] <- temp$Retained
  
  Itt <- Itt + 1
}   # End of the while loop

# Replace all na with zeros
# d[is.na(d)] <- 0
OEPg_df[is.na(OEPg_df)] <- 0
OEPn_df[is.na(OEPn_df)] <- 0
AEPg_df[is.na(AEPg_df)] <- 0
AEPn_df[is.na(AEPn_df)] <- 0

#Means and stddev of rows - self-defined function - F7
OEPg_df <- F_EP_mean(OEPg_df)
OEPn_df <- F_EP_mean(OEPn_df)
AEPg_df <- F_EP_mean(AEPg_df)
AEPn_df <- F_EP_mean(AEPn_df)

# And, file the specific ones

OEP_C2_gross_df <- OEPg_df
OEP_C2_net_df <- OEPn_df
AEP_C2_gross_df <- AEPg_df
AEP_C2_net_df <- AEPn_df


# Make the plotting generic by transfering to the 'C' files
# Will also work for summary stats if this is needed.
OEP_C_gross_df <- OEP_C2_gross_df
OEP_C_net_df <- OEP_C2_net_df
AEP_C_gross_df <- AEP_C2_gross_df
AEP_C_net_df <- AEP_C2_net_df

# Plot OEP for the first 500 years RP gross vs net (means) - cross-check
# These two figures are displayed but not saved to file.
OEP_C_gross_df_plot <- subset(OEP_C_gross_df, OEP_C_gross_df$RP < 500)
OEP_C_net_df_plot <- subset(OEP_C_net_df, OEP_C_net_df$RP < 500)

plot7 <- ggplot(data=OEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # Gross
  geom_line(col="blue", size=2) + 
  # Net
  geom_line(data=OEP_C_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  # Axis label uses "GBP" as in the figures for the independent case
  labs(title=sprintf("OEP for C - means - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot7

# Plot AEP for the first 500 years RP - AEP gross vs net - cross-check
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)

plot8 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # Gross
  geom_line(col="blue", size=2) + 
  # Net
  geom_line(data=AEP_C_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  labs(title=sprintf("AEP for C - means - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot8




# Plot the first 500 years RP of the gross AEP - effect of correlation - KEY FIGURE
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
# Unhash this line and change to 5,000 years to show all RPs
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)

plot9 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP gross - correlated
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_C_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP gross - uncorrelated
  # Stderr
  geom_ribbon(data=AEP_Default_gross_df_plot,
              aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_gross_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  labs(title=sprintf("AEP gross - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (GBP billion)")

plot9

ggsave(
  outfile9,    # easiest to use file name to set device
  plot = plot9   # default of last_plot()
)

# Plot the first 500 years RP of the net AEP - effect of correlation - KEY FIGURE
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)
# Unhash this line and change to 5,000 years to show all RPs
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

plot10 <- ggplot(data=AEP_C_net_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP net - correlated
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_C_net_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP net - uncorrelated
  # Stderr
  geom_ribbon(data=AEP_Default_net_df_plot,
              aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_net_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  labs(title=sprintf("AEP net - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (GBP billion)")

plot10

ggsave(
  outfile10,    # easiest to use file name to set device
  plot = plot10   # default of last_plot()
)





#########################
# 2.3 Gaussian Copula   #
#########################

# "C3" is for Correlation 3

# Parameters
# Estimate could be taken from a fit, or from the academic literature
#rho <- 0.4
rho <- r_val
Itt <- 1

# Explicitly reset the data frames
# (drops the simulation columns added by the previous section)
AEPg_df <- AEP_df
AEPn_df <- AEP_df
OEPg_df <- OEP_df[,c("Year","rate", "RP")]
OEPn_df <- OEP_df[,c("Year","rate", "RP")]

## Uses
# Data_H1_df - Event by event
# Data_H2_df - Event by event
# Agg_H1H2_df - Annual aggregates
# AEP/OEP files of the same length to add to - prepared just above
# e.g. OEPg_df

# Add a first line to an output file
write(sprintf("%s:","Correlation 3"), file=outfile19, append=TRUE)
write(sprintf("%s","Spearman: Severity, Loss, Rank"), file=outfile19, append=TRUE)
write(sprintf("%s","Fits: rho, n/a = 0.00, tau"), file=outfile19, append=TRUE)
# Passed to F_Equiv_fit. The value 1 is used for the Gaussian and rank-swapping
# sections and 2 for the t-copula sections; presumably it selects the copula
# family that is fitted - confirm in F_Equiv_fit.
cop_num <- 1

# One pass per simulation; each pass adds a column "Sim<Itt>" to the four
# exceedance-probability tables (OEP/AEP, gross/net).
while (Itt <= Sim_Itt) {
  col_name <- sprintf("Sim%s",Itt)
  
  # A) - Simulation using the copula
  # Two dimensions, rho as correlation coefficient (a Gaussian copula has no
  # degrees-of-freedom parameter)
  #u <- rCopula(N_data,tCopula(dim=2,rho,df=100)) 
  u <- rCopula(N_data,normalCopula(dim=2,rho)) 
  # To match previous scripts, rename to x2
  x2 <- u
  x2 <- as.data.frame(x2)
  # x2 is a data frame that has two columns of simulated, correlated random
  # values between 0 and 1
  
  # Join the simulated years by severity of the hazards
  # x2 contains the ranks to join by (i.e. with the correlation in)
  # Agg_H1H2_df is the hazard/loss data, with ranked severity
  x3 <- F_Join(x2,Agg_H1H2_df)
  
  
  # B) - Apply the link to the event data in order to apply reinsurance at an event level
  # Retain the years in Data_H1_df, reordering Data_H2_df
  # Create link file i.e. yearH1, yearH2 to be joined .... get this from x3
  link_yrs <- x3[,c("Yr_H1","Yr_H2")]
  # Rename Yr_H2 to "Year" so that it can be used as the merge key
  colnames(link_yrs)[match("Yr_H2",names(link_yrs))] <- c(sprintf("Year"))
  # Join this with Data for Hazard 2
  Reorder_H2_df <- merge(x = Data_H2_df, y = link_yrs, by = "Year", all.x = TRUE)
  # Change the "Year" to be that from H1 defined by the link
  Reorder_H2_df$Year <- Reorder_H2_df$Yr_H1
  Reorder_H2_df$Yr_H1 <- NULL
  # And, now treat these re-numbered years as above ...
  # Group the two hazards
  Reins_df <- F_Haz_comb(Data_H1_df,Reorder_H2_df)
  # Create AEP curve - in this case gross input
  # NOTE(review): AEPx_df is not used again in this section - confirm whether it is still needed
  AEPx_df <- F_AEP(Reins_df)
  # Create OEP curve - in this case gross input
  OEPx_df <- F_OEP(Reins_df)
  # Apply reinsurance
  Reins_C_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEPx_df)
  
  #Fit copula to input and output, and dump to file
  F_Equiv_fit(Data_H1_df, Reorder_H2_df, outfile19,cop_num)
  
  #OEP and AEP, have a look at the combined gross and net of reinsurance
  # Only the first nrow(temp) rows are filled; any remaining rows stay NA
  # and are set to zero after the loop.
  # OEP first, gross then net
  temp <- F_OEP(Reins_C_df)
  OEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss
  temp <- F_OEP_net(Reins_C_df)
  OEPn_df[1:nrow(temp),col_name] <- temp$Retained
  # AEP Second, gross then net
  temp <- F_AEP(Reins_C_df)
  AEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss 
  temp <- F_AEP_net(Reins_C_df)
  AEPn_df[1:nrow(temp),col_name] <- temp$Retained
  
  Itt <- Itt + 1
}   # End of the while loop

# Replace all na with zeros
# d[is.na(d)] <- 0
OEPg_df[is.na(OEPg_df)] <- 0
OEPn_df[is.na(OEPn_df)] <- 0
AEPg_df[is.na(AEPg_df)] <- 0
AEPn_df[is.na(AEPn_df)] <- 0

#Means and stddev of rows - self-defined function - F7
OEPg_df <- F_EP_mean(OEPg_df)
OEPn_df <- F_EP_mean(OEPn_df)
AEPg_df <- F_EP_mean(AEPg_df)
AEPn_df <- F_EP_mean(AEPn_df)

# And, file the specific ones

OEP_C3_gross_df <- OEPg_df
OEP_C3_net_df <- OEPn_df
AEP_C3_gross_df <- AEPg_df
AEP_C3_net_df <- AEPn_df


# Make the plotting generic by transfering to the 'C' files
# Will also work for summary stats if this is needed.
OEP_C_gross_df <- OEP_C3_gross_df
OEP_C_net_df <- OEP_C3_net_df
AEP_C_gross_df <- AEP_C3_gross_df
AEP_C_net_df <- AEP_C3_net_df

# Plot OEP for the first 500 years RP gross vs net (means) - cross-check
# These two figures are displayed but not saved to file.
OEP_C_gross_df_plot <- subset(OEP_C_gross_df, OEP_C_gross_df$RP < 500)
OEP_C_net_df_plot <- subset(OEP_C_net_df, OEP_C_net_df$RP < 500)

plot7 <- ggplot(data=OEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # Gross
  geom_line(col="blue", size=2) + 
  # Net
  geom_line(data=OEP_C_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  # Axis label uses "GBP" as in the figures for the independent case
  labs(title=sprintf("OEP for C - means - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot7

# Plot AEP for the first 500 years RP - AEP gross vs net - cross-check
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)

plot8 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # Gross
  geom_line(col="blue", size=2) + 
  # Net
  geom_line(data=AEP_C_net_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Loss/1000000000))) +
  labs(title=sprintf("AEP for C - means - %s(blue) and %s(red)","gross","net"), x="Return Period (yrs)", y="Loss (GBP billion)")
plot8




# Plot the first 500 years RP of the gross AEP - effect of correlation - KEY FIGURE
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
# Unhash this line and change to 5,000 years to show all RPs
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)

plot9 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP gross - correlated
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_C_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP gross - uncorrelated
  # Stderr
  geom_ribbon(data=AEP_Default_gross_df_plot,
              aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_gross_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  labs(title=sprintf("AEP gross - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (GBP billion)")

plot9

ggsave(
  outfile11,    # easiest to use file name to set device
  plot = plot9   # default of last_plot()
)

# Plot the first 500 years RP of the net AEP - effect of correlation - KEY FIGURE
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)
# Unhash this line and change to 5,000 years to show all RPs
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

plot10 <- ggplot(data=AEP_C_net_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP net - correlated
  # Stderr
  # fill and alpha are constants, so they are set outside aes(); inside aes()
  # they are treated as data and mapped through a scale instead of being used as given.
  geom_ribbon(aes(ymin=MinE/1000000000, ymax=MaxE/1000000000), fill = "blue", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_C_net_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP net - uncorrelated
  # Stderr
  geom_ribbon(data=AEP_Default_net_df_plot,
              aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000),
              fill = "red", alpha = 0.3) +
  # Mean
  geom_line(data=AEP_Default_net_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  labs(title=sprintf("AEP net - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (GBP billion)")

plot10

ggsave(
  outfile12,    # easiest to use file name to set device
  plot = plot10   # default of last_plot()
)




#################################################
# 2.4 'General Rank' - i.e. swapping algorithm  #
#################################################

# "C4" is for Correlation 4

# Parameters
# Estimate could be taken from a fit, or from the academic literature
#rho <- 0.4
rho <- r_val
Itt <- 1

# Explicitly reset the data frames
# (drops the simulation columns added by the previous section)
AEPg_df <- AEP_df
AEPn_df <- AEP_df
OEPg_df <- OEP_df[,c("Year","rate", "RP")]
OEPn_df <- OEP_df[,c("Year","rate", "RP")]

## Uses
# Data_H1_df - Event by event
# Data_H2_df - Event by event
# Agg_H1H2_df - Annual aggregates
# AEP/OEP files of the same length to add to - prepared just above
# e.g. OEPg_df
# One integer seed per simulation, passed to SJspearman inside the loop.
# (The bare expression that only displayed the first seed has been removed.)
seed_SJs <- as.data.frame(as.integer(runif(Sim_Itt,min=1,max=1000000)))

# Add a first line to an output file
write(sprintf("%s:","Correlation 4"), file=outfile19, append=TRUE)
write(sprintf("%s","Spearman: Severity, Loss, Rank"), file=outfile19, append=TRUE)
write(sprintf("%s","Fits - Gaussian: rho, n/a = 0.00, tau"), file=outfile19, append=TRUE)
# Passed to F_Equiv_fit. The value 1 is used for the Gaussian and rank-swapping
# sections and 2 for the t-copula sections; presumably it selects the copula
# family that is fitted - confirm in F_Equiv_fit.
cop_num <- 1

# One pass per simulation; each pass adds a column "Sim<Itt>" to the four
# exceedance-probability tables (OEP/AEP, gross/net).
while (Itt <= Sim_Itt) {
  col_name <- sprintf("Sim%s",Itt)
  
  # A) - Simulation using the rank swapping algorithm
  # SJspearman requests type: [type=list; target=double].
  # So, move out of dataframe mode
  x2 <- array(dim = c(N_data, 2)) # Set up empty array, in line with example
  x2[,1] <- as.double(runif(N_data, min = 0, max = 1))
  x2[,2] <- as.double(runif(N_data, min = 0, max = 1))
  
  # Copy original data for comparison at end
  #x2_orig <- x2
  
  #SJspearman needs input (X) to have sorted columns.
  # i.e. any existing correlation is removed
  x2[,1] <- sort(x2[,1])
  x2[,2] <- sort(x2[,2])
  correlation2 <- matrix(c(1, rho, rho, 1), nrow = 2) # Specify the correlation matrix desired
  
  # And induce the dependency
  # OPTION spearman or pearson - hash out one line
  x2[ , ] <- SJspearman(X = x2[ , ], cor = correlation2, verbose = FALSE, seed = seed_SJs[Itt,1]) $X
  #x2[ , ] <- SJpearson(X = x2[ , ], cor = correlation2, verbose = F) $X
  x2 <- as.data.frame(x2)
  # x2 is a data frame that has two columns of simulated, correlated random
  # values between 0 and 1
  
  # Join the simulated years by severity of the hazards
  # x2 contains the ranks to join by (i.e. with the correlation in)
  # Agg_H1H2_df is the hazard/loss data, with ranked severity
  x3 <- F_Join(x2,Agg_H1H2_df)
  
  
  # B) - Apply the link to the event data in order to apply reinsurance at an event level
  # Retain the years in Data_H1_df, reordering Data_H2_df
  # Create link file i.e. yearH1, yearH2 to be joined .... get this from x3
  link_yrs <- x3[,c("Yr_H1","Yr_H2")]
  # Rename Yr_H2 to "Year" so that it can be used as the merge key
  colnames(link_yrs)[match("Yr_H2",names(link_yrs))] <- c(sprintf("Year"))
  # Join this with Data for Hazard 2
  Reorder_H2_df <- merge(x = Data_H2_df, y = link_yrs, by = "Year", all.x = TRUE)
  # Change the "Year" to be that from H1 defined by the link
  Reorder_H2_df$Year <- Reorder_H2_df$Yr_H1
  Reorder_H2_df$Yr_H1 <- NULL
  # And, now treat these re-numbered years as above ...
  # Group the two hazards
  Reins_df <- F_Haz_comb(Data_H1_df,Reorder_H2_df)
  # Create AEP curve - in this case gross input
  # NOTE(review): AEPx_df is not used again in this section - confirm whether it is still needed
  AEPx_df <- F_AEP(Reins_df)
  # Create OEP curve - in this case gross input
  OEPx_df <- F_OEP(Reins_df)
  # Apply reinsurance
  Reins_C_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEPx_df)
  
  #Fit copula to input and output, and dump to file
  F_Equiv_fit(Data_H1_df, Reorder_H2_df, outfile19,cop_num)
  
  #OEP and AEP, have a look at the combined gross and net of reinsurance
  # Only the first nrow(temp) rows are filled; any remaining rows stay NA
  # and are set to zero after the loop.
  # OEP first, gross then net
  temp <- F_OEP(Reins_C_df)
  OEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss
  temp <- F_OEP_net(Reins_C_df)
  OEPn_df[1:nrow(temp),col_name] <- temp$Retained
  # AEP Second, gross then net
  temp <- F_AEP(Reins_C_df)
  AEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss 
  temp <- F_AEP_net(Reins_C_df)
  AEPn_df[1:nrow(temp),col_name] <- temp$Retained
  
  Itt <- Itt + 1
}   # End of the while loop

# Replace all na with zeros
# d[is.na(d)] <- 0
OEPg_df[is.na(OEPg_df)] <- 0
OEPn_df[is.na(OEPn_df)] <- 0
AEPg_df[is.na(AEPg_df)] <- 0
AEPn_df[is.na(AEPn_df)] <- 0

#Means and stddev of rows - self-defined function - F7
OEPg_df <- F_EP_mean(OEPg_df)
OEPn_df <- F_EP_mean(OEPn_df)
AEPg_df <- F_EP_mean(AEPg_df)
AEPn_df <- F_EP_mean(AEPn_df)

# And, file the specific ones

OEP_C4_gross_df <- OEPg_df
OEP_C4_net_df <- OEPn_df
AEP_C4_gross_df <- AEPg_df
AEP_C4_net_df <- AEPn_df


# Make the plotting generic by transfering to the 'C' files
# Will also work for summary stats if this is needed.
OEP_C_gross_df <- OEP_C4_gross_df
OEP_C_net_df <- OEP_C4_net_df
AEP_C_gross_df <- AEP_C4_gross_df
AEP_C_net_df <- AEP_C4_net_df

# Cross-check plots for the generic 'C' curves: mean gross (blue) against mean
# net (red), restricted to return periods below 500 years.
# In each plot the second line layer inherits x and y from ggplot() and only
# swaps in the net data.

# OEP - gross vs net
OEP_C_gross_df_plot <- subset(OEP_C_gross_df, RP < 500)
OEP_C_net_df_plot <- subset(OEP_C_net_df, RP < 500)

plot7 <- ggplot(OEP_C_gross_df_plot, aes(x = RP, y = Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = OEP_C_net_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("OEP for C - means - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot7

# AEP - gross vs net
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, RP < 500)
AEP_C_net_df_plot <- subset(AEP_C_net_df, RP < 500)

plot8 <- ggplot(AEP_C_gross_df_plot, aes(x = RP, y = Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = AEP_C_net_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("AEP for C - means - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot8




# Plot the first 500 years RP of the gross AEP - effect of correlation - KEY FIGURE
# Compares the correlated case (generic 'C' curve, blue) with the default
# ("independent") case (red); the figure is saved to outfile13.
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
# To show all RPs, raise the 500 threshold in these two subsets (e.g. to 5,000 years)
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)

# NOTE(review): alpha=0.3 (both ribbons) and fill = "red" (second ribbon) sit
# inside aes(), so ggplot2 treats them as mapped data values, not fixed
# settings. The drawn transparency / red shade may differ from a literal
# 0.3 / "red" - confirm this is intended. The resulting legend is hidden by
# theme(legend.position = "none").
# NOTE(review): the "?" in the y-axis label looks like a currency symbol lost
# to a character-encoding problem - confirm and restore if so.
plot9 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP gross - correlated
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (aes(ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3), fill = "blue") +
  # Mean
  geom_line(data=AEP_C_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP gross - uncorrelated (default case)
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (data=AEP_Default_gross_df_plot,
               aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3, fill = "red")) +
  # Mean
  geom_line(data=AEP_Default_gross_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # The title reports the current value of rho to two decimal places
  labs(title=sprintf("AEP gross - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (? billion)")

plot9

ggsave(
  outfile13,    # easiest to use file name to set device
  plot = plot9,  # default of last_plot()
)

# Plot the first 500 years RP of the net AEP - effect of correlation - KEY FIGURE
# Same layout as the gross figure, but drawn from the net-of-reinsurance
# curves; the figure is saved to outfile14.
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)
# To show all RPs, raise the 500 threshold in these two subsets (e.g. to 5,000 years)
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

# NOTE(review): alpha=0.3 and fill = "red" inside aes() are mapped values, not
# fixed settings - confirm the rendered transparency / colour is as intended.
plot10 <- ggplot(data=AEP_C_net_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP net - correlated
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (aes(ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3), fill = "blue") +
  # Mean
  geom_line(data=AEP_C_net_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP net - uncorrelated (default case)
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (data=AEP_Default_net_df_plot,
               aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3, fill = "red")) +
  # Mean
  geom_line(data=AEP_Default_net_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # The title reports the current value of rho to two decimal places
  labs(title=sprintf("AEP net - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (? billion)")

plot10

ggsave(
  outfile14,    # easiest to use file name to set device
  plot = plot10,  # default of last_plot()
)


#########################
# 2.5 Gumbel Copula     #
#########################
# https://www.r-bloggers.com/2016/03/how-to-fit-a-copula-model-in-r-heavily-revised-part-1-basic-tools/


# "C5" is for Correlation 5

# Parameters
# Estimate could be taken from a fit, or from the academic literature
#rho <- 0.4
# NOTE: in this section rho only appears in the plot titles; the simulation
# loop below draws from the Gumbel copula using g_param instead.
rho <- r_val
Itt <- 1    # Simulation counter, advanced by the while loop below

# Explicitly reset the data frames
# (drops the SimN columns added for the previous correlation option)
AEPg_df <- AEP_df
AEPn_df <- AEP_df
OEPg_df <- OEP_df[,c("Year","rate", "RP")]
OEPn_df <- OEP_df[,c("Year","rate", "RP")]

# Work out the rough value of the Gumbel parameter to use
# Inverse modelling, noting that we're not concerned about the marginal
# Use this manually, and only needs doing once
# When switched on, the block fills convert_df with one row per trial:
#   column 1 = i (search variable), column 2 = j (Gumbel parameter, 1/(1-i)),
#   column 3 = Spearman correlation of a simulated sample of size N_data.
# convert_df is not written to file or plotted here; inspect it by hand.

gumbel_check <- 0    # Use '0' to turn off this section of code
if (gumbel_check > 0.5)
  {
  i <- 0
  count <- 1          # Row of convert_df to fill next
  i_inc <- 0.01       # Step of the search variable
  i_max <- 0.99       # Search stops once i reaches this value
  l_search <- i_max/i_inc +1
  convert_df <- data.frame(matrix(ncol = 3, nrow = l_search))
  while(i < i_max)
  {
    j <- 1/(1-i)  # So that can search to infinity reasonably well
    u <- rCopula(N_data,gumbelCopula(dim=2,j))
    cor_check <- cor(method = "spearman",u)
    convert_df[count,1] <- i
    convert_df[count,2] <- j
    convert_df[count,3] <- cor_check[1,2]
    i = i + i_inc
    count = count + 1
  }
  # After manual lookup, the equivalent values are
  # r = 0.44, j = 0.30, param = 1.43
  # r = 0.63, j = 0.46, param = 1.89
  # r = 0.71, j = 0.53, param = 2.13
}

## Simulation loop for C5 (Gumbel copula)
# Runs Sim_Itt simulations. Each one re-pairs the Hazard 2 years with the
# Hazard 1 years according to a fresh draw from the copula, re-applies
# reinsurance, and stores the resulting OEP/AEP curves (gross and net) in a
# new column "Sim<Itt>" of the four result tables.

## Uses
# Data_H1_df - Event by event
# Data_H2_df - Event by event
# Agg_H1H2_df - Annual aggregates
# AEP/OEP files of the same length to add to - prepared just above
# e.g. OEPg_df

# Add header lines for this correlation option to the fit log (outfile19)
write(sprintf("%s:","Correlation 5"), file=outfile19, append=T)
write(sprintf("%s","Spearman: Severity, Loss, Rank"), file=outfile19, append=T)
write(sprintf("%s","Fits: theta, n/a = 0.00, tau"), file=outfile19, append=T)
# Copula identifier passed to F_Equiv_fit
# (presumably the VineCopula family code, where 4 = Gumbel - confirm against F_Equiv_fit)
cop_num <- 4

while (Itt <= Sim_Itt) {
  # Name of the column that receives this simulation's results
  col_name <- sprintf("Sim%s",Itt)
  
  # A) - Simulation using the copula
  # Two dimensions; g_param is the Gumbel copula parameter
  # (earlier t-copula variant kept for reference)
  #u <- rCopula(N_data,tCopula(dim=2,rho,df=100)) 
  u <- rCopula(N_data,gumbelCopula(dim=2,g_param)) 
  # To match previous scripts, rename to x2
  x2 <- u
  x2 <- as.data.frame(x2)
  # x2 is a data frame that has two columns of simulated, correlated random
  # values between 0 and 1
  
  # Join the simulated years by severity of the hazards
  # x2 contains the ranks to join by (i.e. with the correlation in)
  # Agg_H1H2_df is the hazard/loss data, with ranked severity
  x3 <- F_Join(x2,Agg_H1H2_df)
  
  
  # B) - Apply the link to the event data in order to apply reinsurance at an event level
  # Retain the years in Data_H1_df, reordering Data_H2_df
  # Create link file i.e. yearH1, yearH2 to be joined .... get this from x3
  link_yrs <- x3[,c("Yr_H1","Yr_H2")]
  # Rename Yr_H2 to "Year" so it can serve as the merge key with Data_H2_df
  colnames(link_yrs)[match("Yr_H2",names(link_yrs))] <- c(sprintf("Year"))
  # Join this with Data for Hazard 2
  # (all.x = TRUE keeps every Hazard 2 event; events whose year has no link get NA for Yr_H1)
  Reorder_H2_df <- merge(x = Data_H2_df, y = link_yrs, by = "Year", all.x = TRUE)
  # Change the "Year" to be that from H1 defined by the link
  Reorder_H2_df$Year <- Reorder_H2_df$Yr_H1
  Reorder_H2_df$Yr_H1 <- NULL
  # And, now treat these re-numbered years as above ...
  # Group the two hazards
  Reins_df <- F_Haz_comb(Data_H1_df,Reorder_H2_df)
  # Create AEP curve - in this case gross input
  # (AEPx_df is not used again within this loop)
  AEPx_df <- F_AEP(Reins_df)
  # Create OEP curve - in this case gross input
  OEPx_df <- F_OEP(Reins_df)
  # Apply reinsurance
  Reins_C_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEPx_df)
  
  #Fit copula to input and output, and dump to file
  F_Equiv_fit(Data_H1_df, Reorder_H2_df, outfile19,cop_num)
  
  #OEP and AEP, have a look at the combined gross and net of reinsurance
  # Only the first nrow(temp) rows are filled; any remaining rows stay NA
  # and are set to zero after the loop
  # OEP first, gross then net
  temp <- F_OEP(Reins_C_df)
  OEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss
  temp <- F_OEP_net(Reins_C_df)
  OEPn_df[1:nrow(temp),col_name] <- temp$Retained
  # AEP Second, gross then net
  temp <- F_AEP(Reins_C_df)
  AEPg_df[1:length(temp[,1]),col_name] <- temp$Insurable_Loss 
  temp <- F_AEP_net(Reins_C_df)
  AEPn_df[1:length(temp[,1]),col_name] <- temp$Retained
  
  Itt = Itt +1
}   # End of the while loop

# Finalise the C5 results, one exceedance-probability table at a time.
# For each table: cells never filled by a simulation (NA) are set to zero,
# F_EP_mean (self-defined function F7: means and stddev of rows) is applied,
# and the result is filed under its C5 name and under the generic 'C' name.
# The generic name keeps the plotting below generic, and will also work for
# summary stats if this is needed.

# OEP - gross
OEPg_df[is.na(OEPg_df)] <- 0
OEPg_df <- F_EP_mean(OEPg_df)
OEP_C_gross_df <- OEP_C5_gross_df <- OEPg_df

# OEP - net
OEPn_df[is.na(OEPn_df)] <- 0
OEPn_df <- F_EP_mean(OEPn_df)
OEP_C_net_df <- OEP_C5_net_df <- OEPn_df

# AEP - gross
AEPg_df[is.na(AEPg_df)] <- 0
AEPg_df <- F_EP_mean(AEPg_df)
AEP_C_gross_df <- AEP_C5_gross_df <- AEPg_df

# AEP - net
AEPn_df[is.na(AEPn_df)] <- 0
AEPn_df <- F_EP_mean(AEPn_df)
AEP_C_net_df <- AEP_C5_net_df <- AEPn_df

# Cross-check plots for the generic 'C' curves: mean gross (blue) against mean
# net (red), restricted to return periods below 500 years.
# In each plot the second line layer inherits x and y from ggplot() and only
# swaps in the net data.

# OEP - gross vs net
OEP_C_gross_df_plot <- subset(OEP_C_gross_df, RP < 500)
OEP_C_net_df_plot <- subset(OEP_C_net_df, RP < 500)

plot7 <- ggplot(OEP_C_gross_df_plot, aes(x = RP, y = Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = OEP_C_net_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("OEP for C - means - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot7

# AEP - gross vs net
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, RP < 500)
AEP_C_net_df_plot <- subset(AEP_C_net_df, RP < 500)

plot8 <- ggplot(AEP_C_gross_df_plot, aes(x = RP, y = Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = AEP_C_net_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("AEP for C - means - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot8




# Plot the first 500 years RP of the gross AEP - effect of correlation - KEY FIGURE
# Compares the correlated case (generic 'C' curve, blue) with the default
# ("independent") case (red); the figure is saved to outfile17.
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
# To show all RPs, raise the 500 threshold in these two subsets (e.g. to 5,000 years)
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_df, AEP_Default_gross_df$RP < 500)

# NOTE(review): alpha=0.3 (both ribbons) and fill = "red" (second ribbon) sit
# inside aes(), so ggplot2 treats them as mapped data values, not fixed
# settings. The drawn transparency / red shade may differ from a literal
# 0.3 / "red" - confirm this is intended. The resulting legend is hidden by
# theme(legend.position = "none").
plot9 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP gross - correlated
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (aes(ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3), fill = "blue") +
  # Mean
  geom_line(data=AEP_C_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP gross - uncorrelated (default case)
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (data=AEP_Default_gross_df_plot,
               aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3, fill = "red")) +
  # Mean
  geom_line(data=AEP_Default_gross_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # The title reports rho (set from r_val for this section), not the Gumbel
  # parameter g_param that the simulation loop passes to gumbelCopula
  labs(title=sprintf("AEP gross - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (? billion)")

plot9

ggsave(
  outfile17,    # easiest to use file name to set device
  plot = plot9,  # default of last_plot()
)

# Plot the first 500 years RP of the net AEP - effect of correlation - KEY FIGURE
# Same layout as the gross figure, but drawn from the net-of-reinsurance
# curves; the figure is saved to outfile18.
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)
# To show all RPs, raise the 500 threshold in these two subsets (e.g. to 5,000 years)
AEP_Default_net_df_plot <- subset(AEP_Default_net_df, AEP_Default_net_df$RP < 500)

# NOTE(review): alpha=0.3 and fill = "red" inside aes() are mapped values, not
# fixed settings - confirm the rendered transparency / colour is as intended.
plot10 <- ggplot(data=AEP_C_net_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP net - correlated
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (aes(ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3), fill = "blue") +
  # Mean
  geom_line(data=AEP_C_net_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP net - uncorrelated (default case)
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (data=AEP_Default_net_df_plot,
               aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3, fill = "red")) +
  # Mean
  geom_line(data=AEP_Default_net_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # The title reports rho (set from r_val for this section), not the Gumbel
  # parameter g_param that the simulation loop passes to gumbelCopula
  labs(title=sprintf("AEP net - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (? billion)")

plot10

ggsave(
  outfile18,    # easiest to use file name to set device
  plot = plot10,  # default of last_plot()
)






##########################
# 3 - Output Analytics   #
##########################

# Tabulated comparison of the AEP losses at selected return periods.
# F_EP_summary extracts the selected RPs (RPs_select_df) from each curve:
# D is the default case, C1-C5 are the five correlation options.
# Each summary table holds RP, the loss per case, and then the percentage
# difference of each correlated case relative to the default (Cn_per).

# Correlation options, in the order their columns appear in the output
cop_ids <- c("C1", "C2", "C3", "C4", "C5")

# --- Gross (AEP_C_gross_df) ---

AEPg_D_summary_df  <- F_EP_summary(AEP_Default_gross_df, RPs_select_df)
AEPg_C1_summary_df <- F_EP_summary(AEP_C1_gross_df, RPs_select_df)
AEPg_C2_summary_df <- F_EP_summary(AEP_C2_gross_df, RPs_select_df)
AEPg_C3_summary_df <- F_EP_summary(AEP_C3_gross_df, RPs_select_df)
AEPg_C4_summary_df <- F_EP_summary(AEP_C4_gross_df, RPs_select_df)
AEPg_C5_summary_df <- F_EP_summary(AEP_C5_gross_df, RPs_select_df)

# Start from the RP column, then add the loss columns
AEPg_summary_df <- as.data.frame(AEPg_D_summary_df[,"RP"])
colnames(AEPg_summary_df)[1] <- "RP"
AEPg_summary_df$D <- AEPg_D_summary_df$Loss
gross_by_cop <- list(
  C1 = AEPg_C1_summary_df,
  C2 = AEPg_C2_summary_df,
  C3 = AEPg_C3_summary_df,
  C4 = AEPg_C4_summary_df,
  C5 = AEPg_C5_summary_df
)
for (cop_id in cop_ids) {
  AEPg_summary_df[[cop_id]] <- gross_by_cop[[cop_id]]$Loss
}
# Work out percentage losses
for (cop_id in cop_ids) {
  AEPg_summary_df[[paste0(cop_id, "_per")]] <-
    100 * ((AEPg_summary_df[[cop_id]] / AEPg_summary_df$D) - 1)
}

# --- Net (AEP_C_net_df) ---

AEPn_D_summary_df  <- F_EP_summary(AEP_Default_net_df, RPs_select_df)
AEPn_C1_summary_df <- F_EP_summary(AEP_C1_net_df, RPs_select_df)
AEPn_C2_summary_df <- F_EP_summary(AEP_C2_net_df, RPs_select_df)
AEPn_C3_summary_df <- F_EP_summary(AEP_C3_net_df, RPs_select_df)
AEPn_C4_summary_df <- F_EP_summary(AEP_C4_net_df, RPs_select_df)
AEPn_C5_summary_df <- F_EP_summary(AEP_C5_net_df, RPs_select_df)

# Start from the RP column, then add the loss columns
AEPn_summary_df <- as.data.frame(AEPn_D_summary_df[,"RP"])
colnames(AEPn_summary_df)[1] <- "RP"
AEPn_summary_df$D <- AEPn_D_summary_df$Loss
net_by_cop <- list(
  C1 = AEPn_C1_summary_df,
  C2 = AEPn_C2_summary_df,
  C3 = AEPn_C3_summary_df,
  C4 = AEPn_C4_summary_df,
  C5 = AEPn_C5_summary_df
)
for (cop_id in cop_ids) {
  AEPn_summary_df[[cop_id]] <- net_by_cop[[cop_id]]$Loss
}
# Work out percentage losses
for (cop_id in cop_ids) {
  AEPn_summary_df[[paste0(cop_id, "_per")]] <-
    100 * ((AEPn_summary_df[[cop_id]] / AEPn_summary_df$D) - 1)
}


# 15,16 - Output files of percentage difference
write.table(AEPg_summary_df, file = outfile15, append = FALSE)
write.table(AEPn_summary_df, file = outfile16, append = FALSE)



###############################
# 4 - Company-level analysis  #
###############################

# Scale UK industry losses by appropriate amounts to make them match
# real/simulated curves (e.g. for a company), generated independently

# Keep untouched copies of the event data, so the scaling below can be
# re-applied from the same starting point for another company
Data_H1_orig_df <- Data_H1_df
Data_H2_orig_df <- Data_H2_df

######## FOR EACH COMPANY, can just RUN FROM HERE !!
# Repeats for a company, need only from 4.6 the copula modelling only

# Restore the unscaled event data (undoes the multipliers of any previous run)
Data_H1_df <- Data_H1_orig_df
Data_H2_df <- Data_H2_orig_df

### 4.1. Read in Event data & Key RPs, and scale

# Tab-separated file without a header row; columns are referred to below as
# V2 (used for Hazard 1) and V3 (used for Hazard 2)
Company_H1H2_df <- as.data.frame(read.table(Company_H1H2_f,header=F,sep = "\t",na.strings=NaN, stringsAsFactors=F))
#Company_H1H2_df[,2] <- as.numeric(Company_H1H2_df[,2])

# Determine gross the AEP at selected RPs
# This is after the scaling is forced to a set ratio, but this is not relevant as it'll get scaled again.
#AEP_H1_df - gross loss 
#AEP_H2_df - gross loss
# Get the numbered RPs
# (rows 3 onwards of RPs_select_df; the first two rows are skipped -
#  presumably they hold non-numeric entries, confirm against the input file)
EP_num_df <- as.data.frame(RPs_select_df[3:(length(RPs_select_df$V1)),1])
colnames(EP_num_df)[1] <- "RP"
# Losses from the catastrophe model
# (linear interpolation of each hazard's AEP curve at the selected RPs;
#  approx() returns NA for any RP outside the range of the curve)
EP_num_df$Loss_H1_m <- data.frame(approx(x= AEP_H1_df$RP, y = AEP_H1_df$Insurable_Loss, xout = EP_num_df$RP))[,2]
EP_num_df$Loss_H2_m <- data.frame(approx(x= AEP_H2_df$RP, y = AEP_H2_df$Insurable_Loss, xout = EP_num_df$RP))[,2]
# Add losses from the company
# (rows 3 onwards, mirroring the row selection used for the RPs above)
EP_num_df$Loss_H1_c <- Company_H1H2_df$V2[3:length(Company_H1H2_df$V2)]
EP_num_df$Loss_H2_c <- Company_H1H2_df$V3[3:length(Company_H1H2_df$V3)]
# Work out a multiplier per hazard
EP_num_df$multH1 <- EP_num_df$Loss_H1_c/EP_num_df$Loss_H1_m
EP_num_df$multH2 <- EP_num_df$Loss_H2_c/EP_num_df$Loss_H2_m
# Take my 'expert judgement' as mean of key RPs 50-500 yrs 
# (rows 3:7 of EP_num_df are hard-coded - confirm they still correspond to
#  50-500 yrs if the list of selected RPs changes)
# NOTE(review): mean() is called without na.rm, so a single NA in rows 3:7
# gives an NA multiplier, which would turn every scaled loss below into NA.
Company_mult_H1 <- mean(EP_num_df$multH1[3:7])
Company_mult_H2 <- mean(EP_num_df$multH2[3:7])
# Display the multipliers when run interactively
Company_mult_H1[]
Company_mult_H2[]
# Apply these to the original data [overwrites that loaded in]
Data_H1_df$Insurable_Loss <- Data_H1_df$Insurable_Loss*Company_mult_H1
Data_H2_df$Insurable_Loss <- Data_H2_df$Insurable_Loss*Company_mult_H2

# Density of log(loss) for the scaled event data: Hazard 1 (blue), Hazard 2 (red)
plot1 <- ggplot(data=Data_H1_df, aes(log(Insurable_Loss))) + 
  #geom_histogram(aes(y =..density.., 
  #alpha=0.2)) +
  geom_density(col="blue", size=2) + 
  #geom_histogram(data=Data_H2_df,
  #aes(log(Insurable_Loss), y =..density.., alpha=0.2)) +
  geom_density(data=Data_H2_df,
               col=2, size=2,
               aes(log(Insurable_Loss), y =..density..)) +
  labs(title=sprintf("%s(blue) and %s(red)",Haz1_name,Haz2_name), x="log(Loss)", y="prob. density")
plot1

### 4.2 OEP curves - Gross

# Gross OEP curve per hazard, built from the company-scaled event data.
# OEP_df is updated alongside each curve and ends up holding the Hazard 2 one.

# Hazard 1
OEP_df <- OEP_H1c_df <- F_OEP(Data_H1_df)

# Hazard 2
OEP_df <- OEP_H2c_df <- F_OEP(Data_H2_df)

# Plot the first 500 years RP: Hazard 1 (blue) and Hazard 2 (red).
# The second line layer inherits x and y from ggplot() and only swaps the data.
OEP_H1_df_plot <- subset(OEP_H1c_df, RP < 500)
OEP_H2_df_plot <- subset(OEP_H2c_df, RP < 500)

plot2 <- ggplot(OEP_H1_df_plot, aes(x = RP, y = Insurable_Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = OEP_H2_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("OEP - %s(blue) and %s(red)",Haz1_name,Haz2_name),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot2

#ggsave(
#  outfile3,    # easiest to use file name to set device
#  plot = plot2,  # default of last_plot()
#)

# 4.2 AEP curves - Gross

# Gross AEP curve per hazard, built from the company-scaled event data.
# AEP_df is reused as a scratch variable and ends up holding the Hazard 2 curve.

# Hazard 1

# Create AEP curve
AEP_df <- F_AEP(Data_H1_df)
AEP_H1c_df <- AEP_df

# Hazard 2
# Create AEP curve - in this case gross input
AEP_df <- F_AEP(Data_H2_df)
AEP_H2c_df <- AEP_df


# Plot the first 500 years RP
AEP_H1c_df_plot <- subset(AEP_H1c_df, AEP_H1c_df$RP < 500)
AEP_H2c_df_plot <- subset(AEP_H2c_df, AEP_H2c_df$RP < 500)

# Draw the company-level ('c') subsets created just above. Previously this
# plot read AEP_H1_df_plot / AEP_H2_df_plot, which this section never assigns,
# so it showed whatever curves were left in those variables from earlier in
# the script rather than the company-scaled ones.
plot3 <- ggplot(data=AEP_H1c_df_plot, aes(x = RP, y = (Insurable_Loss/1000000000))) + 
  geom_line(col="blue", size=2) + 
  geom_line(data=AEP_H2c_df_plot,
            col=2, size=2,
            aes(x = RP, y = (Insurable_Loss/1000000000))) +
  labs(title=sprintf("AEP - %s(blue) and %s(red)",Haz1_name,Haz2_name), x="Return Period (yrs)", y="Loss (? billion)")
plot3

#ggsave(
#  outfile4,    # easiest to use file name to set device
#  plot = plot3,  # default of last_plot()
#)


# 4.3 Apply illustrative reinsurance at Event Level - input case (i.e. whatever
# dependency is in the input data). Will need to be re-applied for different
# levels of correlation.

# Combine the two hazards and build the gross curves of the combined input;
# the gross OEP curve is an input to the reinsurance step
Reins_df <- F_Haz_comb(Data_H1_df, Data_H2_df)
AEP_df <- F_AEP(Reins_df)
OEP_df <- F_OEP(Reins_df)
Reins_Default_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP, OEP_df)

# Curves of the combined losses, gross and net of reinsurance.
# OEP_df / AEP_df are updated alongside each curve and finish holding the net
# versions.

# OEP first: gross, then net (just if due to simple reinsurance structure)
OEP_df <- OEP_Default_gross_c_df <- F_OEP(Reins_Default_df)
OEP_df <- OEP_Default_net_c_df <- F_OEP_net(Reins_Default_df)

# AEP second: gross, then net
AEP_df <- AEP_Default_gross_c_df <- F_AEP(Reins_Default_df)
AEP_df <- AEP_Default_net_c_df <- F_AEP_net(Reins_Default_df)


# Company default case, first 500 years RP: gross (blue) against net (red).
# The net layer inherits x from ggplot() and overrides y with the Retained column.

# OEP
OEP_Default_gross_df_plot <- subset(OEP_Default_gross_c_df, RP < 500)
OEP_Default_net_df_plot <- subset(OEP_Default_net_c_df, RP < 500)

plot4 <- ggplot(OEP_Default_gross_df_plot, aes(x = RP, y = Insurable_Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = OEP_Default_net_df_plot, aes(y = Retained / 1000000000),
            colour = 2, size = 2) +
  labs(
    title = sprintf("OEP - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot4

# AEP
AEP_Default_gross_df_plot <- subset(AEP_Default_gross_c_df, RP < 500)
AEP_Default_net_df_plot <- subset(AEP_Default_net_c_df, RP < 500)

plot5 <- ggplot(AEP_Default_gross_df_plot, aes(x = RP, y = Insurable_Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = AEP_Default_net_df_plot, aes(y = Retained / 1000000000),
            colour = 2, size = 2) +
  labs(
    title = sprintf("AEP - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot5


# 4.4 - With resampling of the original data to reduce simulation uncertainty (see Section 1.8)
#
# Each of the four company-level default curves (OEP/AEP, gross/net) gains
# Sim_Itt extra columns "Sim1", "Sim2", ...: each is a resample, with
# replacement, of that table's own loss column, sorted largest first.
# F_EP_mean (self-defined function F7: means and stddev of rows) is then
# applied and the results are stored back under the *_c_df names.
#
# Each resample is sorted before it is stored, so no column has to be read
# back out of the table. This replaces the earlier workaround that looked up
# the new column's position in OEPg_df and reused that position for OEPn_df
# (via the data.table '..' prefix), which would have read the wrong column
# had the gross and net tables differed in their number of columns.

## Simulation parameters
Itt <- 1

OEPg_df <- OEP_Default_gross_c_df
OEPn_df <- OEP_Default_net_c_df
AEPg_df <- AEP_Default_gross_c_df
AEPn_df <- AEP_Default_net_c_df

while (Itt <= Sim_Itt) {
  col_name <- sprintf("Sim%s", Itt)
  # Resample, then sort in descending order (any NA values are kept, placed last).
  # The four draws stay in this order so the random number stream is consumed
  # exactly as before.
  OEPg_df[, col_name] <- sort(
    sample(OEPg_df$Insurable_Loss, nrow(OEPg_df), replace = TRUE),
    decreasing = TRUE, na.last = TRUE
  )
  OEPn_df[, col_name] <- sort(
    sample(OEPn_df$Retained, nrow(OEPn_df), replace = TRUE),
    decreasing = TRUE, na.last = TRUE
  )
  AEPg_df[, col_name] <- sort(
    sample(AEPg_df$Insurable_Loss, nrow(AEPg_df), replace = TRUE),
    decreasing = TRUE, na.last = TRUE
  )
  AEPn_df[, col_name] <- sort(
    sample(AEPn_df$Retained, nrow(AEPn_df), replace = TRUE),
    decreasing = TRUE, na.last = TRUE
  )
  Itt <- Itt + 1
}

#Means and stddev of rows - self-defined function - F7
OEPg_df <- F_EP_mean(OEPg_df)
OEPn_df <- F_EP_mean(OEPn_df)
AEPg_df <- F_EP_mean(AEPg_df)
AEPn_df <- F_EP_mean(AEPn_df)

# Rename dfs for storage
OEP_Default_gross_c_df <- OEPg_df
OEP_Default_net_c_df <- OEPn_df
AEP_Default_gross_c_df <- AEPg_df
AEP_Default_net_c_df <- AEPn_df

#### 4.5 - Inducing a stated correlation #####
# i.e. as section 2 above
# using _c for 'company', but on the OEP / AEP curves only
# There is some repetition here, but this is kept identical to Section 2
# above, except for the _c

# First task is to ensure that all rows have a value (even if zero)
# Create a list of all relevant years (or other time-windows), and join it to the data
# Inputs both need to have
# "Year", "Severity" and "Insurable_Loss"

# Years to patch holes with: 1 up to the last year present in Hazard 1
Yrs <- max(Data_H1_df$Year)
Yrs_df <- setNames(as.data.frame(seq(1, Yrs)), "Year")
N_data <- nrow(Yrs_df) # Number of observations

# Aggregate, rename columns, fill any holes, rank by severity (F_Join_Prep).
# H_num, Data_df and Agg_H_df are left holding the Hazard 2 values afterwards.

# Hazard 1
H_num <- "H1"
Data_df <- Data_H1_df
Agg_H1_df <- Agg_H_df <- F_Join_Prep(Data_df, H_num)

# Hazard 2
H_num <- "H2"
Data_df <- Data_H2_df
Agg_H2_df <- Agg_H_df <- F_Join_Prep(Data_df, H_num)

# Quick check to see how correlated the inputs are
# In case the 'independent' isn't that
cor_in <- cor(x = Agg_H1_df$Loss_H1, y = Agg_H2_df$Loss_H2)
write(sprintf("\nCorrelation - rp: %s",cor_in), file = outfile1, append = TRUE)

# Merge these two together
# Columns for each - linking function works on the basis of two columns
# for severity, each with a rank
# (the Hazard 2 columns are placed directly after the Hazard 1 columns)

Agg_H1H2_df <- Agg_H1_df
c_start <- ncol(Agg_H1_df)+1
c_end <- ncol(Agg_H1_df)+ncol(Agg_H2_df)
Agg_H1H2_df[, c_start:c_end] <- Agg_H2_df


# Set up an AEP/OEP file to record the results in

l_df1 <- length(Data_H1_df[,1])
l_df2 <- length(Data_H2_df[,1])

## AEP template for multiple columns: one row per year, with the yearly
## exceedance probability (rate) and the return period (1 / rate)
y_max <- max(Data_H1_df[,"Year"])
AEP_df <- setNames(as.data.frame(seq.int(1:y_max)), "Year")
AEP_df$rate <- seq.int(nrow(AEP_df))/max(AEP_df$Year)
AEP_df$RP <- 1/AEP_df$rate
AEPn_df <- AEPg_df <- AEP_df

## OEP template for multiple columns: taken from the gross OEP curve of the
## combined hazards, keeping only the Year, rate and RP columns
Reins_df <- F_Haz_comb(Data_H1_df, Data_H2_df)
OEP_df <- F_OEP(Reins_df)
OEPg_df <- OEP_df[, c("Year", "rate", "RP")]
OEPn_df <- OEP_df[, c("Year", "rate", "RP")]


##########################
#  4.6 Gaussian Copula   #
##########################

# "C6" is for Correlation 6
# (Gaussian copula applied to the company-scaled data)

# Parameters
# Estimate could be taken from a fit, or from the academic literature
#rho <- 0.4
rho <- r_val
Itt <- 1    # Simulation counter, advanced by the while loop below

# Explicitly reset the data frames
# (so the result tables start without any SimN columns)
AEPg_df <- AEP_df
AEPn_df <- AEP_df
OEPg_df <- OEP_df[,c("Year","rate", "RP")]
OEPn_df <- OEP_df[,c("Year","rate", "RP")]

## Simulation loop for C6
# Runs Sim_Itt simulations. Each one re-pairs the Hazard 2 years with the
# Hazard 1 years according to a fresh draw from the copula, re-applies
# reinsurance, and stores the resulting OEP/AEP curves (gross and net) in a
# new column "Sim<Itt>" of the four result tables.

## Uses
# Data_H1_df - Event by event
# Data_H2_df - Event by event
# Agg_H1H2_df - Annual aggregates
# AEP/OEP files of the same length to add to - prepared just above
# e.g. OEPg_df

# Add header lines for this correlation option to the fit log (outfile19)
write(sprintf("%s:","Correlation 6"), file=outfile19, append=T)
write(sprintf("%s","Spearman: Severity, Loss, Rank"), file=outfile19, append=T)
write(sprintf("%s","Fits: rho, n/a = 0.00, tau"), file=outfile19, append=T)
# Copula identifier passed to F_Equiv_fit
# (presumably the VineCopula family code, where 1 = Gaussian - confirm against F_Equiv_fit)
cop_num <- 1

while (Itt <= Sim_Itt) {
  # Name of the column that receives this simulation's results
  col_name <- sprintf("Sim%s",Itt)
  
  # A) - Simulation using the copula
  # Two dimensions, rho as correlation coefficient
  # (earlier t-copula variant kept for reference)
  #u <- rCopula(N_data,tCopula(dim=2,rho,df=100)) 
  u <- rCopula(N_data,normalCopula(dim=2,rho)) 
  # To match previous scripts, rename to x2
  x2 <- u
  x2 <- as.data.frame(x2)
  # x2 is a data frame that has two columns of simulated, correlated random
  # values between 0 and 1
  
  # Join the simulated years by severity of the hazards
  # x2 contains the ranks to join by (i.e. with the correlation in)
  # Agg_H1H2_df is the hazard/loss data, with ranked severity
  x3 <- F_Join(x2,Agg_H1H2_df)
  
  
  # B) - Apply the link to the event data in order to apply reinsurance at an event level
  # Retain the years in Data_H1_df, reordering Data_H2_df
  # Create link file i.e. yearH1, yearH2 to be joined .... get this from x3
  link_yrs <- x3[,c("Yr_H1","Yr_H2")]
  # Rename Yr_H2 to "Year" so it can serve as the merge key with Data_H2_df
  colnames(link_yrs)[match("Yr_H2",names(link_yrs))] <- c(sprintf("Year"))
  # Join this with Data for Hazard 2
  # (all.x = TRUE keeps every Hazard 2 event; events whose year has no link get NA for Yr_H1)
  Reorder_H2_df <- merge(x = Data_H2_df, y = link_yrs, by = "Year", all.x = TRUE)
  # Change the "Year" to be that from H1 defined by the link
  Reorder_H2_df$Year <- Reorder_H2_df$Yr_H1
  Reorder_H2_df$Yr_H1 <- NULL
  # And, now treat these re-numbered years as above ...
  # Group the two hazards
  Reins_df <- F_Haz_comb(Data_H1_df,Reorder_H2_df)
  # Create AEP curve - in this case gross input
  # (AEPx_df is not used again within this loop)
  AEPx_df <- F_AEP(Reins_df)
  # Create OEP curve - in this case gross input
  OEPx_df <- F_OEP(Reins_df)
  # Apply reinsurance
  Reins_C_df <- F_ReIns_Apply(Reins_df, retention_mult, limit_RP,OEPx_df)
  
  #Fit copula to input and output, and dump to file
  F_Equiv_fit(Data_H1_df, Reorder_H2_df, outfile19,cop_num)
  
  #OEP and AEP, have a look at the combined gross and net of reinsurance
  # Only the first nrow(temp) rows are filled; any remaining rows stay NA
  # and are set to zero after the loop
  # OEP first, gross then net
  temp <- F_OEP(Reins_C_df)
  OEPg_df[1:nrow(temp),col_name] <- temp$Insurable_Loss
  temp <- F_OEP_net(Reins_C_df)
  OEPn_df[1:nrow(temp),col_name] <- temp$Retained
  # AEP Second, gross then net
  temp <- F_AEP(Reins_C_df)
  AEPg_df[1:length(temp[,1]),col_name] <- temp$Insurable_Loss 
  temp <- F_AEP_net(Reins_C_df)
  AEPn_df[1:length(temp[,1]),col_name] <- temp$Retained
  
  Itt = Itt +1
}   # End of the while loop

# Finalise the C6 results, one exceedance-probability table at a time.
# For each table: cells never filled by a simulation (NA) are set to zero,
# F_EP_mean (self-defined function F7: means and stddev of rows) is applied,
# and the result is filed under its C6 name and under the generic 'C' name.
# The generic name keeps the plotting below generic, and will also work for
# summary stats if this is needed.

# OEP - gross
OEPg_df[is.na(OEPg_df)] <- 0
OEPg_df <- F_EP_mean(OEPg_df)
OEP_C_gross_df <- OEP_C6_gross_df <- OEPg_df

# OEP - net
OEPn_df[is.na(OEPn_df)] <- 0
OEPn_df <- F_EP_mean(OEPn_df)
OEP_C_net_df <- OEP_C6_net_df <- OEPn_df

# AEP - gross
AEPg_df[is.na(AEPg_df)] <- 0
AEPg_df <- F_EP_mean(AEPg_df)
AEP_C_gross_df <- AEP_C6_gross_df <- AEPg_df

# AEP - net
AEPn_df[is.na(AEPn_df)] <- 0
AEPn_df <- F_EP_mean(AEPn_df)
AEP_C_net_df <- AEP_C6_net_df <- AEPn_df

# Cross-check plots for the generic 'C' curves: mean gross (blue) against mean
# net (red), restricted to return periods below 500 years.
# In each plot the second line layer inherits x and y from ggplot() and only
# swaps in the net data.

# OEP - gross vs net
OEP_C_gross_df_plot <- subset(OEP_C_gross_df, RP < 500)
OEP_C_net_df_plot <- subset(OEP_C_net_df, RP < 500)

plot7 <- ggplot(OEP_C_gross_df_plot, aes(x = RP, y = Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = OEP_C_net_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("OEP for C - means - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot7

# AEP - gross vs net
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, RP < 500)
AEP_C_net_df_plot <- subset(AEP_C_net_df, RP < 500)

plot8 <- ggplot(AEP_C_gross_df_plot, aes(x = RP, y = Loss / 1000000000)) +
  geom_line(colour = "blue", size = 2) +
  geom_line(data = AEP_C_net_df_plot, colour = 2, size = 2) +
  labs(
    title = sprintf("AEP for C - means - %s(blue) and %s(red)","gross","net"),
    x = "Return Period (yrs)",
    y = "Loss (? billion)"
  )
plot8


# Plot the first 500 years RP of the gross AEP - effect of correlation - KEY FIGURE
# Company-level version: compares the correlated case (generic 'C' curve,
# blue) with the company default ("independent") case (red); the figure is
# saved to outfile20.
AEP_C_gross_df_plot <- subset(AEP_C_gross_df, AEP_C_gross_df$RP < 500)
# To show all RPs, raise the 500 threshold in these two subsets (e.g. to 5,000 years)
AEP_Default_gross_c_df_plot <- subset(AEP_Default_gross_c_df, AEP_Default_gross_c_df$RP < 500)

# NOTE(review): alpha=0.3 (both ribbons) and fill = "red" (second ribbon) sit
# inside aes(), so ggplot2 treats them as mapped data values, not fixed
# settings. The drawn transparency / red shade may differ from a literal
# 0.3 / "red" - confirm this is intended. The resulting legend is hidden by
# theme(legend.position = "none").
plot9 <- ggplot(data=AEP_C_gross_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP gross - correlated
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (aes(ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3), fill = "blue") +
  # Mean
  geom_line(data=AEP_C_gross_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP gross - uncorrelated (company default case)
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (data=AEP_Default_gross_c_df_plot,
               aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3, fill = "red")) +
  # Mean
  geom_line(data=AEP_Default_gross_c_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # The title reports the current value of rho to two decimal places
  labs(title=sprintf("AEP gross - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (? billion)")

plot9

ggsave(
  outfile20,    # easiest to use file name to set device
  plot = plot9,  # default of last_plot()
)

# Plot the first 500 years RP of the net AEP - effect of correlation - KEY FIGURE
# Company-level version, same layout as the gross figure but drawn from the
# net-of-reinsurance curves; the figure is saved to outfile21.
AEP_C_net_df_plot <- subset(AEP_C_net_df, AEP_C_net_df$RP < 500)
# To show all RPs, raise the 500 threshold in these two subsets (e.g. to 5,000 years)
AEP_Default_net_c_df_plot <- subset(AEP_Default_net_c_df, AEP_Default_net_c_df$RP < 500)

# NOTE(review): alpha=0.3 and fill = "red" inside aes() are mapped values, not
# fixed settings - confirm the rendered transparency / colour is as intended.
plot10 <- ggplot(data=AEP_C_net_df_plot, aes(x = RP, y = (Loss/1000000000))) + 
  # AEP net - correlated
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (aes(ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3), fill = "blue") +
  # Mean
  geom_line(data=AEP_C_net_df_plot,
            col="blue", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # AEP net - uncorrelated (company default case)
  # Band between the MinE and MaxE columns (labelled 'Stderr' by the author)
  geom_ribbon (data=AEP_Default_net_c_df_plot,
               aes(y = (Loss/1000000000), ymin=MinE/1000000000, ymax=MaxE/1000000000, alpha=0.3, fill = "red")) +
  # Mean
  geom_line(data=AEP_Default_net_c_df_plot, col="red", size=2, linetype="dashed",
            aes(x = RP, y = (Loss/1000000000))) +
  # General
  theme(legend.position = "none") +
  
  # The title reports the current value of rho to two decimal places
  labs(title=sprintf("AEP net - %s(%.2f,blue) and %s(red)","correlated",rho,"independent"), x="Return Period (yrs)", y="Loss (? billion)")

plot10

ggsave(
  outfile21,    # easiest to use file name to set device
  plot = plot10,  # default of last_plot()
)

### 4.7 - Output Analytics

# Tabulated comparison at the selected return periods (RPs_select_df) of the
# company-level (_c = "company") default AEP curve, D, and the correlated
# curve, C6. Each table holds RP, D, C6 and C6_per, the percentage difference
# of C6 relative to D.

# Gross (AEP_C_gross_df)
AEPg_D_summary_df <- F_EP_summary(AEP_Default_gross_c_df, RPs_select_df)
AEPg_C6_summary_df <- F_EP_summary(AEP_C6_gross_df, RPs_select_df)
AEPg_summary_df <- as.data.frame(AEPg_D_summary_df[,"RP"])
colnames(AEPg_summary_df)[1] <- "RP"
AEPg_summary_df[["D"]] <- AEPg_D_summary_df$Loss
AEPg_summary_df[["C6"]] <- AEPg_C6_summary_df$Loss
# Work out percentage losses
AEPg_summary_df[["C6_per"]] <- with(AEPg_summary_df, 100 * ((C6 / D) - 1))

# Net (AEP_C_net_df)
AEPn_D_summary_df <- F_EP_summary(AEP_Default_net_c_df, RPs_select_df)
AEPn_C6_summary_df <- F_EP_summary(AEP_C6_net_df, RPs_select_df)
AEPn_summary_df <- as.data.frame(AEPn_D_summary_df[,"RP"])
colnames(AEPn_summary_df)[1] <- "RP"
AEPn_summary_df[["D"]] <- AEPn_D_summary_df$Loss
AEPn_summary_df[["C6"]] <- AEPn_C6_summary_df$Loss
# Work out percentage losses
AEPn_summary_df[["C6_per"]] <- with(AEPn_summary_df, 100 * ((C6 / D) - 1))

###############################
# 5- Input into SCR modelling #
###############################

# Run3_AEP_net_impact_Company.txt
# Run3_AEP_gross_impact_Company.txt

# 22,23 - Output files of percentage difference
write.table(AEPg_summary_df, file = outfile22, append = FALSE)
write.table(AEPn_summary_df, file = outfile23, append = FALSE)


