## ---- warning=FALSE, message=FALSE--------------------------------------------
library(SPONGE)
library(doParallel)
library(foreach)
library(dplyr)

# Register your backend here
num.of.cores <- 4
cl <- makeCluster(num.of.cores) 
registerDoParallel(cl)

## ---- echo=FALSE, results='asis'----------------------------------------------
knitr::kable(train_cancer_gene_expr[1:5,1:8])

## ---- echo=FALSE, results='asis'----------------------------------------------
knitr::kable(test_cancer_gene_expr[1:5,1:8])

## ---- warning=FALSE, message=FALSE--------------------------------------------
knitr::kable(train_cancer_mir_expr[1:5,1:8])

## ---- warning=FALSE, message=FALSE--------------------------------------------
knitr::kable(test_cancer_mir_expr[1:5,1:8])

## ---- warning=FALSE, message=FALSE--------------------------------------------
knitr::kable(train_ceRNA_interactions[1:5,1:8])

## ---- warning=FALSE, message=FALSE--------------------------------------------
knitr::kable(train_network_centralities[1:5,1:5])

## ---- warning=FALSE, message=FALSE--------------------------------------------
knitr::kable(train_cancer_metadata[1:5,1:8])

## ---- warning=FALSE, message=FALSE--------------------------------------------
knitr::kable(test_cancer_metadata[1:5,1:8])

## ----message=FALSE, warning=FALSE---------------------------------------------
filtered_network_centralities=filter_ceRNA_network(sponge_effects = train_ceRNA_interactions, Node_Centrality = train_network_centralities,add_weighted_centrality=T, mscor.threshold = 0.01, padj.threshold = 0.1)

## ---- warning=FALSE, message=FALSE--------------------------------------------

RNAs <- c("lncRNA","protein_coding")
RNAs.ofInterest <- ensembl.df %>% dplyr::filter(gene_biotype %in% RNAs) %>%
  dplyr::select(ensembl_gene_id)

central_gene_modules<-get_central_modules(central_nodes = RNAs.ofInterest$ensembl_gene_id,node_centrality = filtered_network_centralities$Node_Centrality,ceRNA_class = RNAs, centrality_measure = "Weighted_Degree", cutoff = 10)


## ---- warning=FALSE, message=FALSE--------------------------------------------

Sponge.modules <- define_modules(network = filtered_network_centralities$Sponge.filtered, central.modules = central_gene_modules, remove.central = F, set.parallel = F)
# Module size distribution
Size.modules <- sapply(Sponge.modules, length)


## ---- warning=FALSE, message=FALSE--------------------------------------------

train.modules <- enrichment_modules(Expr.matrix = train_cancer_gene_expr, modules = Sponge.modules, bin.size = 10, min.size = 1, max.size = 2000, min.expr = 1, method = "OE", cores=1)
test.modules <-  enrichment_modules(Expr.matrix = test_cancer_gene_expr, modules = Sponge.modules, bin.size = 10, min.size = 1, max.size = 2000, min.expr = 1, method = "OE", cores=1)

## ---- warning=FALSE, message=FALSE--------------------------------------------
# We find modules that were identified both in the train and test and use those as input features for the model
common_modules = intersect(rownames(train.modules), rownames(test.modules))
train.modules = train.modules[common_modules, ]
test.modules = test.modules[common_modules, ]

trained.model = calibrate_model(Input = train.modules, modules_metadata = train_cancer_metadata, label = "SUBTYPE", sampleIDs = "sampleID",Metric = "Exact_match", n_folds = 2, repetitions = 1)

trained.model[["ConfusionMatrix_training"]]

## -----------------------------------------------------------------------------
Input.test <- t(test.modules) %>% scale(center = T, scale = T)
Prediction.model <- predict(trained.model$Model, Input.test)

# We compute the confusion metrix on the test set
ConfusionMatrix_testing <- caret::confusionMatrix(as.factor(Prediction.model), as.factor(test_cancer_metadata$SUBTYPE))
trained.model$ConfusionMatrix_testing<-ConfusionMatrix_testing
ConfusionMatrix_testing

## -----------------------------------------------------------------------------
# Define random modules
Random.modules <- Random_spongEffects(sponge_modules = Sponge.modules,
                                      gene_expr = train_cancer_gene_expr, min.size = 1,bin.size = 10, max.size = 200,
                                      min.expression=1, replace = F,method = "OE",cores = 1)
# We can now use the randomly defined modules to calculate their enrichment in the test set
Random.modules.test <- enrichment_modules(Expr.matrix = test_cancer_gene_expr, modules = Random.modules$Random_Modules, bin.size = 10, min.size = 1, max.size = 2000, min.expr = 1, method = "OE", cores=1)


## ---- warning=FALSE, message=FALSE--------------------------------------------
# We find random modules that were identified both in the train and test and use those as input features for the model
common_modules_random = intersect(rownames(Random.modules$Enrichment_Random_Modules), rownames(Random.modules.test))
Random.modules.train = Random.modules$Enrichment_Random_Modules[common_modules_random, ]
Random.modules.test = Random.modules.test[common_modules_random, ]

Random.model = calibrate_model(Input = Random.modules.train, modules_metadata = train_cancer_metadata, label = "SUBTYPE", sampleIDs = "sampleID",Metric = "Exact_match", n_folds = 2, repetitions = 1)

Random.model[["ConfusionMatrix_training"]]

## -----------------------------------------------------------------------------
Input.test <- t(Random.modules.test) %>% scale(center = T, scale = T)
Input.test<-Input.test[ , apply(Input.test, 2, function(x) !any(is.na(x)))]

Prediction.model <- predict(Random.model$Model, Input.test)

# We compute the confusion metrix on the test set
ConfusionMatrix_testing_random <- caret::confusionMatrix(as.factor(Prediction.model), as.factor(test_cancer_metadata$SUBTYPE))
Random.model$ConfusionMatrix_testing_random<-ConfusionMatrix_testing_random
ConfusionMatrix_testing_random

## -----------------------------------------------------------------------------
Input.centralgenes.train <- train_cancer_gene_expr[rownames(train_cancer_gene_expr) %in% names(Sponge.modules), ]
Input.centralgenes.test <- test_cancer_gene_expr[rownames(test_cancer_gene_expr) %in% names(Sponge.modules), ]

common_modules = intersect(rownames(Input.centralgenes.train), rownames(Input.centralgenes.test))
Input.centralgenes.train = Input.centralgenes.train[common_modules, ]
Input.centralgenes.test = Input.centralgenes.test[common_modules, ]

# Calibrate model
CentralGenes.model = calibrate_model(Input = Input.centralgenes.train, modules_metadata = train_cancer_metadata, label = "SUBTYPE", sampleIDs = "sampleID",Metric = "Exact_match", n_folds = 1, repetitions = 1)

# Validate on test set
Input.centralgenes.test <- t(Input.centralgenes.test) %>% scale(center = T, scale = T)
CentralGenes.prediction <- predict(CentralGenes.model$Model, Input.centralgenes.test)

# We compute the confusion metrix on the test set
ConfusionMatrix_testing <- caret::confusionMatrix(as.factor(CentralGenes.prediction), as.factor(test_cancer_metadata$SUBTYPE))
CentralGenes.model$ConfusionMatrix_testing<-ConfusionMatrix_testing
ConfusionMatrix_testing


## -----------------------------------------------------------------------------
plot_accuracy_sensitivity_specificity(trained_model=trained.model,central_genes_model=NA,
                                      random_model= Random.model,
                                      training_dataset_name="TCGA",testing_dataset_name="TCGA",
                                      subtypes=as.factor(test_cancer_metadata$SUBTYPE))

## ---- warning=FALSE, message=FALSE--------------------------------------------
lollipop_plot=plot_top_modules(trained_model=trained.model, k_modules_red = 2, k_modules = 4)
lollipop_plot

## ---- warning=FALSE, message=FALSE--------------------------------------------
density_plot_train=plot_density_scores(trained_model=trained.model,spongEffects = train.modules, meta_data = train_cancer_metadata, label = "SUBTYPE", sampleIDs = "sampleID")
density_plot_train


## -----------------------------------------------------------------------------
heatmap.train = plot_heatmaps(trained_model = trained.model,spongEffects = train.modules,
               meta_data = train_cancer_metadata, label = "SUBTYPE", sampleIDs = "sampleID",Modules_to_Plot = 2,
              show.rownames = F, show.colnames = F)
heatmap.train

## -----------------------------------------------------------------------------
heatmap.test = plot_heatmaps(trained_model = trained.model,spongEffects = test.modules,
               meta_data = test_cancer_metadata, label = "SUBTYPE", sampleIDs = "sampleID",Modules_to_Plot = 2,
              show.rownames = F, show.colnames = F)
heatmap.test

## -----------------------------------------------------------------------------
plot_involved_miRNAs_to_modules(sponge_modules=Sponge.modules,
                                trained_model=trained.model,
                                gene_mirna_candidates= train_genes_miRNA_candidates,
                                k_modules = 2,
                                filter_miRNAs = 0.0,
                                bioMart_gene_symbol_columns = "hgnc_symbol",
                                bioMart_gene_ensembl = "hsapiens_gene_ensembl")

## ---- warning=FALSE, message=FALSE--------------------------------------------
#stop your backend parallelisation if registered
stopCluster(cl)