## ----style, echo=FALSE, message=FALSE, warning=FALSE, results="asis"----------------------------------------------------------------------------------------------------------------------------------
rm(list = ls())
library("rmarkdown")
options(width = 200)

## ----message=FALSE, warning=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------
.packages <- c(
  "edgeR", "viridis", "Biobase", "BiocFileCache", "SummarizedExperiment", 
  "ggplot2", "dplyr", "tidyr", "tibble", "readr", 
  "TimeSeriesExperiment")
sapply(.packages, require, character.only = TRUE)
theme_set(theme_bw())
theme_update(
  text = element_text(size = 15),
  strip.background = element_rect(fill = "grey70"),
  strip.text = element_text(size = 10))

## ---- message=FALSE, warning=FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Remote location of the data
urls <- paste0(
    "https://www.ncbi.nlm.nih.gov/geo/download/",
    "?acc=GSE114762&format=file&file=",
    c(
        "GSE114762_raw_counts.csv.gz",
        "GSE114762_gene_data.csv.gz",
        "GSE114762_sample_info.csv.gz"
    )
)

## ---- message=FALSE, warning=FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(BiocFileCache)
bfc <- BiocFileCache(ask = FALSE)

cnts <- read_csv(bfcrpath(rnames = urls[1])) %>%
  remove_rownames() %>%
  as.matrix()
rownames(cnts) <- cnts[,1]
cnts <- cnts[,-1]

gene.data <- read_csv(bfcrpath(rnames = urls[2])) %>%
  as.data.frame() %>%
  remove_rownames() 
rownames(gene.data) <- gene.data[,1]
gene.data <- gene.data[,-1]

pheno.data <- read_csv(bfcrpath(rnames = urls[3])) %>%
  as.data.frame() %>%
  remove_rownames() 
rownames(pheno.data) <- pheno.data[,1]
pheno.data <- pheno.data[,-1]

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
cop1.te <- TimeSeriesExperiment(
  assays = list(cnts),
  rowData = gene.data,
  colData = pheno.data,
  timepoint = "time",
  replicate = "replicate",
  group = "group"
)
cop1.te

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
cop1.es <- ExpressionSet(
  as.matrix(cnts),
  phenoData = AnnotatedDataFrame(pheno.data),
  featureData = AnnotatedDataFrame(gene.data))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
(cop1.te <- makeTimeSeriesExperimentFromExpressionSet(
  cop1.es, timepoint = "time", group = "group",
  replicate = "individual"))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
cop1.se <- SummarizedExperiment(
  assays = list(counts = cnts),
  rowData = gene.data, colData = pheno.data)

(cop1.te <- makeTimeSeriesExperimentFromSummarizedExperiment(
  cop1.se, time = "time", group = "group",
  replicate = "individual"))

# Remove raw data
rm(cnts, gene.data, pheno.data, cop1.se, cop1.es)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Compute CPMs 
cop1.te <- normalizeData(cop1.te)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Fine genes with minimum mean count of 5 at least in one of the two groups
min_mean_cpm <- 5
group_cpm_means <- data.frame(row.names = rownames(cop1.te))
norm.cnts <- assays(cop1.te)$norm
for(g in unique(groups(cop1.te))) {
  g_cnts <- norm.cnts[ , which(groups(cop1.te) == g)]
  group_cpm_means[, g] <- apply(g_cnts, 1, mean)
}
group_cpm_max <- apply(as.vector(group_cpm_means), 1, max)
genes_expressed <- rownames(cop1.te)[group_cpm_max > min_mean_cpm]

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Filter out the noisy genes
cop1.te <- filterFeatures(cop1.te, genes_expressed)
cop1.te

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Collapsed sample data stored in cop1.te@sample.data.collapsed,
# and mean expression values in cop1.te@data.collapsed
cop1.te <- collapseReplicates(cop1.te, FUN = mean)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Before conversion, scales gene expression to an even sum, for a fair
# gene-to-gene comparison.
cop1.te <- makeTimeSeries(
    cop1.te, feature.trans.method = "scale_feat_sum")

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# untransfomed
head(timeSeries(cop1.te, "ts"))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# transfomed
head(timeSeries(cop1.te, "ts_trans"))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Add lags to time-course data
cop1.te <- addLags(cop1.te, lambda = c(0.5, 0.25))
head(timeSeries(cop1.te, "ts_trans"))

## ----heatmap, fig.height=9, fig.width=7---------------------------------------------------------------------------------------------------------------------------------------------------------------
plotHeatmap(cop1.te, num.feat = 100)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
cop1.te <- runPCA(cop1.te, var.stabilize.method = "asinh")

## ----pca-samples-group, fig.wide = TRUE, fig.height=3, fig.width=7------------------------------------------------------------------------------------------------------------------------------------
plotSamplePCA(cop1.te, col.var = "group", size = 5)

## ----pca-samples-time, fig.wide = TRUE, fig.height=3, fig.width=7-------------------------------------------------------------------------------------------------------------------------------------
plotSamplePCA(cop1.te, col.var = "timepoint", size = 5)

## ----pca-genes, fig.width = 10, fig.height = 8--------------------------------------------------------------------------------------------------------------------------------------------------------
plotTimeSeriesPCA(
    cop1.te,
    linecol = c("WT" = "#e31a1c", "Loxp" = "#1f78b4"),
    main = paste("Visualizing gene profiles with PCA"),
    m  = 15, n = 15, col = adjustcolor("grey77", alpha=0.7), 
    cex.main = 3, cex.axis = 2, cex.lab = 2)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
params_for_clustering <- list(
  dynamic = TRUE, 
  dynamic_cut_params = list(deepSplit = TRUE, minModuleSize = 150))

cop1.te <- clusterTimeSeries(
  cop1.te, n.top.feat = 3000, groups = "all",
  clust.params = params_for_clustering)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# See the count of genes in each cluster
cluster_map <- clusterMap(cop1.te)
table(cluster_map$cluster)

## ----hclust-dendrogram, fig.width = 7, fig.height = 4-------------------------------------------------------------------------------------------------------------------------------------------------
# Plot the hierarchical clustering of genes2cluster
hclust_obj <- clusterAssignment(cop1.te, "hclust")
plot(x = hclust_obj, labels = FALSE, xlab = "genes", sub = "")

## ----gene-clusters, fig.width = 7, fig.height = 5.5, fig.wide = TRUE----------------------------------------------------------------------------------------------------------------------------------
plotTimeSeriesClusters(cop1.te, transparency = 0.2) +
    scale_color_manual(values = c("WT" = "#1f78b4", "Loxp" = "#e31a1c")) +
    theme(strip.text = element_text(size = 10)) +
    ylim(NA, 0.55)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
cop1.te <- timepointDE(cop1.te, timepoints = "all", alpha = 0.05)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
tmp_de <- differentialExpression(cop1.te, "timepoint_de")
# First 6 DE genes at timepoint = 2.5:
head(tmp_de$`2.5`)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
top10_genes <- sapply(tmp_de, function(tab) {
    if(nrow(tab) == 0)
      return(rep(NA, 10))
    top10 <- tab %>% 
      arrange(desc(abs(logFC))) %>%
      .[["symbol"]] %>% .[1:10]
    if(length(top10) < 10)
      top10 <- c(top10, rep(NA, 10 - length(top10)))
    return(top10)
  }) %>% 
  as.data.frame()
top10_genes

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
de_genes <- lapply(tmp_de, function(x) x$symbol)
de_any_tmp <- unique(unlist(de_genes))
cat("Out of all", nrow(cop1.te), ", there were",
    length(de_any_tmp), "were found differentially expressed at any timepoint.")

## ----tmp-de-intersect, fig.width=7, fig.height=4------------------------------------------------------------------------------------------------------------------------------------------------------
library(UpSetR)
upset(fromList(de_genes),
      sets = rev(c("2.5", "4", "6", "9", "13")), keep.order = TRUE,
      number.angles = 30, #point.size = 3.5, line.size = 1,
      mainbar.y.label = "DE Gene Intersections",
      sets.x.label = "DE Genes Per Timepoint")

## ---- eval = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  cop1.te <- trajectoryDE(cop1.te, dist_method = "euclidean")
#  de_trajectory_res <- differentialExpression(cop1.te, "trajectory_de")
#  saveRDS(de_trajectory_res, "de_trajectory_cop1.rds")

## ----echo = FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
de_trajectory_res <- readRDS("de_trajectory_cop1.rds")

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
head(de_trajectory_res, 20)

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Select top most different genes across two groups
n_genes <- 20
genes_with_de_trajectory <- de_trajectory_res %>%
    filter(pval <= max(0.05, min(pval)), R2 > 0.7) %>%
    arrange(-R2)
(genes_to_plot <- genes_with_de_trajectory$feature[1:n_genes])

## ----genes-de-trajectory, fig.width = 7, fig.height = 5.5, fig.wide = TRUE----------------------------------------------------------------------------------------------------------------------------
plotTimeSeries(cop1.te, features = genes_to_plot) +
    scale_color_manual(values = c("WT" =  "#1f78b4", "Loxp" = "#e31a1c"))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
plotTimeSeries(cop1.te, features = genes_to_plot) +
    scale_color_manual(values = c("WT" =  "#1f78b4", "Loxp" = "#e31a1c"))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
length(genes_with_de_trajectory$feature)

## ----de-gene-clusters, fig.width = 8, fig.height = 6, fig.wide = TRUE---------------------------------------------------------------------------------------------------------------------------------
plotTimeSeriesClusters(
  cop1.te, transparency = 0.5,
  features = genes_with_de_trajectory$feature) +
  scale_color_manual(values = c("WT" = "#1f78b4", "Loxp" = "#e31a1c"))

## ---- eval = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  enrich_res <- pathwayEnrichment(
#    object = cop1.te,
#    features = genes_with_de_trajectory$feature,
#    species = "Mm", ontology ="BP")
#  saveRDS(enrich_res, "enrich_res_cop1.rds")

## ----echo = FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
enrich_res <- readRDS("enrich_res_cop1.rds")

## ----go-enrich-clst-c4,  fig.width = 8, fig.height = 4, fig.wide = TRUE-------------------------------------------------------------------------------------------------------------------------------
clst <- "C4"
plotEnrichment(
    enrich = enrich_res[[clst]], n_max = 15) +
  ggtitle(paste0("Cluster , ", clst, " enrichment"))

## ----go-enrich-clst-c5,  fig.width = 8, fig.height = 4, fig.wide = TRUE-------------------------------------------------------------------------------------------------------------------------------
clst <- "C5"
plotEnrichment(
    enrich = enrich_res[[clst]], n_max = 15) +
  ggtitle(paste0("Cluster , ", clst, " enrichment"))

## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
sessionInfo()