## ---- eval=FALSE--------------------------------------------------------------
#  library(MultiAssayExperiment)
#  library(ELMER.data)
#  library(ELMER)
#  # get distal probes that are 2kb away from TSS on chromosome 1
#  distal.probes <- get.feature.probe(
#    genome = "hg19",
#    met.platform = "450K",
#    rm.chr = paste0("chr",c(2:22,"X","Y"))
#  )
#  data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
#  data(LUSC_meth_refined,package = "ELMER.data") # Meth
#  
#  mae <- createMAE(
#    exp = GeneExp,
#    met = Meth,
#    save = TRUE,
#    linearize.exp = TRUE,
#    save.filename = "mae.rda",
#    filter.probes = distal.probes,
#    met.platform = "450K",
#    genome = "hg19",
#    TCGA = TRUE
#  )
#  
#  group.col <- "definition"
#  group1 <-  "Primary solid Tumor"
#  group2 <- "Solid Tissue Normal"
#  dir.out <- "result"
#  diff.dir <-  "hypo" # Search for hypomethylated probes in group 1
#  
#  sig.diff <- get.diff.meth(
#    data = mae,
#    group.col = group.col,
#    group1 = group1,
#    group2 = group2,
#    minSubgroupFrac = 0.2,
#    sig.dif = 0.3,
#    diff.dir = diff.dir,
#    cores = 1,
#    dir.out = dir.out,
#    pvalue = 0.01
#  )
#  
#  
#  nearGenes <- GetNearGenes(
#    data = mae,
#    probes = sig.diff$probe,
#    numFlankingGenes = 20
#  ) # 10 upstream and 10 dowstream genes
#  
#  pair <- get.pair(
#    data = mae,
#    group.col = group.col,
#    group1 = group1,
#    mode = "unsupervised",
#    group2 = group2,
#    nearGenes = nearGenes,
#    diff.dir = diff.dir,
#    minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
#    permu.dir = file.path(dir.out,"permu"),
#    permu.size = 100, # Please set to 100000 to get significant results
#    raw.pvalue = 0.05,
#    Pe = 0.01, # Please set to 0.001 to get significant results
#    filter.probes = TRUE, # See preAssociationProbeFiltering function
#    filter.percentage = 0.05,
#    filter.portion = 0.3,
#    dir.out = dir.out,
#    cores = 1,
#    label = diff.dir
#  )
#  
#  # Identify enriched motif for significantly hypomethylated probes which
#  # have putative target genes.
#  enriched.motif <- get.enriched.motif(
#    data = mae,
#    probes = pair$Probe,
#    dir.out = dir.out,
#    label = diff.dir,
#    min.incidence = 10,
#    lower.OR = 1.1
#  )
#  
#  TF <- get.TFs(
#    data = mae,
#    mode = "unsupervised",
#    group.col = group.col,
#    group1 = group1,
#    group2 = group2,
#    enriched.motif = enriched.motif,
#    dir.out = dir.out,
#    cores = 1,
#    label = diff.dir
#  )
#  
#  

## ---- eval=FALSE--------------------------------------------------------------
#  library(stringr)
#  library(TCGAbiolinks)
#  library(dplyr)
#  library(ELMER)
#  library(MultiAssayExperiment)
#  library(parallel)
#  library(readr)
#  dir.create("~/paper_elmer/",showWarnings = FALSE)
#  setwd("~/paper_elmer/")
#  
#  file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
#  if(file.exists(file)) {
#    mae <- get(load(file))
#  } else {
#    getTCGA(
#      disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
#      basedir = "DATA", # Where data will be downloaded
#      genome  = "hg38"
#    ) # Genome of refenrece "hg38" or "hg19"
#  
#    distal.probes <- get.feature.probe(
#      feature = NULL,
#      genome = "hg38",
#      met.platform = "450K"
#    )
#  
#  
#    mae <- createMAE(
#      exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda",
#      met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda",
#      met.platform = "450K",
#      genome = "hg38",
#      linearize.exp = TRUE,
#      filter.probes = distal.probes,
#      met.na.cut = 0.2,
#      save = FALSE,
#      TCGA = TRUE
#    )
#    # Remove FFPE samples from the analysis
#    mae <- mae[,!mae$is_ffpe]
#  
#    # Get molecular subytpe information from cell paper and more metadata (purity etc...)
#    # https://doi.org/10.1016/j.cell.2015.09.033
#    file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
#    downloader::download(file, basename(file))
#    subtypes <- readxl::read_excel(basename(file), skip = 2)
#  
#    subtypes$sample <- substr(subtypes$Methylation,1,16)
#    meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
#    meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
#    meta.data <- S4Vectors::DataFrame(meta.data)
#    rownames(meta.data) <- meta.data$sample
#    stopifnot(all(meta.data$patient == colData(mae)$patient))
#    colData(mae) <- meta.data
#    save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
#  }
#  dir.out <- "BRCA_unsupervised_hg38/hypo"
#  cores <- 10
#  diff.probes <- get.diff.meth(
#    data = mae,
#    group.col = "definition",
#    group1 = "Primary solid Tumor",
#    group2 = "Solid Tissue Normal",
#    diff.dir = "hypo", # Get probes hypometh. in group 1
#    cores = cores,
#    minSubgroupFrac = 0.2, # % group samples  used.
#    pvalue = 0.01,
#    sig.dif = 0.3,
#    dir.out = dir.out,
#    save = TRUE
#  )
#  
#  # For each differently methylated probes we will get the
#  # 20 nearby genes (10 downstream and 10 upstream)
#  nearGenes <- GetNearGenes(
#    data = mae,
#    probes =  diff.probes$probe,
#    numFlankingGenes = 20
#  )
#  
#  # This step is the most time consuming. Depending on the size of the groups
#  # and the number of probes found previously it migh take hours
#  Hypo.pair <- get.pair(
#    data = mae,
#    nearGenes = nearGenes,
#    group.col = "definition",
#    group1 = "Primary solid Tumor",
#    group2 = "Solid Tissue Normal",
#    permu.dir = paste0(dir.out,"/permu"),
#    permu.size = 10000,
#    mode = "unsupervised",
#    minSubgroupFrac = 0.4, # 40% of samples to create U and M
#    raw.pvalue = 0.001,
#    Pe = 0.001,
#    filter.probes = TRUE,
#    filter.percentage = 0.05,
#    filter.portion = 0.3,
#    dir.out = dir.out,
#    cores = cores,
#    label = "hypo"
#  )
#  # Number of pairs: 2950
#  
#  
#  enriched.motif <- get.enriched.motif(
#    data = mae,
#    min.motif.quality = "DS",
#    probes = unique(Hypo.pair$Probe),
#    dir.out = dir.out,
#    label = "hypo",
#    min.incidence = 10,
#    lower.OR = 1.1
#  )
#  TF <- get.TFs(
#    data = mae,
#    group.col = "definition",
#    group1 = "Primary solid Tumor",
#    group2 = "Solid Tissue Normal",
#    minSubgroupFrac = 0.4, # Set to 1 if supervised mode
#    enriched.motif = enriched.motif,
#    dir.out = dir.out,
#    cores = cores,
#    label = "hypo"
#  )
#  

## ---- eval=FALSE--------------------------------------------------------------
#  library(stringr)
#  library(TCGAbiolinks)
#  library(dplyr)
#  library(ELMER)
#  library(MultiAssayExperiment)
#  library(parallel)
#  library(readr)
#  #-----------------------------------
#  # 1 - Samples
#  # ----------------------------------
#  dir.create("~/paper_elmer/",showWarnings = FALSE)
#  setwd("~/paper_elmer/")
#  
#  file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
#  if(file.exists(file)) {
#    mae <- get(load(file))
#  } else {
#    getTCGA(
#      disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
#      basedir = "DATA", # Where data will be downloaded
#      genome  = "hg38"
#    ) # Genome of refenrece "hg38" or "hg19"
#  
#    distal.probes <- get.feature.probe(
#      feature = NULL,
#      genome = "hg38",
#      met.platform = "450K"
#    )
#  
#    mae <- createMAE(
#      exp = "DATA/BRCA/BRCA_RNA_hg38.rda",
#      met = "DATA/BRCA/BRCA_meth_hg38.rda",
#      met.platform = "450K",
#      genome = "hg38",
#      linearize.exp = TRUE,
#      filter.probes = distal.probes,
#      met.na.cut = 0.2,
#      save = FALSE,
#      TCGA = TRUE
#    )
#    # Remove FFPE samples from the analysis
#    mae <- mae[,!mae$is_ffpe]
#  
#    # Get molecular subytpe information from cell paper and more metadata (purity etc...)
#    # https://doi.org/10.1016/j.cell.2015.09.033
#    file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
#    downloader::download(file, basename(file))
#    subtypes <- readxl::read_excel(basename(file), skip = 2)
#  
#    subtypes$sample <- substr(subtypes$Methylation,1,16)
#    meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
#    meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
#    meta.data <- S4Vectors::DataFrame(meta.data)
#    rownames(meta.data) <- meta.data$sample
#    stopifnot(all(meta.data$patient == colData(mae)$patient))
#    colData(mae) <- meta.data
#    save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
#  }
#  
#  cores <- 6
#  direction <- c( "hypo","hyper")
#  genome <- "hg38"
#  group.col  <- "PAM50"
#  groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
#  for(g in 1:nrow(groups)) {
#    group1 <- groups[g,1]
#    group2 <- groups[g,2]
#    for (j in direction){
#      tryCatch({
#        message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
#        dir.out <- paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j)
#        dir.create(dir.out, recursive = TRUE)
#        #--------------------------------------
#        # STEP 3: Analysis                     |
#        #--------------------------------------
#        # Step 3.1: Get diff methylated probes |
#        #--------------------------------------
#        Sig.probes <- get.diff.meth(
#          data       = mae,
#          group.col  = group.col,
#          group1     = group1,
#          group2     = group2,
#          sig.dif    = 0.3,
#          minSubgroupFrac = 1,
#          cores      = cores,
#          dir.out    = dir.out,
#          diff.dir   = j,
#          pvalue     = 0.01
#        )
#        if(nrow(Sig.probes) == 0) next
#        #-------------------------------------------------------------
#        # Step 3.2: Identify significant probe-gene pairs            |
#        #-------------------------------------------------------------
#        # Collect nearby 20 genes for Sig.probes
#        nearGenes <- GetNearGenes(
#          data  = mae,
#          probe = Sig.probes$probe
#        )
#  
#        pair <- get.pair(
#          data       = mae,
#          nearGenes  = nearGenes,
#          group.col  = group.col,
#          group1     = group1,
#          group2     = group2,
#          permu.dir  = paste0(dir.out,"/permu"),
#          dir.out    = dir.out,
#          mode       = "supervised",
#          diff.dir   = j,
#          cores      = cores,
#          label      = j,
#          permu.size = 10000,
#          raw.pvalue = 0.001
#        )
#  
#        Sig.probes.paired <- readr::read_csv(
#          paste0(dir.out,
#                 "/getPair.",j,
#                 ".pairs.significant.csv")
#        )[,1, drop = TRUE]
#  
#  
#        #-------------------------------------------------------------
#        # Step 3.3: Motif enrichment analysis on the selected probes |
#        #-------------------------------------------------------------
#        if(length(Sig.probes.paired) > 0 ){
#          #-------------------------------------------------------------
#          # Step 3.3: Motif enrichment analysis on the selected probes |
#          #-------------------------------------------------------------
#          enriched.motif <- get.enriched.motif(
#            probes  = Sig.probes.paired,
#            dir.out = dir.out,
#            data    = mae,
#            label   = j,
#            plot.title =  paste0("BRCA: OR for paired probes ",
#                                 j, "methylated in ",
#                                 group1, " vs ",group2)
#          )
#          motif.enrichment <- readr::read_csv(
#            paste0(dir.out,
#                   "/getMotif.",j,
#                   ".motif.enrichment.csv")
#          )
#          if(length(enriched.motif) > 0){
#            #-------------------------------------------------------------
#            # Step 3.4: Identifying regulatory TFs                        |
#            #-------------------------------------------------------------
#            print("get.TFs")
#  
#            TF <- get.TFs(
#              data           = mae,
#              enriched.motif = enriched.motif,
#              dir.out        = dir.out,
#              mode           = "supervised",
#              group.col      = group.col,
#              group1         = group1,
#              diff.dir       = j,
#              group2         = group2,
#              cores          = cores,
#              label          = j
#            )
#            TF.meth.cor <- get(
#              load(paste0(dir.out, "/getTF.",j, ".TFs.with.motif.pvalue.rda"))
#            )
#            save(
#              mae, TF, enriched.motif, Sig.probes.paired,
#              pair, nearGenes, Sig.probes, motif.enrichment,
#              TF.meth.cor,
#              file = paste0(dir.out,"/ELMER_results_",j,".rda")
#            )
#          }
#        }
#      }, error = function(e){
#        message(e)
#      })
#    }
#  }