## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL ) # Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE---------------- ## Track time spent on making the vignette startTime <- Sys.time() ## Bib setup library("RefManageR") ## Write bibliography information bib <- c( R = citation(), BiocStyle = citation("BiocStyle")[1], knitr = citation("knitr")[1], RefManageR = citation("RefManageR")[1], rmarkdown = citation("rmarkdown")[1], sessioninfo = citation("sessioninfo")[1], testthat = citation("testthat")[1], ODER = citation("ODER")[1] ) ## ----"install", eval = FALSE-------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("eolagbaju/ODER") # # ## Check that you have a valid Bioconductor installation # BiocManager::valid() ## ----"citation"--------------------------------------------------------------- ## Citation info citation("ODER") ## ----load_data, eval = requireNamespace('ODER')------------------------------- library("ODER") library("magrittr") # Download recount data in the form of BigWigs gtex_metadata <- recount::all_metadata("gtex") gtex_metadata <- gtex_metadata %>% as.data.frame() %>% dplyr::filter(project == "SRP012682") url <- recount::download_study( project = "SRP012682", type = "samples", download = FALSE ) # file_cache is an internal ODER function to cache files for # faster repeated loading bw_path <- file_cache(url[1]) bw_path ## ----ODER--------------------------------------------------------------------- # load reference annotation from Ensembl gtf_url <- paste0( "http://ftp.ensembl.org/pub/release-103/gtf/homo_sapiens", "/Homo_sapiens.GRCh38.103.chr.gtf.gz" ) gtf_path <- file_cache(gtf_url) gtf_gr <- rtracklayer::import(gtf_path) # As of rtracklayer 1.25.16, BigWig is not supported on Windows. if (!xfun::is_windows()) { opt_ers <- ODER( bw_paths = bw_path, auc_raw = gtex_metadata[["auc"]][1], auc_target = 40e6 * 100, chrs = c("chr21"), genome = "hg38", mccs = c(2, 4, 6, 8, 10), mrgs = c(10, 20, 30), gtf = gtf_gr, ucsc_chr = TRUE, ignore.strand = TRUE, exons_no_overlap = NULL, bw_chr = "chr" ) } ## ----plot_example------------------------------------------------------------- # visualise the spread of mccs and mrgs if (!xfun::is_windows()) { # uses product of ODER plot_ers(opt_ers[["deltas"]], opt_ers[["opt_mcc_mrg"]]) } ## ----annotatER_example-------------------------------------------------------- library(utils) # running only chr21 to reduce runtime chrs_to_keep <- c("21") # prepare the txdb object to create a genomic state # based on https://support.bioconductor.org/p/93235/ hg38_chrominfo <- GenomeInfoDb::getChromInfoFromUCSC("hg38") new_info <- hg38_chrominfo$size[match( chrs_to_keep, GenomeInfoDb::mapSeqlevels(hg38_chrominfo$chrom, "Ensembl") )] names(new_info) <- chrs_to_keep gtf_gr_tx <- GenomeInfoDb::keepSeqlevels(gtf_gr, chrs_to_keep, pruning.mode = "tidy" ) GenomeInfoDb::seqlengths(gtf_gr_tx) <- new_info GenomeInfoDb::seqlevelsStyle(gtf_gr_tx) <- "UCSC" GenomeInfoDb::genome(gtf_gr_tx) <- "hg38" ucsc_txdb <- GenomicFeatures::makeTxDbFromGRanges(gtf_gr_tx) genom_state <- derfinder::makeGenomicState(txdb = ucsc_txdb) # convert UCSC chrs format to Ensembl to match the ERs ens_txdb <- ucsc_txdb GenomeInfoDb::seqlevelsStyle(ens_txdb) <- "Ensembl" # lung_junc_21_22 is an example data set of junctions # obtained from recount3, derived from the lung tissue # run annotatERs() data(lung_junc_21_22, package = "ODER") if (!xfun::is_windows()) { # uses product of ODER annot_ers <- annotatERs( opt_ers = head(opt_ers[["opt_ers"]], 100), junc_data = lung_junc_21_22, genom_state = genom_state, gtf = gtf_gr, txdb = ens_txdb ) # print first 5 ERs annot_ers[1:5] } ## ----refine_ERs_example------------------------------------------------------- if (!xfun::is_windows()) { # uses product of ODER refined_ers <- refine_ERs(annot_ers) refined_ers } ## ----get_count_matrix_example------------------------------------------------- # create sample metadata containing identifiers for each BigWig run <- gtex_metadata[["run"]][[1]] col_info <- as.data.frame(run) # install megadepth megadepth::install_megadepth() if (!xfun::is_windows()) { # uses product of ODER er_count_matrix <- get_count_matrix( bw_paths = bw_path, annot_ers = annot_ers, cols = col_info ) er_count_matrix } ## ----reproduce1, echo=FALSE--------------------------------------------------- ## Date the vignette was generated Sys.time() ## ----reproduce2, echo=FALSE--------------------------------------------------- ## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits = 3) ## ----reproduce3, echo=FALSE------------------------------------------------------------------------------------------- ## Session info library("sessioninfo") options(width = 120) session_info()