## ----setup, echo=FALSE, results="hide"---------------------------------------- knitr::opts_chunk$set(tidy = FALSE, cache = FALSE, dev = "png", message = FALSE, error = FALSE, warning = TRUE) suppressPackageStartupMessages(require(BiocStyle)) ## ----citaction, eval = FALSE, echo = FALSE------------------------------------ # #**Note:** if you use DEWSeq in published research, please cite: # # # #> Authors. (Year) # #> Title # #> *Genome Biology*, **15**:550. # #> [10.1186/s13059-014-0550-8](http://dx.doi.org/10.1186/s13059-014-0550-8) ## ---- fig.cap = "Crosslink site trunction at reverse transcription", fig.small=TRUE, echo = FALSE---- # out.width="80%", knitr::include_graphics("truncation.png") ## ---- fig.cap = "Truncation sites (referred to as crosslink sites) are the neighboring position of the aligned read", out.width="80%", echo = FALSE---- # fig.width=200, out.width="200px" knitr::include_graphics("truncationsite.png") ## ---- fig.cap = "Different binding modes of RNA-binding proteins", out.width="120%", echo = FALSE---- # fig.width=200, out.width="200px" knitr::include_graphics("binding_modes.png") ## ---- fig.cap = "Chance of crosslinking", out.width="80%", echo = FALSE------- # fig.width=200, out.width="200px" knitr::include_graphics("crosslinking_chance.png") ## ---- fig.cap = "Different RNase concentrations will result in different fragment sizes.", out.width="80%", echo = FALSE---- knitr::include_graphics("digestionpatterns.png") ## ---- fig.cap = "Read-throughs", out.width="80%", echo = FALSE---------------- knitr::include_graphics("readthrough.png") ## ---- fig.cap = "Early truncation events", out.width="100%", echo = FALSE----- knitr::include_graphics("earlytruncation.png") ## ---- fig.cap = "Sliding window approach with single-nucleotide position count data", out.width="140%", out.height="120%", echo = FALSE---- knitr::include_graphics("dewseqoverview.png") ## ---- fig.cap = "Combining significant windows", echo = FALSE, out.width="80%"---- # fig.width=200, out.width="200px" knitr::include_graphics("overlapping_sliding_windows.png") ## ---- fig.cap = "Combining significant windows", out.width="80%", echo = FALSE---- knitr::include_graphics("controls.png") ## ---- fig.cap = "htseq-clip workflow", out.width="100%", echo = FALSE--------- knitr::include_graphics("htseq-clip.png") ## ----install, eval = FALSE---------------------------------------------------- # if(!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("DEWSeq") ## ----load library, eval = TRUE------------------------------------------------ require(DEWSeq) require(IHW) require(tidyverse) ## ----filenames, eval = TRUE--------------------------------------------------- countFile <- file.path(system.file('extdata',package='DEWSeq'),'SLBP_K562_w50s20_counts.txt.gz') annotationFile <- file.path(system.file('extdata',package='DEWSeq'),'SLBP_K562_w50s20_annotation.txt.gz') ## ----loading tidyverse, eval = TRUE, results='hide'--------------------------- library(tidyverse) library(data.table) ## ----read in count matrix, eval = TRUE---------------------------------------- countData <- fread(countFile, sep = "\t") ## ----read count matrix tidyr, eval = TRUE------------------------------------- countData <- read_tsv(countFile) ## ----read in annotation, eval = TRUE------------------------------------------ annotationData <- fread(annotationFile, sep = "\t") ## ----read annotation tidyr, eval = FALSE-------------------------------------- # annotationData <- read_tsv(annotationFile) ## ----datasummary-------------------------------------------------------------- head(countData) dim(countData) head(annotationData) dim(annotationData) ## ----create colData----------------------------------------------------------- colData <- data.frame( row.names = colnames(countData)[-1], # since the first column is unique_id type = factor( c(rep("IP", 2), ## change this accordingly rep("SMI", 1)), ## levels = c("IP", "SMI")) ) ## ----example import----------------------------------------------------------- ddw <- DESeqDataSetFromSlidingWindows(countData = countData, colData = colData, annotObj = annotationData, tidy = TRUE, design = ~type) ddw ## ----row filtering, eval = TRUE----------------------------------------------- keep <- rowSums(counts(ddw)) >= 10 ddw <- ddw[keep,] ## ----estimate size factors, eval = TRUE--------------------------------------- ddw <- estimateSizeFactors(ddw) sizeFactors(ddw) ## ----filter for mRNAs, eval = TRUE-------------------------------------------- ddw_mRNAs <- ddw[ rowData(ddw)[,"gene_type"] == "protein_coding", ] ddw_mRNAs <- estimateSizeFactors(ddw_mRNAs) sizeFactors(ddw) <- sizeFactors(ddw_mRNAs) sizeFactors(ddw) ## ----tmp significant windows-------------------------------------------------- ddw_tmp <- ddw ddw_tmp <- estimateDispersions(ddw_tmp, fitType = "local", quiet = TRUE) ddw_tmp <- nbinomWaldTest(ddw_tmp) tmp_significant_windows <- results(ddw_tmp, contrast = c("type", "IP", "SMI"), tidy = TRUE, filterFun = ihw) %>% dplyr::filter(padj < 0.05) %>% .[["row"]] rm("ddw_tmp") ## ----------------------------------------------------------------------------- ddw_mRNAs <- ddw_mRNAs[ !rownames(ddw_mRNAs) %in% tmp_significant_windows, ] ddw_mRNAs <- estimateSizeFactors(ddw_mRNAs) sizeFactors(ddw) <- sizeFactors(ddw_mRNAs) rm( list = c("tmp_significant_windows", "ddw_mRNAs")) sizeFactors(ddw) ## ----estimate dispersions and wald test, eval = TRUE-------------------------- ddw <- estimateDispersions(ddw, fitType = "local", quiet = TRUE) ddw <- nbinomWaldTest(ddw) ## ---- fig.cap = "Dispersion Estimates", fig.wide = TRUE----------------------- plotDispEsts(ddw) ## ----DEWSeq results, eval = TRUE---------------------------------------------- resultWindows <- resultsDEWSeq(ddw, contrast = c("type", "IP", "SMI"), tidy = TRUE) %>% as_tibble resultWindows ## ----IHW multiple hypothesis correction, eval = TRUE, warning = FALSE--------- resultWindows[,"p_adj_IHW"] <- adj_pvalues(ihw(pSlidingWindows ~ baseMean, data = resultWindows, alpha = 0.05, nfolds = 10)) ## ----windows tables, eval = TRUE---------------------------------------------- resultWindows <- resultWindows %>% mutate(significant = resultWindows$p_adj_IHW < 0.01) sum(resultWindows$significant) ## ----how genes form windows--------------------------------------------------- resultWindows %>% filter(significant) %>% arrange(desc(log2FoldChange)) %>% .[["gene_name"]] %>% unique %>% head(20) ## ----extractRegions, eval = TRUE, results = "hide"---------------------------- resultRegions <- extractRegions(windowRes = resultWindows, padjCol = "p_adj_IHW", padjThresh = 0.01, log2FoldChangeThresh = 0.5) %>% as_tibble ## ----resultRegions output, eval = TRUE---------------------------------------- resultRegions ## ----toBED output, eval = FALSE----------------------------------------------- # toBED(windowRes = resultWindows, # regionRes = resultRegions, # fileName = "enrichedWindowsRegions.bed") ## ----------------------------------------------------------------------------- sessionInfo()