## ----setup, include = FALSE------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, dpi=60 ) ## ----library, message=FALSE------------------------------------------------ library(seqsetvis) library(data.table) library(GenomicRanges) ## ----files, echo=FALSE----------------------------------------------------- bam_file = system.file("extdata/test_peaks.bam", package = "seqsetvis") xls_file = system.file("extdata/test_peaks.xls", package = "seqsetvis") np_file = system.file("extdata/test_loading.narrowPeak", package = "seqsetvis") pe_file = system.file("extdata/Bcell_PE.mm10.bam", package = "seqsetvis") data("test_peaks") query_gr = test_peaks strand_colors = c("*" = "darkgray", "+" = "blue", "-" = "red") theme_set(theme_classic()) p_default = ggplot() + facet_grid("peak_id~peak_status") + scale_color_manual(values = strand_colors) + labs(x = "bp", y = "read pileup") ## ----basic, fig.cap="Read pileup with default parameters"------------------ bam_dt = ssvFetchBam(bam_file, query_gr, return_data.table = TRUE) bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic = p_default + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) p_basic ## ----basic2, fig.cap="Read pileup with default parameters"----------------- bam_dt = ssvFetchBam(bam_file, query_gr, fragLens = NA, win_size = 5, return_data.table = TRUE) bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic = p_default + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) p_basic ## ----basic_strand, fig.cap="Strand sensitive read pileup"------------------ bam_dt = ssvFetchBam(bam_file, query_gr, fragLens = NA, win_size = 5, return_data.table = TRUE, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) + facet_grid("peak_id~peak_status") ## ---- basic_NAfragLens, fig.cap="no extension to fragment length"---------- bam_dt = ssvFetchBam(bam_file, win_size = 5, fragLens = NA, query_gr, return_data.table = TRUE, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) ## -------------------------------------------------------------------------- bam_dt = ssvFetchBam(bam_file, win_size = 10, fragLens = "auto", query_gr, return_data.table = TRUE, max_dupes = 1, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) ## ---- fig.cap="Calculated fragment length doesn't match peak strand cross correlation very well."---- shift_dt = crossCorrByRle(bam_file, query_gr) shift_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] ggplot(shift_dt, aes(x = shift, y = correlation)) + geom_path() + facet_grid("peak_id~peak_status") + annotate("line", x = rep(172, 2), y = range(shift_dt$correlation), color = "red") ## -------------------------------------------------------------------------- shift_dt[, .(max_shift = shift[which.max(correlation)]), .(peak_status, peak_id)] ## ----calc frag len--------------------------------------------------------- fl = fragLen_calcStranded(bam_file, subset(query_gr, grepl("peak", name))) fl ## -------------------------------------------------------------------------- bam_dt = ssvFetchBam(bam_file, win_size = 10, fragLens = 141, query_gr, return_data.table = TRUE, max_dupes = 1, target_strand = "both") bam_dt[, c("peak_status", "peak_id") := tstrsplit(id, "_", keep = c(4,5))] p_basic + geom_path(data = bam_dt, aes(x = x, y = y, color = strand)) ## -------------------------------------------------------------------------- data("Bcell_peaks") pe_default = ssvFetchBamPE(pe_file, Bcell_peaks) pe_raw = ssvFetchBamPE(pe_file, Bcell_peaks, return_unprocessed = TRUE) pe_raw[isize > 0, mean(isize), .(id)] ggplot(pe_raw[isize > 0], aes(x = isize)) + geom_histogram() + facet_wrap("id") shift_dt = crossCorrByRle(pe_file, Bcell_peaks, flip_strand = TRUE) shift_dt[, shift[which.max(correlation)], .(id)] ggplot(shift_dt, aes(x = shift, y = correlation)) + geom_path() + facet_wrap("id") + annotate("line", x = rep(160, 2), y = range(shift_dt$correlation), color = "red")