## ----echo = FALSE, warning = FALSE, message = FALSE--------------------------- library(BiocParallel) register(bpstart(SerialParam())) ## ----setup, warning = FALSE, message = FALSE---------------------------------- library(chromswitch) ## ----message = FALSE, warning = FALSE----------------------------------------- library(rtracklayer) ## ----qs_query----------------------------------------------------------------- # Path to BED file in chromswitch installation query_path <- system.file("extdata/query.bed", package = "chromswitch") # Read in BED file, creating a GRanges object query <- rtracklayer::import(con = query_path, format = "BED") query ## ----qs_meta------------------------------------------------------------------ # Path to TSV in chromswitch meta_path <- system.file("extdata/metadata.tsv", package = "chromswitch") # Read in the table from a 2-column TSV file metadata <- read.delim(meta_path, sep = "\t", header = TRUE) metadata ## ----qs_pks------------------------------------------------------------------- # Paths to the BED files containing peak calls for each sample peak_paths <- system.file("extdata", paste0(metadata$Sample, ".H3K4me3.bed"), package = "chromswitch") # Import BED files containing MACS2 narrow peak calls using rtracklayer peaks <- readNarrowPeak(paths = peak_paths, # Paths to files, metadata = metadata) # Metadata dataframe ## ----quickstart_1, warning = FALSE-------------------------------------------- callSummary(query = query, # Input 1: Query regions metadata = metadata, # Input 2: Metadata dataframe peaks = peaks, # Input 3: Peaks mark = "H3K4me3") # Arbitrary string describing the data type ## ----quickstart_2, warning = FALSE-------------------------------------------- callBinary(query = query, # Input 1: Query regions metadata = metadata, # Input 2: Metadata dataframe peaks = peaks) # Input 3: Peaks ## ----regions------------------------------------------------------------------ # Path to BED file in chromswitch installation query_path <- system.file("extdata/query.bed", package = "chromswitch") # Read in BED file, creating a GRanges object query <- rtracklayer::import(con = query_path, format = "BED") query ## ----meta--------------------------------------------------------------------- # Path to TSV in chromswitch meta_path <- system.file("extdata/metadata.tsv", package = "chromswitch") # Read in the table from a 2-column TSV file metadata <- read.delim(meta_path, sep = "\t", header = TRUE) metadata ## ----paths-------------------------------------------------------------------- # Paths to the BED files containing peak calls for each sample peak_paths <- system.file("extdata", paste0(metadata$Sample, ".H3K4me3.bed"), package = "chromswitch") # Import BED files containing MACS2 narrow peak calls using # a helper function from chromswitch peaks <- readNarrowPeak(paths = peak_paths, # Paths to files, metadata = metadata) # Ensure the list is named by sample names(peaks) <- metadata$Sample ## ----read--------------------------------------------------------------------- extra_cols <- c("signalValue" = "numeric", "pValue" = "numeric", "qValue" = "numeric", "peak" = "numeric") # Obtain a list of GRanges objects containing peak calls peaks <- lapply(peak_paths, rtracklayer::import, format = "bed", extraCols = extra_cols) # Ensure the list is named by sample names(peaks) <- metadata$Sample ## ----manual------------------------------------------------------------------- # Read in all files into dataframes df <- lapply(peak_paths, read.delim, sep = "\t", header = FALSE, col.names = c("chr", "start", "end", "name", "score", "strand", "signalValue", "pValue", "qValue", "peak")) # Convert the dataframes into GRanges objects, retaining # additional columns besides chr, start, end peaks <- lapply(df, makeGRangesFromDataFrame, keep.extra.columns = TRUE) # Ensure the list is named by sample names(peaks) <- metadata$Sample ## ----summary_basic, warning = FALSE------------------------------------------- out <- callSummary(query = query, # Input 1: Query regions metadata = metadata, # Input 2: Metadata dataframe peaks = peaks, # Input 3: Peaks mark = "H3K4me3") # Arbitrary string describing the data type out ## ----threshold---------------------------------------------------------------- out[out$Consensus >= 0.75, ] ## ----summary2, warning = FALSE------------------------------------------------ out2 <- callSummary( # Standard arguments of the function query = query, metadata = metadata, peaks = peaks, # Arbitrary string describing data type mark = "H3K4me3", # For quality control, filter peaks based on associated stats # prior to constructing feature matrices filter = TRUE, # Provide column names and thresholds to use in the same order filter_columns = c("qValue", "signalValue"), filter_thresholds = c(10, 4), # Normalization options normalize = TRUE, # Strongly recommended # By default, set to equal summarize_columns, below normalize_columns = c("qValue", "signalValue"), # Columns to use for for feature matrix construction summarize_columns = c("qValue", "signalValue"), # In addition to summarizing peak statistics, # we can also optionally compute the # fraction of the region overlapped by peaks # and the number of peaks fraction = TRUE, n = FALSE, # TRUE by default, return the optimal number # of clusters, otherwise require k = 2 optimal_clusters = TRUE, # Set this to TRUE to save a PDF of the heatmap # for each region to the current working directory heatmap = FALSE, # Chromswitch uses BiocParallel as a backend for # parallel computations. Analysis is parallelized at the # level of query regions. BPPARAM = BiocParallel::SerialParam()) out2 ## ----binary_basic, warning = FALSE-------------------------------------------- out3 <- callBinary( # Standard arguments of the function query = query, metadata = metadata, peaks = peaks, # Logical, controls whether to # reduce gaps between peaks to eliminate noise reduce = TRUE, # Peaks in the same sample which are within this many bp # of each other will be merged gap = 300, # The fraction of reciprocal overlap required to define # two peaks as non unique, used to construct a binary ft matrix p = 0.4, # Include in output the number of features obtained in # each query region n_features = TRUE) out3 ## ----threshold2--------------------------------------------------------------- out3[out3$Consensus >= 0.75, ] ## ----pipe--------------------------------------------------------------------- library(magrittr) ## ----dplyr, warning = FALSE, message = FALSE---------------------------------- library(dplyr) ## ----filter------------------------------------------------------------------- # Number of peaks in each sample prior to filtering lapply(H3K4me3, length) %>% as.data.frame() H3K4me3_filt <- filterPeaks(peaks = H3K4me3, columns = c("qValue", "signalValue"), thresholds = c(10, 4)) # Number of peaks in each sample after filtering lapply(H3K4me3_filt, length) %>% as.data.frame() ## ----normalize---------------------------------------------------------------- # Summary of the two statistics we will use downstream in raw data in one sample H3K4me3_filt %>% lapply(as.data.frame) %>% lapply(select, signalValue, qValue) %>% lapply(summary) %>% `[`(1) H3K4me3_norm <- normalizePeaks(H3K4me3_filt, columns = c("qValue", "signalValue"), tail = 0.005) # Summary after normalization H3K4me3_norm %>% lapply(as.data.frame) %>% lapply(select, signalValue, qValue) %>% lapply(summary) %>% `[`(1) ## ----retrieve----------------------------------------------------------------- # TTYH1 ttyh1 <- query[2] ttyh1 ttyh1_pk <- retrievePeaks(peaks = H3K4me3_norm, metadata = metadata, region = ttyh1) ttyh1_pk ## ----summarizePeaks----------------------------------------------------------- summary_mat <- summarizePeaks(localpeaks = ttyh1_pk, mark = "H3K4me3", cols = c("qValue", "signalValue"), fraction = TRUE, n = FALSE) # The sample-by-feature matrix summary_mat ## ----cluster------------------------------------------------------------------ cluster(ft_mat = summary_mat, query = ttyh1, metadata = metadata, heatmap = TRUE, title = "TTYH1 - summary", optimal_clusters = TRUE) ## ----reduce------------------------------------------------------------------- ttyh1_pk_red <- reducePeaks(localpeaks = ttyh1_pk, gap = 300) ## ----binarizePeaks------------------------------------------------------------ binary_mat <- binarizePeaks(ttyh1_pk_red, p = 0.4) # Chromswitch finds a single unique peak in the region binary_mat ## ----cluster2----------------------------------------------------------------- cluster(ft_mat = binary_mat, metadata = metadata, query = ttyh1, optimal_clusters = TRUE) ## ----session------------------------------------------------------------------ sessionInfo()