## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----quickIntro, eval=FALSE--------------------------------------------------- # # load the package # library(icetea) # # # provide demultiplexing barcodes and sample names # idxlist <- c("CAAGTG", "TTAGCC", "GTGGAA", "TGTGAG") # dir <- system.file("extdata", package="icetea") # # corresponding sample names # fnames <- c("embryo1", "embryo2", "embryo3", "embryo4") # ## create CapSet object # cs <- newCapSet(expMethod = 'MAPCap', # fastq_R1 = file.path(dir, 'mapcap_test_R1.fastq.gz'), # fastq_R2 = file.path(dir, 'mapcap_test_R2.fastq.gz'), # idxList = idxlist, # sampleNames = fnames) # # # demultiplex fastq and trim the barcodes # dir.create("splitting") # cs <- demultiplexFASTQ(cs, max_mismatch = 1, outdir = "splitting", ncores = 10) # # # map fastq # dir.create("mapping") # cs <- mapCaps(cs, subread_idx, outdir = "mapping", ncores = 20, logfile = "mapping/subread_mapping.log") # # # filter PCR duplicates # dir.create("removedup") # cs <- filterDuplicates(cs, outdir = "removedup") # # # detect TSS # dir.create("tssCalling") # cs <- detectTSS(cs, groups = c("wt", "wt", "mut", "mut"), outfile_prefix = "tssCalling/testTSS") ## ----makeCS, eval=TRUE, results = "hold"-------------------------------------- # provide demultiplexing barcodes as strings idxlist <- c("CAAGTG", "TTAGCC", "GTGGAA", "TGTGAG") # provide corresponding sample names fnames <- c("embryo1", "embryo2", "embryo3", "embryo4") # `dir` contains example data provided with this package dir <- system.file("extdata", package = "icetea") ## CapSet object from raw (multiplexed) fastq files library(icetea) cs <- newCapSet(expMethod = 'MAPCap', fastq_R1 = file.path(dir, 'mapcap_test_R1.fastq.gz'), fastq_R2 = file.path(dir, 'mapcap_test_R2.fastq.gz'), idxList = idxlist, sampleNames = fnames) ## ----sampleinfo, eval=TRUE, results = "hold"---------------------------------- si <- sampleInfo(cs) dir <- system.file("extdata/bam", package = "icetea") si$mapped_file <- list.files(dir, pattern = ".bam$", full.names = TRUE) sampleInfo(cs) <- si ## ----cs2, eval = TRUE, results = "hold"--------------------------------------- dir <- system.file("extdata/filtered_bam", package = "icetea") cs2 <- newCapSet(expMethod = 'MAPCap', filtered_file = list.files(dir, pattern = ".bam$", full.names = TRUE), sampleNames = fnames) ## ----demultiplex, eval=TRUE, results = "hold"--------------------------------- # demultiplex fastq and trim the barcodes dir.create("splitting") cs <- demultiplexFASTQ(cs, max_mismatch = 1, outdir = "splitting", ncores = 10) ## ----indexgenome, eval=FALSE-------------------------------------------------- # dir.create("genome_index") # library(Rsubread) # buildindex(basename = "genome_index/dm6", reference = "/path/to/dm6/genome.fa") ## ----mapcaps, eval=FALSE------------------------------------------------------ # # provide location of a subread index file # subread_idx <- "genome_index/dm6" # # map fastq # cs <- mapCaps(cs, subread_idx, outdir = "mapping", ncores = 20, logfile = "mapping/subread_mapping.log") # # you can save the CapSet object for later use # save(cs, file = "myCapSet.Rdata") ## ----splitbam, eval=FALSE----------------------------------------------------- # splitBAM_byIndex(bamFile, index_list, outfile_list, max_mismatch = 1, ncores = 10) ## ----filterdups, eval=TRUE, results = "hold"---------------------------------- # load a previously saved CapSet object (or create new one) cs <- exampleCSobject() # filter PCR duplicates and save output in a new directory dir.create("removedup") cs <- filterDuplicates(cs, outdir = "removedup") ## ----getTSS, eval=TRUE, results = "hold"-------------------------------------- # detect TSS dir.create("tssCalling") cs <- detectTSS(cs, groups = c("wt", "wt", "mut", "mut"), outfile_prefix = "tssCalling/testTSS", restrictChr = "X", ncores = 1) # export the detected TSS bed files exportTSS(cs, merged = TRUE, outfile_prefix = "tssCalling/testTSS") ## ----plotstats, eval=TRUE, results = "hold", fig.width=7, fig.height=5-------- # separated barchart for numbers plotReadStats(cs, plotValue = "numbers", plotType = "dodge") ## ----plotstats2, eval=TRUE, results = "hide", fig.width=7, fig.height=5------- # stacked barchart for proportions plotReadStats(cs, plotValue = "proportions", plotType = "stack" ) ## ----plotPrecision, eval=TRUE, results = "hold", fig.width=7, fig.height=5---- library("TxDb.Dmelanogaster.UCSC.dm6.ensGene") seqlevelsStyle(TxDb.Dmelanogaster.UCSC.dm6.ensGene) <- "ENSEMBL" # only analyse genes on chrX to make the analysis faster seqlevels(TxDb.Dmelanogaster.UCSC.dm6.ensGene) <- "X" transcripts <- transcripts(TxDb.Dmelanogaster.UCSC.dm6.ensGene) # Plotting the precision using a pre computed set of TSS (.bed files) : tssfile <- system.file("extdata", "testTSS_merged.bed", package = "icetea") plotTSSprecision(reference = transcripts, detectedTSS = tssfile, sampleNames = "testsample") ## ----diffTSS, eval=FALSE------------------------------------------------------ # ## fitDiffTSS returns a DGEGLM object # csfit <- fitDiffTSS(cs, groups = rep(c("wt","mut"), each = 2), normalization = "windowTMM", # outplots = NULL, plotref = "embryo1") # save(csfit, file = "diffTSS_fit.Rdata") # # ## This object is then used by the detectDiffTSS function to return differentially expressed TSSs # de_tss <- detectDiffTSS(csfit, testGroup = "mut", contGroup = "wt", # TSSfile = file.path(dir, "testTSS_merged.bed"), MAplot_fdr = 0.05) # ## export the output # library(rtracklayer) # export.bed(de_tss[de_tss$score < 0.05], con = "diffTSS_output.bed") ## ----spikeNorm, eval=FALSE---------------------------------------------------- # ## get gene counts for spike-in RNA mapped to human genome # library("TxDb.Hsapiens.UCSC.hg38.knownGene") # normfacs <- getNormFactors(cs_spike, features = genes(TxDb.Hsapiens.UCSC.hg38.knownGene)) # # csfit <- fitDiffTSS(cs, groups = rep(c("wt","mut"), each = 2), # normalization = NULL, normFactors = normfacs, # outplots = NULL, plotRefSample = "embryo1", ncores = 1) ## ----genecounts, eval=FALSE--------------------------------------------------- # # get transcripts # dm6trans <- transcriptsBy(TxDb.Dmelanogaster.UCSC.dm6.ensGene, "gene") # # get gene counts, counting reads around 500 bp of the TSS # gcounts <- getGeneCounts(cs, dm6trans) ## ----annotateTSS, eval = TRUE, results = "hold", fig.width=7, fig.height=5---- # save the output as data.frame (outdf) + plot on screen outdf <- annotateTSS(tssBED = tssfile, txdb = TxDb.Dmelanogaster.UCSC.dm6.ensGene, plotValue = "number", outFile = NULL)