## ----preloadLibrary, echo=FALSE, results="hide", warning=FALSE------------- suppressPackageStartupMessages({ library(InPAS) library(BSgenome.Mmusculus.UCSC.mm10) library(TxDb.Mmusculus.UCSC.mm10.knownGene) library(org.Hs.eg.db) library(cleanUpdTSeq) }) ## ----loadLibrary----------------------------------------------------------- library(InPAS) library(BSgenome.Mmusculus.UCSC.mm10) library(TxDb.Mmusculus.UCSC.mm10.knownGene) path <- file.path(find.package("InPAS"), "extdata") ## ----prepareAnno----------------------------------------------------------- library(org.Hs.eg.db) samplefile <- system.file("extdata", "hg19_knownGene_sample.sqlite", package="GenomicFeatures") txdb <- loadDb(samplefile) utr3.sample.anno <- utr3Annotation(txdb=txdb, orgDbSYMBOL="org.Hs.egSYMBOL") utr3.sample.anno ## ----loadAnno-------------------------------------------------------------- ##step1 annotation # load from dataset data(utr3.mm10) ## ----step2HugeDataF-------------------------------------------------------- load(file.path(path, "polyA.rds")) library(cleanUpdTSeq) data(classifier) bedgraphs <- c(file.path(path, "Baf3.extract.bedgraph"), file.path(path, "UM15.extract.bedgraph")) hugeData <- FALSE ##step1 Calculate coverage coverage <- coverageFromBedGraph(bedgraphs, tags=c("Baf3", "UM15"), genome=BSgenome.Mmusculus.UCSC.mm10, hugeData=hugeData) ## we hope the coverage rate of should be greater than 0.75 in the expressed gene. ## which is used because the demo data is a subset of genome. coverageRate(coverage=coverage, txdb=TxDb.Mmusculus.UCSC.mm10.knownGene, genome=BSgenome.Mmusculus.UCSC.mm10, which = GRanges("chr6", ranges=IRanges(98013000, 140678000))) ##step2 Predict cleavage sites CPs <- CPsites(coverage=coverage, genome=BSgenome.Mmusculus.UCSC.mm10, utr3=utr3.mm10, search_point_START=200, cutEnd=.2, long_coverage_threshold=3, background="10K", txdb=TxDb.Mmusculus.UCSC.mm10.knownGene, PolyA_PWM=pwm, classifier=classifier, shift_range=50, step=10) head(CPs) ##step3 Estimate 3UTR usage res <- testUsage(CPsites=CPs, coverage=coverage, genome=BSgenome.Mmusculus.UCSC.mm10, utr3=utr3.mm10, method="fisher.exact", gp1="Baf3", gp2="UM15") ##step4 view the results as(res, "GRanges") filterRes(res, gp1="Baf3", gp2="UM15", background_coverage_threshold=3, adj.P.Val_cutoff=0.05, dPDUI_cutoff=0.3, PDUI_logFC_cutoff=0.59) ## ----OneStep--------------------------------------------------------------- if(interactive()){ res <- inPAS(bedgraphs=bedgraphs, tags=c("Baf3", "UM15"), genome=BSgenome.Mmusculus.UCSC.mm10, utr3=utr3.mm10, gp1="Baf3", gp2="UM15", txdb=TxDb.Mmusculus.UCSC.mm10.knownGene, search_point_START=200, short_coverage_threshold=15, long_coverage_threshold=3, cutStart=0, cutEnd=.2, hugeData=FALSE, shift_range=50, PolyA_PWM=pwm, classifier=classifier, method="fisher.exact", adj.P.Val_cutoff=0.05, dPDUI_cutoff=0.3, PDUI_logFC_cutoff=0.59) } ## ----SingleGroupData------------------------------------------------------- inPAS(bedgraphs=bedgraphs[1], tags=c("Baf3"), genome=BSgenome.Mmusculus.UCSC.mm10, utr3=utr3.mm10, gp1="Baf3", gp2=NULL, txdb=TxDb.Mmusculus.UCSC.mm10.knownGene, search_point_START=200, short_coverage_threshold=15, long_coverage_threshold=3, cutStart=0, cutEnd=.2, hugeData=FALSE, PolyA_PWM=pwm, classifier=classifier, method="singleSample", adj.P.Val_cutoff=0.05, step=10) ## ----sessionInfo, results='asis'------------------------------------------- sessionInfo()