## ---- eval=TRUE, warnings = FALSE, echo=TRUE,message=FALSE--------------- library(ChIPanalyser) #Load data data(ChIPanalyserData) # Loading DNASequenceSet from BSgenome object library(BSgenome.Dmelanogaster.UCSC.dm3) DNASequenceSet <-getSeq(BSgenome.Dmelanogaster.UCSC.dm3) #Loading Position Frequency Matrix PFM <- file.path(system.file("extdata",package="ChIPanalyser"),"BCDSlx.pfm") #Checking if correctly loaded ls() ## ----eval=TRUE, warnings = FALSE----------------------------------------- # Building a genomicProfileParameters objects for data # storage and PWM computation GPP <- genomicProfileParameters(PFM=PFM,PFMFormat="raw", BPFrequency=DNASequenceSet, ScalingFactorPWM = 1.5, PWMThreshold = 0.7) GPP # Building occupancyProfileParameters with default values OPP <- occupancyProfileParameters() OPP # Building occupancyProfileParameters with custom values OPP <- occupancyProfileParameters(ploidy= 2, boundMolecules= 1000, chipMean = 200, chipSd = 200, chipSmooth = 250, maxSignal = 1.847, backgroundSignal = 0.02550997) OPP ## Extracting ChIP score eveLocusChip<-processingChIPseq(eveLocusChip,eveLocus,noiseFilter="zero",cores=1) str(eveLocusChip) ### Extracting occupancy profile parameters object built from ChIP data OPP<-eveLocusChip[[2]] eveLocusChip<-eveLocusChip[[1]] ## ----eval=FALSE---------------------------------------------------------- # ScalingFactorPWM(genomicProfileParameters) <- c(0.25, 0.5, 0.75, 1, 1.25, # 1.5, 1.75, 2, 2.5, 3, 3.5 ,4 ,4.5, 5) # # boundMolecules(occupancyProfileParameters) <- c(1, 10, 20, 50, 100, # 200, 500,1000,2000, 5000,10000,20000,50000, 100000, # 200000, 500000, 1000000) ## ---- eval = TRUE, warnings = FALSE-------------------------------------- optimalParam <- suppressWarnings(computeOptimal(DNASequenceSet = DNASequenceSet, genomicProfileParameters = GPP, LocusProfile = eveLocusChip, setSequence = eveLocus, DNAAccessibility = Access, occupancyProfileParameters = OPP, optimalMethod = "all", peakMethod="moving_kernel", cores=1)) str(optimalParam) ## ---- eval=TRUE, warnings = FALSE---------------------------------------- genomeWide <- computeGenomeWidePWMScore(DNASequenceSet=DNASequenceSet, genomicProfileParameters=GPP, DNAAccessibility = Access,cores=1) genomeWide ## ----eval=TRUE, warnings = FALSE----------------------------------------- SitesAboveThreshold <- computePWMScore(DNASequenceSet=DNASequenceSet, genomicProfileParameters=genomeWide, setSequence=eveLocus, DNAAccessibility = Access,cores=1) SitesAboveThreshold ## ----eval=TRUE, warnings = FALSE----------------------------------------- Occupancy <- computeOccupancy(SitesAboveThreshold, occupancyProfileParameters= OPP) Occupancy ## ---- eval=TRUE, warnings = FALSE---------------------------------------- chipProfile <- computeChipProfile(setSequence = eveLocus, occupancy = Occupancy,occupancyProfileParameters = OPP, method="moving_kernel") chipProfile ## ---- eval=TRUE, warnings = FALSE---------------------------------------- AccuracyEstimate <- profileAccuracyEstimate(LocusProfile = eveLocusChip, predictedProfile = chipProfile, occupancyProfileParameters = OPP,method="all") AccuracyEstimate <-AccuracyEstimate[[1]][[1]][[1]] AccuracyEstimate ## ---- eval=TRUE, warnings = FALSE, fig.width=10, fig.height=8------------ # Plotting Optimal heat maps par(oma=c(0,0,3,0)) layout(matrix(1:8,ncol=4, byrow=T),width=c(6,1.5,6,1.5),height=c(1,1)) plotOptimalHeatMaps(optimalParam,layout=FALSE) ## ---- eval=TRUE, warnings = FALSE, fig.width=10, fig.height=4.5---------- # Plotting occupancy Profile ## plotOccupancyProfile(predictedProfile=chipProfile[[1]][[1]], setSequence=eveLocus, chipProfile = eveLocusChip[[1]], DNAAccessibility = Access, occupancy = AllSitesAboveThreshold(Occupancy)[[1]][[1]], profileAccuracy=AccuracyEstimate, occupancyProfileParameters = OPP, geneRef=geneRef ) ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # genomicProfileParameters(PWM, PFM, ScalingFactorPWM, PFMFormat, pseudocount, # BPFrequency, naturalLog, noOfSites, # minPWMScore, maxPWMScore, PWMThreshold, # AllSitesAboveThreshold, DNASequenceLength, # averageExpPWMScore, strandRule,whichstrand, NoAccess) ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # # Assign Value wanted for each parameter # GPP <- genomicProfileParameters(PWM, PFM,ScalingFactorPWM, PFMFormat, # pseudocount, BPFrequency, naturalLog, noOfSites, # PWMThreshold, DNASequenceLength, # strandRule, whichstrand) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- # return empty genomicProfileParameters object GPP <- genomicProfileParameters() # return minimal working object GPP <- genomicProfileParameters(PFM=PFM,PFMFormat="raw") # Suggested Minimal Build GPP <- genomicProfileParameters(PFM=PFM,PFMFormat="raw", BPFrequency=DNASequenceSet) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- #Accessing PositionWeightMatrix slot PositionWeightMatrix(GPP) ## ----eval=FALSE, warnings= FALSE, echo=TRUE------------------------------ # # Setting PositionWeightMatrix slot # PositionWeightMatrix(GPP) <- newPWM # ### This is not the advised method # ### newPWM is a matrix following the format described above ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- # Accessing PositionFrequencyMatrix slot PositionFrequencyMatrix(GPP) ## ----eval=FALSE, warnings=FALSE, echo=TRUE------------------------------- # # Setting PositionFrequencyMatrix slot # PositionFrequencyMatrix(GPP) <- newPFM # ## ----eval=FALSE, echo=TRUE----------------------------------------------- # PFMFormat(GPP) # # PFMFormat(GPP)<-"raw" ## ---- eval=F, echo=TRUE-------------------------------------------------- # ScalingFactorPWM(GPP) # # ScalingFactorPWM(GPP) <- 0.5 # # ScalingFactorPWM(GPP) <- c(0.5, 1, 1.5, 2) ## ---- eval=F, echo=TRUE-------------------------------------------------- # PWMpseudocount(GPP) # # PWMpseudocount(GPP) <- 1 ## ---- eval=F, echo=TRUE-------------------------------------------------- # BPFrequency(GPP) # # BPFrequency(GPP)<-c(0.2900342,0.2101426,0.2099192,0.2899039) # # BPFrequency(GPP) <- DNASequenceSet # ## ---- eval=F, echo=TRUE-------------------------------------------------- # naturalLog(GPP) # # naturalLog(GPP) <- FALSE # ## ---- eval=F, echo=TRUE-------------------------------------------------- # noOfSites(GPP) # # noOfSites(GPP) <- 8 # ## ---- eval=F, echo=TRUE-------------------------------------------------- # PWMThreshold(GPP) # # PWMThreshold(GPP) <- 0.7 # ## ---- eval=F, echo=TRUE-------------------------------------------------- # strandRule(GPP) # # strandRule(GPP) <- "mean" # ## ---- eval=F, echo=TRUE-------------------------------------------------- # whichstrand(GPP) # # whichstrand(GPP) <- "+" # ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- maxPWMScore(Occupancy) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- minPWMScore(Occupancy) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- averageExpPWMScore(Occupancy) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- DNASequenceLength(Occupancy) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- NoAccess(Occupancy) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- AllSitesAboveThreshold(Occupancy) # Or searchSites(Occupancy) ## ---- eval=FALSE--------------------------------------------------------- # OPP <- occupancyProfileParameters(ploidy = 2 ,boundMolecules = 1000 , # backgroundSignal = 0 ,maxSignal = 1, chipMean = 150 , chipSd = 150 , # chipSmooth = 250 , stepSize = 10 , # removeBackground = 0 ) ## ---- eval=FALSE--------------------------------------------------------- # ploidy(OPP) # ploidy(OPP) <- 2 ## ---- eval=FALSE--------------------------------------------------------- # boundMolecules(OPP) # boundMolecules(OPP) <- 5000 ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # backgroundSignal(OPP) # # backgroundSignal(OPP) <- 0.02550997 ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # maxSignal(OPP) # # maxSignal(OPP) <- 1.86 ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # chipMean(OPP) # # chipMean(OPP) <- 150 # ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # chipSd(OPP) # # chipSd(OPP) <- 150 ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # chipSmooth(OPP) # # chipSmooth(OPP) <- 250 ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # stepSize(OPP) # # stepSize(OPP) <- 10 ## ---- eval=FALSE, echo=TRUE---------------------------------------------- # removeBackground(OPP) # # removeBackground(OPP) <- 0 ## ---- eval=FALSE--------------------------------------------------------- # processingChIPseq(profile,loci=NULL,reduce=NULL, # occupancyProfileParameters=NULL,noiseFilter="zero",peaks=NULL, # Access=NULL,cores=1) # ## ---- eval=F------------------------------------------------------------- # computeGenomeWidePWMScore(DNASequenceSet, genomicProfileParameters, # DNAAccessibility = NULL, GenomeWide = TRUE,cores=1, verbose = TRUE) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- DNASequenceSet ## ---- eval=TRUE, warnings = FALSE---------------------------------------- #Extracting DNAStringSet from BSgenome DNASequenceSet <- getSeq(BSgenome.Dmelanogaster.UCSC.dm3) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- GPP ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- # DNA accessibility Access ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # # With DNAAccessibility # # GenomeWide <- computeGenomeWidePWMScore(DNASequenceSet = DNASequenceSet, # genomicProfileParameters = GPP, DNAAccessibility = Access,cores=1) # # GenomeWide # # # Without DNA accessibility # # GenomeWide <- computeGenomeWidePWMScore(DNASequenceSet = DNASequenceSet, # genomicProfileParameters = GPP,cores=1) # GenomeWide ## ---- eval=FALSE--------------------------------------------------------- # computePWMScore(DNASequenceSet, genomicProfileParameter, # setSequence = NULL, DNAAccessibility = NULL, cores=1 ,verbose = TRUE) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- eveLocus ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- # Sequence names of Loci seqnames(eveLocus) # Names of Loci names(eveLocus) # Naming Loci in GRanges names(eveLocus) <- "eve" ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # # With DNA Accessibility # # PWMScores <- computePWMScore(DNASequenceSet = DNASequenceSet, # genomicProfileParameters = GenomeWide, # setSequence = eveLocus, DNAAccessibility = Access,cores=1) # PWMScores # # # Without DNA Accessibility # # PWMScores <- computePWMScore(DNASequenceSet = DNASequenceSet, # genomicProfileParameters = GenomeWide, # setSequence = eveLocus,cores=1) # PWMScores ## ----eval=FALSE---------------------------------------------------------- # computeOccupancy(AllSitesPWMScore, occupancyProfileParameters = NULL, # norm = TRUE,verbose = TRUE) ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # AllSitesAboveThreshold(PWMScores) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- OPP ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # Occupancy <- computeOccupancy(AllSitesPWMScore = PWMScores, # occupancyProfileParameters = OPP) # Occupancy ## ---- eval=FALSE--------------------------------------------------------- # computeChipProfile( setSequence , # occupancy, occupancyProfileParameters = NULL, norm = TRUE, # method = c("moving_kernel","truncated_kernel","exact"), # peakSignificantThreshold= NULL,cores=1 # verbose = TRUE) ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # chipProfile <- computeChipProfile(setSequence = eveLocus, # occupancy = Occupancy,occupancyProfileParameters = OPP,cores=1) # chipProfile ## ---- eval=FALSE,echo=TRUE----------------------------------------------- # searchSites(Sites,ScalingFactor="all", BoundMolecules="all",Locus="all") ## ---- eval=TRUE, echo=TRUE----------------------------------------------- searchSites(Occupancy) ## ---- eval=TRUE, echo=TRUE----------------------------------------------- searchSites(chipProfile, ScalingFactor=c(1.5,2.5), BoundMolecules=c(1000,1500) ,Locus=c("eve","odd")) ## ---- eval=FALSE--------------------------------------------------------- # profileAccuracyEstimate(LocusProfile, # predictedProfile, occupancyProfileParameters = NULL,method="all") ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- str(eveLocusChip) ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # AccuracyEstimate <- profileAccuracyEstimate(LocusProfile = eveLocusChip, # predictedProfile = chipProfile, occupancyProfileParameters = OPP) # AccuracyEstimate ## ----eval=FALSE---------------------------------------------------------- # ScalingFactorPWM(genomicProfileParameters) <- c(0.25, 0.5, 0.75, 1, 1.25, # 1.5, 1.75, 2, 2.5, 3, 3.5 ,4 ,4.5, 5) # # boundMolecules(occupancyProfileParameters) <- c(1, 10, 20, 50, 100, # 200, 500,1000,2000, 5000,10000,20000,50000, 100000, # 200000, 500000, 1000000) ## ---- eval=FALSE--------------------------------------------------------- # computeOptimal(DNASequenceSet, # genomicProfileParameters, # LocusProfile, # setSequence, # DNAAccessibility = NULL, # occupancyProfileParameters = NULL, # optimalMethod = "all", # peakMethod="moving_kernel" # cores=1) ## ---- eval=FALSE, warnings = FALSE--------------------------------------- # optimalParam <- computeOptimal(DNASequenceSet = DNASequenceSet, # genomicProfileParameters = GPP, # LocusProfile = eveLocusChip, # setSequence = eveLocus, # DNAAccessibility = Access, # occupancyProfileParameters = OPP, # optimalMethod = "all", # cores=1) # optimalParam ## ---- eval=F------------------------------------------------------------- # plotOptimalHeatMaps(optimalParam=optimalParam,contour=TRUE,col=NULL,main=NULL, # layout=TRUE) ## ---- eval=FALSE, warnings = FALSE, echo=TRUE, fig.width=4, fig.height=10---- # plotOptimalHeatMaps(optimalParam) ## ----eval=FALSE, fig.width=4, fig.width=10, fig.height=3----------------- # plotOccupancyProfile <- function(predictedProfile, # setSequence, # chipProfile = NULL, # DNAAccessibility = NULL, # occupancy = NULL, # profileAccuracy=NULL, # PWM=FALSE, # occupancyProfileParameters = NULL, # geneRef = NULL,axis=TRUE,...) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- geneRef ## ---- eval=FALSE, warnings = FALSE, echo=TRUE---------------------------- # plotOccupancyProfile(predictedProfile=chipProfile[[1]][[1]], # setSequence=eveLocus, # chipProfile = eveLocusChip[[1]], # DNAAccessibility = Access, # occupancy = AllSitesAboveThreshold(Occupancy)[[1]][[1]], # occupancyProfileParameters = OPP, # geneRef =geneRef) ## ---- eval=TRUE, warnings = FALSE, echo=TRUE----------------------------- sessionInfo()