## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----global_options, include=FALSE---------------------------------------------------------------- knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, fig.width=8, fig.height=8) options(width=100) ## ----eval=FALSE----------------------------------------------------------------------------------- # install.packages("devtools") # library("devtools") # install_github("transbioZI/BioMM") ## ----eval=FALSE----------------------------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # The following initializes usage of Bioc devel # BiocManager::install(version='devel') # BiocManager::install("BioMM") ## ----loadPkg, eval=TRUE, results="hide"----------------------------------------------------------- library(BioMM) library(BiocParallel) library(parallel) library(ranger) library(rms) library(glmnet) library(e1071) library(pROC) library(vioplot) library(variancePartition) library(CMplot) ## ----studyData, eval=TRUE------------------------------------------------------------------------- ## Get DNA methylation data studyData <- readRDS(system.file("extdata", "/methylData.rds", package="BioMM")) head(studyData[,1:5]) dim(studyData) ## ----annotationFile, eval=TRUE-------------------------------------------------------------------- ## Load annotation data featureAnno <- readRDS(system.file("extdata", "cpgAnno.rds", package="BioMM")) pathlistDB <- readRDS(system.file("extdata", "goDB.rds", package="BioMM")) head(featureAnno) str(pathlistDB[1:3]) ## ----pathlist, eval=TRUE-------------------------------------------------------------------------- ## Map to pathways (only 100 pathways in order to reduce the runtime) pathlistDBsub <- pathlistDB[1:100] pathlist <- omics2pathlist(data=studyData, pathlistDBsub, featureAnno, restrictUp=100, restrictDown=20, minPathSize=10) ## ----BioMMrandForest, eval=TRUE------------------------------------------------------------------- ## Parameters supervisedStage1=TRUE classifier <- "randForest" predMode <- "classification" paramlist <- list(ntree=300, nthreads=10) param1 <- MulticoreParam(workers = 1) param2 <- MulticoreParam(workers = 10) ## If BioMM is installed from Github, please use the following assignment: ## param1 <- 1 ## param2 <- 10 studyDataSub <- studyData[,1:2000] ## to reduce the runtime set.seed(123) result <- BioMM(trainData=studyDataSub, testData=NULL, pathlistDB, featureAnno, restrictUp=100, restrictDown=10, minPathSize=10, supervisedStage1, typePCA="regular", resample1="BS", resample2="CV", dataMode="allTrain", repeatA1=50, repeatA2=1, repeatB1=20, repeatB2=1, nfolds=10, FSmethod1=NULL, FSmethod2=NULL, cutP1=0.1, cutP2=0.1, fdr2=NULL, FScore=param1, classifier, predMode, paramlist, innerCore=param2) print(result) ## ----BioMMpara, eval=TRUE------------------------------------------------------------------------- ## SVM supervisedStage1=TRUE classifier <- "SVM" predMode <- "classification" paramlist <- list(tuneP=FALSE, kernel="radial", gamma=10^(-3:-1), cost=10^(-3:1)) ## GLM with elastic-net supervisedStage1=TRUE classifier <- "glmnet" predMode <- "classification" paramlist <- list(family="binomial", alpha=0.5, ypeMeasure="mse", typePred="class") ## PCA + random forest supervisedStage1=FALSE classifier <- "randForest" predMode <- "classification" paramlist <- list(ntree=300, nthreads=10) ## ----BioMMpathway, eval=TRUE---------------------------------------------------------------------- ## Parameters supervisedStage1=FALSE classifier <- "randForest" predMode <- "classification" paramlist <- list(ntree=300, nthreads=10) param1 <- MulticoreParam(workers = 1) param2 <- MulticoreParam(workers = 10) ## If BioMM is installed from Github, please use the following assignment: ## param1 <- 1 ## param2 <- 10 set.seed(123) result <- BioMM(trainData=studyData, testData=NULL, pathlistDBsub, featureAnno, restrictUp=100, restrictDown=10, minPathSize=10, supervisedStage1, typePCA="regular", resample1="BS", resample2="CV", dataMode="allTrain", repeatA1=40, repeatA2=1, repeatB1=40, repeatB2=1, nfolds=10, FSmethod1=NULL, FSmethod2=NULL, cutP1=0.1, cutP2=0.1, fdr2=NULL, FScore=param1, classifier, predMode, paramlist, innerCore=param2) print(result) ## ----stage2data, eval=TRUE------------------------------------------------------------------------ ## Pathway level data or stage-2 data prepared by reconBySupervised() stage2dataA <- readRDS(system.file("extdata", "/stage2dataA.rds", package="BioMM")) head(stage2dataA[,1:5]) dim(stage2dataA) ## ----stage2dataAprepare, eval=FALSE--------------------------------------------------------------- # #### Alternatively, 'stage2dataA' can be created by the following code: # ## Parameters # classifier <- "randForest" # predMode <- "probability" # paramlist <- list(ntree=300, nthreads=40) # param1 <- MulticoreParam(workers = 1) # param2 <- MulticoreParam(workers = 10) # ## If BioMM is installed from Github, please use the following assignment: # ## param1 <- 1 # ## param2 <- 10 # set.seed(123) # ## This will take a bit longer to run # stage2dataA <- reconBySupervised(trainDataList=pathlist, testDataList=NULL, # resample="BS", dataMode="allTrain", # repeatA=25, repeatB=1, nfolds=10, # FSmethod=NULL, cutP=0.1, fdr=NULL, FScore=param1, # classifier, predMode, paramlist, # innerCore=param2, outFileA=NULL, outFileB=NULL) # ## ----stage2dataViz, eval=TRUE--------------------------------------------------------------------- param <- MulticoreParam(workers = 1) ## If BioMM is installed from Github, please use the following assignment: ## param <- 1 plotVarExplained(data=stage2dataA, posF=TRUE, core=param, horizontal=FALSE, fileName=NULL) ## ----topFstage2data, eval=TRUE-------------------------------------------------------------------- param <- MulticoreParam(workers = 10) ## If BioMM is installed from Github, please use the following assignment: ## param <- 1 topPath <- plotRankedFeature(data=stage2dataA, posF=TRUE, topF=10, blocklist=pathlist, rankMetric="R2", colorMetric="size", core=param, fileName=NULL) ## ----cirPlot, eval=TRUE--------------------------------------------------------------------------- core <- MulticoreParam(workers = 10) ## If BioMM is installed from Github, use the following assignment: ## core <- 10 pathID <- gsub("\\.", ":", rownames(topPath)) pathSet <- pathlist[is.element(names(pathlist), pathID)] pathMatch <- pathSet[match(pathID, names(pathSet))] cirPlot4pathway(datalist=pathMatch, topPathID=names(pathMatch), core, fileName=NULL) ## ----sessioninfo, eval=TRUE----------------------------------------------------------------------- sessionInfo()