## ----settings, include = FALSE-------------------------------------------------------------------- #library(knitr) #opts_chunk$set(warning=TRUE, message = FALSE, cache = FALSE, tidy = FALSE, tidy.opts = list(width.cutoff = 60)) options(width = 100) knitr::opts_chunk$set(collapse = TRUE, comment = "#>",class.source = "whiteCode") ## ----message = FALSE, eval = FALSE---------------------------------------------------------------- # ## From Bioconductor repository # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # BiocManager::install("fgga") # # ## Or from GitHub repository using devtools # BiocManager::install("devtools") # devtools::install_github("fspetale/fgga") ## ----setup, eval = TRUE, message=FALSE------------------------------------------------------------ library(fgga) ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- # Loading Canis lupus familiaris dataset and example R objects data(CfData) ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- # To see the summarized experiment object summary(CfData) # To see the information of characterized data dim(CfData$dxCf) colnames(CfData$dxCf)[1:20] rownames(CfData$dxCf)[1:10] head.matrix(CfData$dxCf[, 51:61], n = 10) # to see the information of GO data dim(CfData$tableCfGO) colnames(CfData$tableCfGO)[1:10] rownames(CfData$tableCfGO)[1:10] head(CfData$tableCfGO)[, 1:8] ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- # Checking the amount of annotations by GO-term apply(CfData$tableCfGO, MARGIN=2, sum) ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- library(GO.db) library(GOstats) mygraph <- GOGraph(CfData$nodesGO, GOMFPARENTS) # Delete root node called all mygraph <- subGraph(CfData$nodesGO, mygraph) # We adapt the graph to the format used by FGGA mygraph <- t(as(mygraph, "matrix")) mygraphGO <- as(mygraph, "graphNEL") # We search the root GO-term rootGO <- leaves(mygraphGO, "in") rootGO plot(mygraphGO) ## ----message = FALSE, eval = FALSE---------------------------------------------------------------- # # We add GO-terms corresponding to Cellular Component subdomain # myGOs <- c(CfData[['nodesGO']], "GO:1902494", "GO:0032991", "GO:1990234", # "GO:0005575") # # # We build a graph respecting the GO constraints of inference to MF, CC and BP subdomains # mygraphGO <- preCoreFG(myGOs, domains="GOMF") # # plot(mygraphGO) ## ----message=FALSE, include=FALSE, results='hide'------------------------------------------------- mygraphGO <- as(CfData[["graphCfGO"]], "graphNEL") rootGO <- leaves(mygraphGO, "in") ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- modelFGGA <- fgga2bipartite(mygraphGO) ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- # We take a subset of Cf data to train our model idsTrain <- CfData$indexGO[["indexTrain"]][1:750] # We build our model of binary SVM classifiers modelSVMs <- lapply(CfData[["nodesGO"]], FUN = svmTrain, tableOntoTerms = CfData[["tableCfGO"]][idsTrain, ], dxCharacterized = CfData[["dxCf"]][idsTrain, ], graphOnto = mygraphGO, kernelSVM = "radial") ## ----message = FALSE, eval = FALSE---------------------------------------------------------------- # # We calculate the reliability of each GO-term # varianceGOs <- varianceOnto(tableOntoTerms = CfData[["tableCfGO"]][idsTrain, ], # dxCharacterized = CfData[["dxCf"]][idsTrain, ], # kFold = 5, graphOnto = mygraphGO, rootNode = rootGO, # kernelSVM = "radial") # # varianceGOs ## ----echo=FALSE, message=FALSE-------------------------------------------------------------------- CfData[["varianceGOs"]] varianceGOs <- CfData[["varianceGOs"]] ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- dxTestCharacterized <- CfData[["dxCf"]][CfData$indexGO[["indexTest"]][1:50], ] matrixGOTest <- svmOnto(svmMoldel = modelSVMs, dxCharacterized = dxTestCharacterized, rootNode = rootGO, varianceSVM = varianceGOs) head(matrixGOTest)[,1:8] ## ----message = FALSE, eval = TRUE----------------------------------------------------------------- matrixFGGATest <- t(apply(matrixGOTest, MARGIN = 1, FUN = msgFGGA, matrixFGGA = modelFGGA, graphOnto = mygraphGO, tmax = 50, epsilon = 0.001)) head(matrixFGGATest)[,1:8] ## ----message = FALSE, eval = TRUE---------------------------------------------------------------- fHierarchicalMeasures(CfData$tableCfGO[rownames(matrixFGGATest), ], matrixFGGATest, mygraphGO) ## ----message = FALSE, eval = FALSE---------------------------------------------------------------- # # Computing F-score # Fs <- fMeasures(CfData$tableCfGO[rownames(matrixFGGATest), ], matrixFGGATest) # # # Average F-score # Fs$perfByTerms[4] # # library(pROC) # # # Computing ROC curve to the first term # rocGO <- roc(CfData$tableCfGO[rownames(matrixFGGATest), 1], matrixFGGATest[, 1]) # # # Average AUC the first term # auc(roc) # # # Computing precision at different recall levels to the first term # rocGO <- roc(CfData$tableCfGO[rownames(matrixFGGATest), 1], # matrixFGGATest[, 1], percent=TRUE) # PXR <- coords(rocGO, "all", ret = c("recall", "precision"), transpose = FALSE) # # # Average PxR to the first term # apply(as.matrix(PXR$precision[!is.na(PXR$precision)]), MARGIN = 2, mean ## ----session,eval=TRUE,echo=FALSE----------------------------------------------------------------- sessionInfo()