## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, message = FALSE, comment = "#>" ) ## ----install, eval=FALSE------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("msigdb") ## ----load-packages, message=FALSE--------------------------------------------- library(msigdb) library(ExperimentHub) library(GSEABase) ## ----get-msigdb--------------------------------------------------------------- eh = ExperimentHub() query(eh , 'msigdb') ## ----download-msigdb-sym-id--------------------------------------------------- eh[['EH5421']] ## ----download-msigdb-sym-getMsigdb-------------------------------------------- #use the custom accessor to select a specific version of MSigDB msigdb.hs = getMsigdb(org = 'hs', id = 'SYM', version = '7.4') msigdb.hs ## ----append-kegg-------------------------------------------------------------- msigdb.hs = appendKEGG(msigdb.hs) msigdb.hs ## ----process-gsc-------------------------------------------------------------- length(msigdb.hs) ## ----access-gs---------------------------------------------------------------- gs = msigdb.hs[[1000]] gs #get genes in the signature geneIds(gs) #get collection type collectionType(gs) #get MSigDB category bcCategory(collectionType(gs)) #get MSigDB subcategory bcSubCategory(collectionType(gs)) #get description description(gs) #get details details(gs) ## ----summarise-gsc------------------------------------------------------------ #calculate the number of signatures in each category table(sapply(lapply(msigdb.hs, collectionType), bcCategory)) #calculate the number of signatures in each subcategory table(sapply(lapply(msigdb.hs, collectionType), bcSubCategory)) #plot the distribution of sizes hist(sapply(lapply(msigdb.hs, geneIds), length), main = 'MSigDB signature size distribution', xlab = 'Signature size') ## ----list-collections--------------------------------------------------------- listCollections(msigdb.hs) listSubCollections(msigdb.hs) ## ----------------------------------------------------------------------------- #retrieeve the hallmarks gene sets subsetCollection(msigdb.hs, 'h') #retrieve the biological processes category of gene ontology subsetCollection(msigdb.hs, 'c5', 'GO:BP') ## ----load-limma, message=FALSE------------------------------------------------ library(limma) #create expression data allg = unique(unlist(geneIds(msigdb.hs))) emat = matrix(0, nrow = length(allg), ncol = 6) rownames(emat) = allg colnames(emat) = paste0('sample', 1:6) head(emat) ## ----subset-msigdb------------------------------------------------------------ #retrieve collections hallmarks = subsetCollection(msigdb.hs, 'h') msigdb_ids = geneIds(hallmarks) #convert gene sets into a list of gene indices fry_indices = ids2indices(msigdb_ids, rownames(emat)) fry_indices[1:2] ## ----download-msig-sym-id-mouse----------------------------------------------- msigdb.mm = getMsigdb(org = 'mm', id = 'SYM', version = '7.4') msigdb.mm ## ----sessionInfo-------------------------------------------------------------- sessionInfo()