## ----fig.align='center', message=FALSE, warning=FALSE, eval=FALSE------------- # library(gsean) # library(TCGAbiolinks) # library(SummarizedExperiment) # # TCGA LUAD # query <- GDCquery(project = "TCGA-LUAD", # data.category = "Gene expression", # data.type = "Gene expression quantification", # platform = "Illumina HiSeq", # file.type = "normalized_results", # experimental.strategy = "RNA-Seq", # legacy = TRUE) # GDCdownload(query, method = "api") # invisible(capture.output(data <- GDCprepare(query))) # exprs.LUAD <- assay(data) # # remove duplicated gene names # exprs.LUAD <- exprs.LUAD[-which(duplicated(rownames(exprs.LUAD))),] # # list of genes # recur.mut.gene <- c("KRAS", "TP53", "STK11", "RBM10", "SPATA31C1", "KRTAP4-11", # "DCAF8L2", "AGAP6", "KEAP1", "SETD2", "ZNF679", "FSCB", # "BRAF", "ZNF770", "U2AF1", "SMARCA4", "HRNR", "EGFR") # # # KEGG_hsa # load(system.file("data", "KEGG_hsa.rda", package = "gsean")) # # # GSEA # set.seed(1) # result.GSEA <- gsean(KEGG_hsa, recur.mut.gene, exprs.LUAD, threshold = 0.7) # invisible(capture.output(p <- GSEA.barplot(result.GSEA, category = 'pathway', # score = 'NES', pvalue = 'padj', # sort = 'padj', top = 20))) # p <- GSEA.barplot(result.GSEA, category = 'pathway', score = 'NES', # pvalue = 'padj', sort = 'padj', top = 20) # p + theme(plot.margin = margin(10, 10, 10, 75)) ## ----fig.align='center', message=FALSE, warning=FALSE, eval=TRUE-------------- library(gsean) library(pasilla) library(DESeq2) # pasilla count data pasCts <- system.file("extdata", "pasilla_gene_counts.tsv", package = "pasilla", mustWork = TRUE) cts <- as.matrix(read.csv(pasCts, sep="\t", row.names = "gene_id")) condition <- factor(c(rep("untreated", 4), rep("treated", 3))) dds <- DESeqDataSetFromMatrix( countData = cts, colData = data.frame(condition), design = ~ 0 + condition) # filtering keep <- rowSums(counts(dds)) >= 10 dds <- dds[keep,] # differentially expressed genes dds <- DESeq(dds) resultsNames(dds) res <- results(dds, contrast = list("conditiontreated", "conditionuntreated"), listValues = c(1, -1)) statistic <- res$stat names(statistic) <- rownames(res) exprs.pasilla <- counts(dds, normalized = TRUE) # convert gene id library(org.Dm.eg.db) gene.id <- AnnotationDbi::select(org.Dm.eg.db, names(statistic), "ENTREZID", "FLYBASE") names(statistic) <- gene.id[,2] rownames(exprs.pasilla) <- gene.id[,2] # GO_dme load(system.file("data", "GO_dme.rda", package = "gsean")) # GSEA set.seed(1) result.GSEA <- gsean(GO_dme, statistic, exprs.pasilla) invisible(capture.output(p <- GSEA.barplot(result.GSEA, category = 'pathway', score = 'NES', top = 50, pvalue = 'padj', sort = 'padj', numChar = 110) + theme(plot.margin = margin(10, 10, 10, 50)))) plotly::ggplotly(p) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()