## ----setup, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE, cache=FALSE---- library(knitr) opts_chunk$set( cache = FALSE, echo = TRUE, warning = FALSE, error = FALSE, message = FALSE ) ## ------------------------------------------------------------------------ library(airway) library(DESeq2) data(airway) # import data to DESeq2 and variance stabilize dset = DESeqDataSetFromMatrix(assay(airway), colData=as.data.frame(colData(airway)), design=~dex) dset = estimateSizeFactors(dset) dset = estimateDispersions(dset) gene_expr = getVarianceStabilizedData(dset) # annotate matrix with HGNC symbols library(biomaRt) mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl")) genes = getBM(attributes = c("ensembl_gene_id","hgnc_symbol"), values=rownames(gene_expr), mart=mart) matched = match(rownames(gene_expr), genes$ensembl_gene_id) rownames(gene_expr) = genes$hgnc_symbol[matched] ## ------------------------------------------------------------------------ library(progeny) pathways = progeny(gene_expr, scale=FALSE) controls = airway$dex == "untrt" ctl_mean = apply(pathways[controls,], 2, mean) ctl_sd = apply(pathways[controls,], 2, sd) pathways = t(apply(pathways, 1, function(x) x - ctl_mean)) pathways = apply(pathways, 1, function(x) x / ctl_sd) ## ------------------------------------------------------------------------ library(dplyr) result = apply(pathways, 1, function(x) { broom::tidy(lm(x ~ !controls)) %>% filter(term == "!controlsTRUE") %>% select(-term) }) mutate(bind_rows(result), pathway=names(result)) ## ------------------------------------------------------------------------ # set up a file cache so we download only once library(BiocFileCache) bfc = BiocFileCache(".") # gene expression and drug response base = "http://www.cancerrxgene.org/gdsc1000/GDSC1000_WebResources/Data/" paths = bfcrpath(bfc, paste0(base, c("suppData/TableS4A.xlsx", "preprocessed/Cell_line_RMA_proc_basalExp.txt.zip"))) ## ------------------------------------------------------------------------ # load the downloaded files drug_table = readxl::read_excel(paths[1], skip=5) gene_table = readr::read_tsv(paths[2]) # we need drug response with COSMIC IDs drug_response = data.matrix(drug_table[,3:ncol(drug_table)]) rownames(drug_response) = drug_table[[1]] # we need genes in rows and samples in columns gene_expr = data.matrix(gene_table[,3:ncol(gene_table)]) colnames(gene_expr) = sub("DATA.", "", colnames(gene_expr), fixed=TRUE) rownames(gene_expr) = gene_table$GENE_SYMBOLS ## ------------------------------------------------------------------------ library(progeny) pathways = progeny(gene_expr) ## ------------------------------------------------------------------------ head(pathways) ## ------------------------------------------------------------------------ cell_lines = intersect(rownames(pathways), rownames(drug_response)) trametinib = drug_response[cell_lines, "Trametinib"] mapk = pathways[cell_lines, "MAPK"] associations = lm(trametinib ~ mapk) summary(associations) ## ----echo=FALSE---------------------------------------------------------- sessionInfo()